* stdlib/Makefile (mpn-stuff): New target.

author Roland McGrath <roland@gnu.org>
Fri, 1 Mar 1996 18:45:35 +0000 (18:45 +0000)
committer Roland McGrath <roland@gnu.org>
Fri, 1 Mar 1996 18:45:35 +0000 (18:45 +0000)
diff --git a/ChangeLog b/ChangeLog

index 5d3f708..d71be84 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
  Fri Mar  1 10:09:46 1996  Roland McGrath  <roland@charlie-brown.gnu.ai.mit.edu>
  
+       * stdlib/Makefile (mpn-stuff): New target.
+       (copy-mpn): Use it.
+
+       * Code copied from GMP updated to 1.937 version.
+       * stdlib/strtod.c (HAVE_ALLOCA): Define this for gmp headers.
+
         * posix/glob.c: Use canonical code from autoconf manual for dirent
         include.
         [_D_NAMLEN]: Redefine NAMLEN using this.
diff --git a/libc-symbols.h b/libc-symbols.h

index bd3bb9a..f211f48 100644 (file)
--- a/libc-symbols.h
+++ b/libc-symbols.h
@@ -170,7 +170,7 @@ extern const char _libc_intl_domainname[];
     are better clued in to what we are doing.  */
  #undef strong_alias
  #define strong_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+  __typeof (name) aliasname __attribute__ ((alias (#name)));
  
  #ifdef HAVE_WEAK_SYMBOLS
  #undef weak_symbol
@@ -178,7 +178,7 @@ extern const char _libc_intl_domainname[];
    extern __typeof (name) name __attribute__ ((weak));
  #undef weak_alias
  #define weak_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
+  __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
  #endif /* HAVE_WEAK_SYMBOLS.  */
  #endif /* Not ASSEMBLER, and GCC 2.8 or later.  */
  
diff --git a/stdlib/Makefile b/stdlib/Makefile

index 8228808..77940d1 100644 (file)
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -102,7 +102,8 @@ endef
  mpn-copy = $(filter-out $(mpn-sysdep),$(mpn-headers))# mp_clz_tab.c)
  $(mpn-copy): %: $(ignore gmp2glibc.sed) $(gmp-srcdir)/%; $(gmp2glibc)
  
-.PHONY: copy-mpn clean-mpn
+.PHONY: copy-mpn clean-mpn mpn-stuff
+mpn-stuff: $(mpn-stuff)
  copy-mpn: $(mpn-stuff)
         test ! -d CVS || cvs commit -m'Updated from $(gmp-srcdir)' $+
  clean-mpn:
diff --git a/stdlib/gmp-impl.h b/stdlib/gmp-impl.h

index 2f0956d..83d4e32 100644 (file)
--- a/stdlib/gmp-impl.h
+++ b/stdlib/gmp-impl.h
@@ -1,6 +1,6 @@
  /* Include file for internal GNU MP types and definitions.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -18,25 +18,50 @@ You should have received a copy of the GNU Library General Public License
  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
-#if ! defined (alloca)
-#if defined (__GNUC__)
+/* When using gcc, make sure to use its builtin alloca.  */
+#if ! defined (alloca) && defined (__GNUC__)
  #define alloca __builtin_alloca
-#endif
+#define HAVE_ALLOCA
  #endif
  
+/* When using cc, do whatever necessary to allow use of alloca.  For many
+   machines, this means including alloca.h.  IBM's compilers need a #pragma
+   in "each module that needs to use alloca".  */
  #if ! defined (alloca)
-#if defined (__sparc__) || defined (sparc) || defined (__sgi)
+/* We need lots of variants for MIPS, to cover all versions and perversions
+   of OSes for MIPS.  */
+#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
+ || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
+ || defined (__alpha) || defined (__sparc) || defined (sparc) \
+ || defined (__ksr__)
  #include <alloca.h>
+#define HAVE_ALLOCA
+#endif
+#if defined (_IBMR2)
+#pragma alloca
+#define HAVE_ALLOCA
+#endif
+#if defined (__DECC)
+#define alloca(x) __ALLOCA(x)
+#define HAVE_ALLOCA
  #endif
  #endif
  
+#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
+#include "stack-alloc.h"
+#else
+#define TMP_DECL(m)
+#define TMP_ALLOC(x) alloca(x)
+#define TMP_MARK(m)
+#define TMP_FREE(m)
+#endif
+
  #ifndef NULL
-#define NULL 0L
+#define NULL ((void *) 0)
  #endif
  
  #if ! defined (__GNUC__)
  #define inline                 /* Empty */
-void *alloca();
  #endif
  
  #define ABS(x) (x >= 0 ? x : -x)
@@ -46,7 +71,7 @@ void *alloca();
  #include "gmp-mparam.h"
  /* #include "longlong.h" */
  
-#ifdef __STDC__
+#if defined (__STDC__)  || defined (__cplusplus)
  void *malloc (size_t);
  void *realloc (void *, size_t);
  void free (void *);
@@ -119,35 +144,6 @@ void _mp_default_free ();
        }                                                                        \
    } while (0)
  
-/*  Swap (mp_ptr, mp_size_t) (U, UL) with (V, VL)  */
-#define MPN_SWAP(u, l, v, m) \
-  do {                                                                 \
-    { mp_ptr _; _ = (u), (u) = (v), (v) = _;}                          \
-    { mp_size_t _; _ = (l), (l) = (m), (m) = _;}                       \
-  } while (0)
-
-/*  Return true iff the limb X has less bits than the limb Y.  */
-#define MPN_LESS_BITS_LIMB(x,y) ((x) < (y) && (x) < ((x) ^ (y)))
-
-/*  Return true iff (mp_ptr, mp_size_t) (U, UL) has less bits than (V, VL).  */
-#define MPN_LESS_BITS(u, l, v, m) \
-  ((l) < (m)                                                           \
-   || ((l) == (m) && (l) != 0 && MPN_LESS_BITS_LIMB ((u)[(l - 1)], (v)[(l) - 1])))
-
-/*  Return true iff (mp_ptr, mp_size_t) (U, UL) has more bits than (V, VL).  */
-#define MPN_MORE_BITS(u, l, v, m) MPN_LESS_BITS (v, m, u, l)
-
-/*  Perform twos complement on (mp_ptr, mp_size_t) (U, UL), 
-    putting result at (v, VL).  Precondition: U[0] != 0.  */
-#define MPN_COMPL_INCR(u, v, l)        \
-  do {                                                                 \
-    mp_size_t _ = 0;                                                   \
-    (u)[0] = -(v)[_];                                                  \
-    while (_++ < (l))                                                  \
-      (u)[_] = ~(v)[_];                                                        \
-  } while (0)
-#define MPN_COMPL MPN_COMPL_INCR
-
  /* Initialize the MP_INT X with space for NLIMBS limbs.
     X should be a temporary variable, and it will be automatically
     cleared out when the running function returns.
@@ -156,23 +152,23 @@ void _mp_default_free ();
  #define MPZ_TMP_INIT(X, NLIMBS) \
    do {                                                                 \
      mpz_ptr __x = (X);                                                 \
-    __x->alloc = (NLIMBS);                                             \
-    __x->d = (mp_ptr) alloca ((NLIMBS) * BYTES_PER_MP_LIMB);           \
+    __x->_mp_alloc = (NLIMBS);                                         \
+    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);    \
    } while (0)
  
  #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
    do {                                                                 \
      if ((size) < KARATSUBA_THRESHOLD)                                  \
-      ____mpn_mul_n_basecase (prodp, up, vp, size);                    \
+      impn_mul_n_basecase (prodp, up, vp, size);                       \
      else                                                               \
-      ____mpn_mul_n (prodp, up, vp, size, tspace);                     \
+      impn_mul_n (prodp, up, vp, size, tspace);                        \
    } while (0);
  #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
    do {                                                                 \
      if ((size) < KARATSUBA_THRESHOLD)                                  \
-      ____mpn_sqr_n_basecase (prodp, up, size);                                \
+      impn_sqr_n_basecase (prodp, up, size);                           \
      else                                                               \
-      ____mpn_sqr_n (prodp, up, size, tspace);                         \
+      impn_sqr_n (prodp, up, size, tspace);                            \
    } while (0);
  
  /* Structure for conversion between internal binary format and
@@ -198,6 +194,13 @@ struct bases
    mp_limb big_base_inverted;
  };
  
+/* Access macros for structure fields for user-visible structures with
+   hidden fields.  */
+#define size(X) (X)._mp_size
+#define alloc(X) (X)._mp_alloc
+#define prec(X) (X)._mp_prec
+#define limbs(X) (X)._mp_d
+
  extern const struct bases __mp_bases[];
  extern mp_size_t __gmp_default_fp_limb_precision;
  
@@ -288,6 +291,11 @@ typedef mp_limb UWtype;
  typedef unsigned int UHWtype;
  #define W_TYPE_SIZE BITS_PER_MP_LIMB
  
+/* Internal mpn calls */
+#define impn_mul_n_basecase    __MPN(impn_mul_n_basecase)
+#define impn_mul_n             __MPN(impn_mul_n)
+#define impn_sqr_n_basecase    __MPN(impn_sqr_n_basecase)
+#define impn_sqr_n             __MPN(impn_sqr_n)
  
  #ifndef IEEE_DOUBLE_BIG_ENDIAN
  #define IEEE_DOUBLE_BIG_ENDIAN 1
@@ -298,10 +306,10 @@ union ieee_double_extract
  {
    struct
      {
-      unsigned long sig:1;
-      unsigned long exp:11;
-      unsigned long manh:20;
-      unsigned long manl:32;
+      unsigned int sig:1;
+      unsigned int exp:11;
+      unsigned int manh:20;
+      unsigned int manl:32;
      } s;
    double d;
  };
@@ -310,10 +318,10 @@ union ieee_double_extract
  {
    struct
      {
-      unsigned long manl:32;
-      unsigned long manh:20;
-      unsigned long exp:11;
-      unsigned long sig:1;
+      unsigned int manl:32;
+      unsigned int manh:20;
+      unsigned int exp:11;
+      unsigned int sig:1;
      } s;
    double d;
  };
diff --git a/stdlib/gmp.h b/stdlib/gmp.h

index 2437799..5f1b48d 100644 (file)
--- a/stdlib/gmp.h
+++ b/stdlib/gmp.h
@@ -1,6 +1,6 @@
  /* gmp.h -- Definitions for GNU multiple precision functions.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,22 +21,31 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #ifndef __GMP_H__
  
  #ifndef __GNU_MP__
+#define __GNU_MP__ 2
  #define __need_size_t
  #include <stddef.h>
  #undef __need_size_t
  
-#if defined (__STDC__)
+#if defined (__STDC__) || defined (__cplusplus)
  #define __gmp_const const
  #else
  #define __gmp_const
  #endif
  
  #if defined (__GNUC__)
-#define __gmp_inline inline
+#define __gmp_inline __inline__
  #else
  #define __gmp_inline
  #endif
  
+#ifndef _EXTERN_INLINE
+#ifdef __GNUC__
+#define _EXTERN_INLINE extern __inline__
+#else
+#define _EXTERN_INLINE static
+#endif
+#endif
+
  #ifdef _SHORT_LIMB
  typedef unsigned int           mp_limb;
  typedef int                    mp_limb_signed;
@@ -52,30 +61,30 @@ typedef long int            mp_limb_signed;
  
  typedef mp_limb *              mp_ptr;
  typedef __gmp_const mp_limb *  mp_srcptr;
-typedef int                    mp_size_t;
+typedef long int               mp_size_t;
  typedef long int               mp_exp_t;
  
  #ifndef __MP_SMALL__
  typedef struct
  {
-  mp_size_t alloc;             /* Number of *limbs* allocated and pointed
+  int _mp_alloc;               /* Number of *limbs* allocated and pointed
                                    to by the D field.  */
-  mp_size_t size;              /* abs(SIZE) is the number of limbs
+  int _mp_size;                        /* abs(SIZE) is the number of limbs
                                    the last field points to.  If SIZE
                                    is negative this is a negative
                                    number.  */
-  mp_limb *d;                  /* Pointer to the limbs.  */
+  mp_limb *_mp_d;              /* Pointer to the limbs.  */
  } __mpz_struct;
  #else
  typedef struct
  {
-  short int alloc;             /* Number of *limbs* allocated and pointed
+  short int _mp_alloc;         /* Number of *limbs* allocated and pointed
                                    to by the D field.  */
-  short int size;              /* abs(SIZE) is the number of limbs
+  short int _mp_size;          /* abs(SIZE) is the number of limbs
                                    the last field points to.  If SIZE
                                    is negative this is a negative
                                    number.  */
-  mp_limb *d;                  /* Pointer to the limbs.  */
+  mp_limb *_mp_d;              /* Pointer to the limbs.  */
  } __mpz_struct;
  #endif
  #endif /* __GNU_MP__ */
@@ -89,20 +98,20 @@ typedef __mpz_struct mpz_t[1];
     the numerator.  */
  typedef struct
  {
-  __mpz_struct num;
-  __mpz_struct den;
+  __mpz_struct _mp_num;
+  __mpz_struct _mp_den;
  #if 0
-  long int num_alloc;          /* Number of limbs allocated
+  int _mp_num_alloc;           /* Number of limbs allocated
                                    for the numerator.  */
-  long int num_size;           /* The absolute value of this field is the
+  int _mp_num_size;            /* The absolute value of this field is the
                                    length of the numerator; the sign is the
                                    sign of the entire rational number.  */
-  mp_ptr num;                  /* Pointer to the numerator limbs.  */
-  long int den_alloc;          /* Number of limbs allocated
+  mp_ptr _mp_num;              /* Pointer to the numerator limbs.  */
+  int _mp_den_alloc;           /* Number of limbs allocated
                                    for the denominator.  */
-  long int den_size;           /* Length of the denominator.  (This field
+  int _mp_den_size;            /* Length of the denominator.  (This field
                                    should always be positive.) */
-  mp_ptr den;                  /* Pointer to the denominator limbs.  */
+  mp_ptr _mp_den;              /* Pointer to the denominator limbs.  */
  #endif
  } __mpq_struct;
  
@@ -111,17 +120,17 @@ typedef __mpq_struct mpq_t[1];
  
  typedef struct
  {
-  mp_size_t prec;              /* Max precision, in number of `mp_limb's.
+  int _mp_prec;                        /* Max precision, in number of `mp_limb's.
                                    Set by mpf_init and modified by
                                    mpf_set_prec.  The area pointed to
                                    by the `d' field contains `prec' + 1
                                    limbs.  */
-  mp_size_t size;              /* abs(SIZE) is the number of limbs
+  int _mp_size;                        /* abs(SIZE) is the number of limbs
                                    the last field points to.  If SIZE
                                    is negative this is a negative
                                    number.  */
-  mp_exp_t exp;                        /* Exponent, in the base of `mp_limb'.  */
-  mp_limb *d;                  /* Pointer to the limbs.  */
+  mp_exp_t _mp_exp;            /* Exponent, in the base of `mp_limb'.  */
+  mp_limb *_mp_d;              /* Pointer to the limbs.  */
  } __mpf_struct;
  
  /* typedef __mpf_struct MP_FLOAT; */
@@ -136,36 +145,62 @@ typedef __mpf_struct *mpf_ptr;
  typedef __gmp_const __mpq_struct *mpq_srcptr;
  typedef __mpq_struct *mpq_ptr;
  
-#if defined (__STDC__)
+#ifndef _PROTO
+#if defined (__STDC__) || defined (__cplusplus)
  #define _PROTO(x) x
  #else
  #define _PROTO(x) ()
  #endif
+#endif
+
+#ifndef __MPN
+#define __MPN(x) __mpn_##x
+#endif
  
  #if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO)
  #define _GMP_H_HAVE_FILE 1
  #endif
  
-void mp_set_memory_functions _PROTO((void *(*) (size_t),
-                                    void *(*) (void *, size_t, size_t),
-                                    void (*) (void *, size_t)));
+void mp_set_memory_functions _PROTO ((void *(*) (size_t),
+                                     void *(*) (void *, size_t, size_t),
+                                     void (*) (void *, size_t)));
  
  /**************** Integer (i.e. Z) routines.  ****************/
  
+#if defined (__cplusplus)
+extern "C" {
+#endif
  void *_mpz_realloc _PROTO ((mpz_ptr, mp_size_t));
  
  void mpz_abs _PROTO ((mpz_ptr, mpz_srcptr));
  void mpz_add _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_add_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
  void mpz_and _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_array_init _PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+void mpz_cdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+unsigned long int mpz_cdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_cdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+unsigned long int mpz_cdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_cdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+unsigned long int mpz_cdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+unsigned long int mpz_cdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
  void mpz_clear _PROTO ((mpz_ptr));
  void mpz_clrbit _PROTO ((mpz_ptr, unsigned long int));
  int mpz_cmp _PROTO ((mpz_srcptr, mpz_srcptr));
  int mpz_cmp_si _PROTO ((mpz_srcptr, signed long int));
  int mpz_cmp_ui _PROTO ((mpz_srcptr, unsigned long int));
  void mpz_com _PROTO ((mpz_ptr, mpz_srcptr));
-void mpz_div_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_divexact _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_fac_ui _PROTO ((mpz_ptr, unsigned long int));
+void mpz_fdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_fdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+unsigned long int mpz_fdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_fdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+unsigned long int mpz_fdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_fdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_fdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+unsigned long int mpz_fdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+unsigned long int mpz_fdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
  void mpz_gcd _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  unsigned long int mpz_gcd_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
  void mpz_gcdext _PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
@@ -176,23 +211,27 @@ mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
  mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
  void mpz_init _PROTO ((mpz_ptr));
  #ifdef _GMP_H_HAVE_FILE
-void mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
-int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
+size_t mpz_inp_binary _PROTO ((mpz_ptr, FILE *));
+size_t mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
+size_t mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
  #endif
-void mpz_ior _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_init_set _PROTO ((mpz_ptr, mpz_srcptr));
  void mpz_init_set_si _PROTO ((mpz_ptr, signed long int));
  int mpz_init_set_str _PROTO ((mpz_ptr, const char *, int));
  void mpz_init_set_ui _PROTO ((mpz_ptr, unsigned long int));
-void mpz_lcm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
-void mpz_mod_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+int mpz_invert _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_ior _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+int mpz_jacobi _PROTO ((mpz_srcptr, mpz_srcptr));
+int mpz_legendre _PROTO ((mpz_srcptr, mpz_srcptr));
+void mpz_mod _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
  void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
  void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
  #ifdef _GMP_H_HAVE_FILE
-void mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
-void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
+size_t mpz_out_binary _PROTO ((FILE *, mpz_srcptr));
+size_t mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
+size_t mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
  #endif
  int mpz_perfect_square_p _PROTO ((mpz_srcptr));
  mp_size_t mpz_popcount _PROTO ((mpz_srcptr));
@@ -202,34 +241,30 @@ void mpz_powm_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
  int mpz_probab_prime_p _PROTO ((mpz_srcptr, int));
  void mpz_random _PROTO ((mpz_ptr, mp_size_t));
  void mpz_random2 _PROTO ((mpz_ptr, mp_size_t));
+mp_size_t mpz_scan0 _PROTO ((mpz_srcptr, mp_size_t));
+mp_size_t mpz_scan1 _PROTO ((mpz_srcptr, mp_size_t));
  void mpz_set _PROTO ((mpz_ptr, mpz_srcptr));
+void mpz_set_d _PROTO ((mpz_ptr, double));
  void mpz_set_si _PROTO ((mpz_ptr, signed long int));
  int mpz_set_str _PROTO ((mpz_ptr, const char *, int));
  void mpz_set_ui _PROTO ((mpz_ptr, unsigned long int));
+void mpz_setbit _PROTO ((mpz_ptr, unsigned long int));
  size_t mpz_size _PROTO ((mpz_srcptr));
  size_t mpz_sizeinbase _PROTO ((mpz_srcptr, int));
  void mpz_sqrt _PROTO ((mpz_ptr, mpz_srcptr));
  void mpz_sqrtrem _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
  void mpz_sub _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
  void mpz_sub_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_tdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_tdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_tdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_tdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_tdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_tdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+void mpz_tdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_tdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
  void mpz_ui_pow_ui _PROTO ((mpz_ptr, unsigned long int, unsigned long int));
  
-void mpz_fdiv_q _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
-unsigned long int mpz_fdiv_q_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
-void mpz_fdiv_qr _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
-unsigned long int mpz_fdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
-void mpz_fdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
-unsigned long int mpz_fdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
-unsigned long int mpz_fdiv_ui _PROTO((mpz_srcptr, unsigned long int));
-void mpz_tdiv_q _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
-void mpz_tdiv_q_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
-void mpz_tdiv_qr _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
-void mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
-void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
-void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
-
-void mpz_array_init (mpz_ptr, size_t, mp_size_t);
-
  /**************** Rational (i.e. Q) routines.  ****************/
  
  void mpq_init _PROTO ((mpq_ptr));
@@ -243,11 +278,14 @@ void mpq_mul _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
  void mpq_div _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
  void mpq_neg _PROTO ((mpq_ptr, mpq_srcptr));
  int mpq_cmp _PROTO ((mpq_srcptr, mpq_srcptr));
+int mpq_cmp_ui _PROTO ((mpq_srcptr, unsigned long int, unsigned long int));
  void mpq_inv _PROTO ((mpq_ptr, mpq_srcptr));
  void mpq_set_num _PROTO ((mpq_ptr, mpz_srcptr));
  void mpq_set_den _PROTO ((mpq_ptr, mpz_srcptr));
  void mpq_get_num _PROTO ((mpz_ptr, mpq_srcptr));
  void mpq_get_den _PROTO ((mpz_ptr, mpq_srcptr));
+double mpq_get_d _PROTO ((mpq_srcptr));
+void mpq_canonicalize _PROTO ((mpq_ptr));
  
  /**************** Float (i.e. F) routines.  ****************/
  
@@ -256,8 +294,9 @@ void mpf_add _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
  void mpf_add_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
  void mpf_clear _PROTO ((mpf_ptr));
  int mpf_cmp _PROTO ((mpf_srcptr, mpf_srcptr));
-int mpf_cmp_si _PROTO ((mpf_srcptr, long int));
+int mpf_cmp_si _PROTO ((mpf_srcptr, signed long int));
  int mpf_cmp_ui _PROTO ((mpf_srcptr, unsigned long int));
+int mpf_diff _PROTO ((mpf_srcptr, mpf_srcptr, unsigned long int));
  void mpf_div _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
  void mpf_div_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
  void mpf_div_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
@@ -266,11 +305,11 @@ char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
  void mpf_init _PROTO ((mpf_ptr));
  void mpf_init2 _PROTO ((mpf_ptr, mp_size_t));
  #ifdef _GMP_H_HAVE_FILE
-void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
+size_t mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
  #endif
  void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
  void mpf_init_set_d _PROTO ((mpf_ptr, double));
-void mpf_init_set_si _PROTO ((mpf_ptr, long int));
+void mpf_init_set_si _PROTO ((mpf_ptr, signed long int));
  int mpf_init_set_str _PROTO ((mpf_ptr, char *, int));
  void mpf_init_set_ui _PROTO ((mpf_ptr, unsigned long int));
  void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
@@ -278,12 +317,14 @@ void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
  void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
  void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
  #ifdef _GMP_H_HAVE_FILE
-void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *));
+size_t mpf_out_str _PROTO ((FILE *, int, size_t, mpf_srcptr));
  #endif
+void mpf_random2 _PROTO ((mpf_ptr, mp_size_t, mp_size_t));
  void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
  void mpf_set_d _PROTO ((mpf_ptr, double));
  mp_size_t mpf_set_default_prec _PROTO ((mp_size_t));
-void mpf_set_si _PROTO ((mpf_ptr, long int));
+void mpf_set_prec _PROTO ((mpf_ptr, mp_size_t));
+void mpf_set_si _PROTO ((mpf_ptr, signed long int));
  int mpf_set_str _PROTO ((mpf_ptr, const char *, int));
  void mpf_set_ui _PROTO ((mpf_ptr, unsigned long int));
  size_t mpf_size _PROTO ((mpf_srcptr));
@@ -292,68 +333,93 @@ void mpf_sqrt_ui _PROTO ((mpf_ptr, unsigned long int));
  void mpf_sub _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
  void mpf_sub_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
  void mpf_ui_div _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
-
+void mpf_ui_sub _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+#if defined (__cplusplus)
+}
+#endif
  /************ Low level positive-integer (i.e. N) routines.  ************/
  
  /* This is ugly, but we need to make usr calls reach the prefixed function.  */
-#define mpn_add_n      __mpn_add_n
-#define mpn_sub_n      __mpn_sub_n
-#define mpn_mul_1      __mpn_mul_1
-#define mpn_addmul_1   __mpn_addmul_1
-#define mpn_submul_1   __mpn_submul_1
-#define mpn_lshift     __mpn_lshift
-#define mpn_rshift     __mpn_rshift
-#define mpn_sub                __mpn_sub
-#define mpn_add                __mpn_add
-#define mpn_normal_size        __mpn_normal_size
-#define mpn_cmp                __mpn_cmp
-#define mpn_add_1      __mpn_add_1
-#define mpn_sub_1      __mpn_sub_1
-#define mpn_mul_n      __mpn_mul_n
-#define mpn_mul                __mpn_mul
-#define mpn_divmod     __mpn_divmod
-#define mpn_divmod_1   __mpn_divmod_1
-#define mpn_mod_1      __mpn_mod_1
-#define mpn_sqrt       __mpn_sqrt
-#define mpn_next_bit_set __mpn_next_bit_set
-#define mpn_popcount   __mpn_popcount
-#define mpn_hamdist    __mpn_hamdist
-#define mpn_random2    __mpn_random2
-#define mpn_set_str    __mpn_set_str
-#define mpn_get_str    __mpn_get_str
-#define mpn_gcd_1      __mpn_gcd_1
-
-mp_limb __mpn_add_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb __mpn_sub_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb __mpn_mul _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
-void __mpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb __mpn_mul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
-mp_limb __mpn_addmul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
-mp_limb __mpn_submul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
-mp_limb __mpn_divmod _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
-mp_limb __mpn_divmod_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
-mp_limb __mpn_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
-mp_limb __mpn_lshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
-mp_limb __mpn_rshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
-mp_size_t __mpn_sqrt _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
-int __mpn_cmp _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-mp_size_t __mpn_next_bit_set _PROTO ((mp_srcptr, mp_size_t));
-mp_size_t __mpn_popcount _PROTO ((mp_srcptr, mp_size_t));
-mp_size_t __mpn_hamdist _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-void __mpn_random2 _PROTO ((mp_ptr, mp_size_t));
-mp_size_t __mpn_set_str _PROTO ((mp_ptr, const unsigned char *, size_t, int));
-size_t __mpn_get_str _PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
-mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
-
-
-static __gmp_inline mp_limb
-#if defined (__STDC__)
-__mpn_add_1 (register mp_ptr res_ptr,
-            register mp_srcptr s1_ptr,
-            register mp_size_t s1_size,
-            register mp_limb s2_limb)
+#define mpn_add                        __MPN(add)
+#define mpn_add_1              __MPN(add_1)
+#define mpn_add_n              __MPN(add_n)
+#define mpn_addmul_1           __MPN(addmul_1)
+#define mpn_bdivmod            __MPN(bdivmod)
+#define mpn_cmp                        __MPN(cmp)
+#define mpn_divmod_1           __MPN(divmod_1)
+#define mpn_divrem             __MPN(divrem)
+#define mpn_divrem_1           __MPN(divrem_1)
+#define mpn_dump               __MPN(dump)
+#define mpn_gcd                        __MPN(gcd)
+#define mpn_gcd_1              __MPN(gcd_1)
+#define mpn_gcdext             __MPN(gcdext)
+#define mpn_get_str            __MPN(get_str)
+#define mpn_hamdist            __MPN(hamdist)
+#define mpn_lshift             __MPN(lshift)
+#define mpn_mod_1              __MPN(mod_1)
+#define mpn_mul                        __MPN(mul)
+#define mpn_mul_1              __MPN(mul_1)
+#define mpn_mul_n              __MPN(mul_n)
+#define mpn_perfect_square_p   __MPN(perfect_square_p)
+#define mpn_popcount           __MPN(popcount)
+#define mpn_preinv_mod_1       __MPN(preinv_mod_1)
+#define mpn_random2            __MPN(random2)
+#define mpn_rshift             __MPN(rshift)
+#define mpn_scan0              __MPN(scan0)
+#define mpn_scan1              __MPN(scan1)
+#define mpn_set_str            __MPN(set_str)
+#define mpn_sqrtrem            __MPN(sqrtrem)
+#define mpn_sub                        __MPN(sub)
+#define mpn_sub_1              __MPN(sub_1)
+#define mpn_sub_n              __MPN(sub_n)
+#define mpn_submul_1           __MPN(submul_1)
+#define mpn_udiv_w_sdiv                __MPN(udiv_w_sdiv)
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+mp_limb mpn_add_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb mpn_addmul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
+mp_limb mpn_bdivmod _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, unsigned long int));
+int mpn_cmp _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb mpn_divmod_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
+mp_limb mpn_divrem _PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb mpn_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb));
+void mpn_dump _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_gcd _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_limb mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
+mp_size_t mpn_gcdext _PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+size_t mpn_get_str _PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+mp_size_t mpn_hamdist _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb mpn_lshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+mp_limb mpn_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
+mp_limb mpn_mul _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb mpn_mul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
+void mpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+int mpn_perfect_square_p _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_popcount _PROTO ((mp_srcptr, mp_size_t));
+mp_limb mpn_preinv_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb, mp_limb));
+void mpn_random2 _PROTO ((mp_ptr, mp_size_t));
+mp_limb mpn_rshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+mp_size_t mpn_scan0 _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_scan1 _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_set_str _PROTO ((mp_ptr, const unsigned char *, size_t, int));
+mp_size_t mpn_sqrtrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+mp_limb mpn_sub_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+mp_limb mpn_submul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb));
+#if defined (__cplusplus)
+}
+#endif
+
+#if defined (__GNUC__) || defined (_FORCE_INLINES)
+_EXTERN_INLINE mp_limb
+#if defined (__STDC__) || defined (__cplusplus)
+mpn_add_1 (register mp_ptr res_ptr,
+          register mp_srcptr s1_ptr,
+          register mp_size_t s1_size,
+          register mp_limb s2_limb)
  #else
-__mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_size_t s1_size;
@@ -388,15 +454,15 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
    return 0;
  }
  
-static __gmp_inline mp_limb
-#if defined (__STDC__)
-__mpn_add (register mp_ptr res_ptr,
-          register mp_srcptr s1_ptr,
-          register mp_size_t s1_size,
-          register mp_srcptr s2_ptr,
-          register mp_size_t s2_size)
+_EXTERN_INLINE mp_limb
+#if defined (__STDC__) || defined (__cplusplus)
+mpn_add (register mp_ptr res_ptr,
+        register mp_srcptr s1_ptr,
+        register mp_size_t s1_size,
+        register mp_srcptr s2_ptr,
+        register mp_size_t s2_size)
  #else
-__mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_size_t s1_size;
@@ -407,24 +473,24 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
    mp_limb cy_limb = 0;
  
    if (s2_size != 0)
-    cy_limb = __mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+    cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size);
  
    if (s1_size - s2_size != 0)
-    cy_limb =  __mpn_add_1 (res_ptr + s2_size,
-                           s1_ptr + s2_size,
-                           s1_size - s2_size,
-                           cy_limb);
+    cy_limb = mpn_add_1 (res_ptr + s2_size,
+                        s1_ptr + s2_size,
+                        s1_size - s2_size,
+                        cy_limb);
    return cy_limb;
  }
  
-static __gmp_inline mp_limb
-#if defined (__STDC__)
-__mpn_sub_1 (register mp_ptr res_ptr,
-            register mp_srcptr s1_ptr,
-            register mp_size_t s1_size,
-            register mp_limb s2_limb)
+_EXTERN_INLINE mp_limb
+#if defined (__STDC__) || defined (__cplusplus)
+mpn_sub_1 (register mp_ptr res_ptr,
+          register mp_srcptr s1_ptr,
+          register mp_size_t s1_size,
+          register mp_limb s2_limb)
  #else
-__mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_size_t s1_size;
@@ -459,15 +525,15 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
    return 0;
  }
  
-static __gmp_inline mp_limb
-#if defined (__STDC__)
-__mpn_sub (register mp_ptr res_ptr,
-          register mp_srcptr s1_ptr,
-          register mp_size_t s1_size,
-          register mp_srcptr s2_ptr,
-          register mp_size_t s2_size)
+_EXTERN_INLINE mp_limb
+#if defined (__STDC__) || defined (__cplusplus)
+mpn_sub (register mp_ptr res_ptr,
+        register mp_srcptr s1_ptr,
+        register mp_size_t s1_size,
+        register mp_srcptr s2_ptr,
+        register mp_size_t s2_size)
  #else
-__mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_size_t s1_size;
@@ -478,36 +544,46 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
    mp_limb cy_limb = 0;
  
    if (s2_size != 0)
-    cy_limb = __mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+    cy_limb = mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size);
  
    if (s1_size - s2_size != 0)
-    cy_limb =  __mpn_sub_1 (res_ptr + s2_size,
-                           s1_ptr + s2_size,
-                           s1_size - s2_size,
-                           cy_limb);
+    cy_limb = mpn_sub_1 (res_ptr + s2_size,
+                        s1_ptr + s2_size,
+                        s1_size - s2_size,
+                        cy_limb);
    return cy_limb;
  }
+#endif /* __GNUC__ */
  
-static __gmp_inline mp_size_t
-#if defined (__STDC__)
-__mpn_normal_size (mp_srcptr ptr, mp_size_t size)
-#else
-__mpn_normal_size (ptr, size)
-     mp_srcptr ptr;
-     mp_size_t size;
+/* Allow faster testing for negative, zero, and positive.  */
+#define mpz_sign(Z) ((Z)->_mp_size)
+#define mpf_sign(F) ((F)->_mp_size)
+#define mpq_sign(Q) ((Q)->_mp_num._mp_size)
+
+/* Allow direct user access to numerator and denominator of a mpq_t object.  */
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+/* When using GCC, optimize certain common comparisons.  */
+#if defined (__GNUC__)
+#define mpz_cmp_ui(Z,UI) \
+  (__builtin_constant_p (UI) && (UI) == 0                              \
+   ? mpz_sign (Z) : mpz_cmp_ui (Z,UI))
+#define mpz_cmp_si(Z,UI) \
+  (__builtin_constant_p (UI) && (UI) == 0 ? mpz_sign (Z)               \
+   : __builtin_constant_p (UI) && (UI) > 0 ? mpz_cmp_ui (Z,UI)         \
+   : mpz_cmp_si (Z,UI))
+#define mpq_cmp_ui(Q,NUI,DUI) \
+  (__builtin_constant_p (NUI) && (NUI) == 0                            \
+   ? mpq_sign (Q) : mpq_cmp_ui (Q,NUI,DUI))
  #endif
-{
-  while (size)
-    {
-      size--;
-      if (ptr[size] != 0)
-       return size + 1;
-    }
-  return 0;
-}
  
-/* Compatibility with GMP 1.  */
+#define mpn_divmod(qp,np,nsize,dp,dsize) mpn_divrem (qp,0,np,nsize,dp,dsize)
+#if 0
+#define mpn_divmod_1(qp,np,nsize,dlimb) mpn_divrem_1 (qp,0,np,nsize,dlimb)
+#endif
  
+/* Compatibility with GMP 1.  */
  #define mpz_mdiv       mpz_fdiv_q
  #define mpz_mdivmod    mpz_fdiv_qr
  #define mpz_mmod       mpz_fdiv_r
@@ -516,10 +592,6 @@ __mpn_normal_size (ptr, size)
    ((r == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
  #define mpz_mmod_ui(r,n,d) \
    ((r == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
-/* ??? Before release...
-#define mpz_div_2exp   mpz_fdiv_q_2exp
-#define mpz_mod_2exp   mpz_fdiv_r_2exp
-*/
  
  /* Useful synonyms, but not quite compatible with GMP 1.  */
  #define mpz_div                mpz_fdiv_q
@@ -527,10 +599,10 @@ __mpn_normal_size (ptr, size)
  #define mpz_div_ui     mpz_fdiv_q_ui
  #define mpz_divmod_ui  mpz_fdiv_qr_ui
  #define mpz_mod_ui     mpz_fdiv_r_ui
+#define mpz_div_2exp   mpz_fdiv_q_2exp
+#define mpz_mod_2exp   mpz_fdiv_r_2exp
  
-
-#define __GNU_MP__ 2
  #define __GNU_MP_VERSION 2
-#define __GNU_MP_VERSION_MINOR -900 /* ??? */
+#define __GNU_MP_VERSION_MINOR -927 /* ??? */
  #define __GMP_H__
  #endif /* __GMP_H__ */
diff --git a/stdlib/strtod.c b/stdlib/strtod.c

index 6b110f6..3818c81 100644 (file)
--- a/stdlib/strtod.c
+++ b/stdlib/strtod.c
@@ -36,6 +36,10 @@ Cambridge, MA 02139, USA.  */
  #include "../locale/localeinfo.h"
  #include <math.h>
  #include <stdlib.h>
+
+/* The gmp headers need some configuration frobs.  */
+#define HAVE_ALLOCA 1
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include <gmp-mparam.h>
diff --git a/sysdeps/alpha/addmul_1.s b/sysdeps/alpha/addmul_1.s

index 46d277d..8b168cb 100644 (file)
--- a/sysdeps/alpha/addmul_1.s
+++ b/sysdeps/alpha/addmul_1.s
@@ -26,16 +26,7 @@
   # size                r18
   # s2_limb     r19
  
- # This code runs at 42 cycles/limb on the 21064.
-
- # To improve performance for long multiplications, we would use
- # 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
- # these instructions without slowing down the general code: 1. We can
- # only have two prefetches in operation at any time in the Alpha
- # architecture.  2. There will seldom be any special alignment
- # between RES_PTR and S1_PTR.  Maybe we can simply divide the current
- # loop into an inner and outer loop, having the inner loop handle
- # exactly one prefetch block?
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
  
         .set    noreorder
         .set    noat
@@ -52,7 +43,7 @@ __mpn_addmul_1:
         mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         umulh   $2,$19,$0       # $0 = prod_high
-       beq     $18,Lend1       # jump if size was == 1
+       beq     $18,.Lend1      # jump if size was == 1
         ldq     $2,0($17)       # $2 = s1_limb
         addq    $17,8,$17       # s1_ptr++
         subq    $18,1,$18       # size--
@@ -60,10 +51,10 @@ __mpn_addmul_1:
         cmpult  $3,$5,$4
         stq     $3,0($16)
         addq    $16,8,$16       # res_ptr++
-       beq     $18,Lend2       # jump if size was == 2
+       beq     $18,.Lend2      # jump if size was == 2
  
         .align  3
-Loop:  mulq    $2,$19,$3       # $3 = prod_low
+.Loop: mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
         subq    $18,1,$18       # size--
@@ -77,9 +68,9 @@ Loop: mulq    $2,$19,$3       # $3 = prod_low
         stq     $3,0($16)
         addq    $16,8,$16       # res_ptr++
         addq    $5,$0,$0        # combine carries
-       bne     $18,Loop
+       bne     $18,.Loop
  
-Lend2: mulq    $2,$19,$3       # $3 = prod_low
+.Lend2:        mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
         umulh   $2,$19,$4       # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2:        mulq    $2,$19,$3       # $3 = prod_low
         addq    $5,$0,$0        # combine carries
         addq    $4,$0,$0        # cy_limb = prod_high + cy
         ret     $31,($26),1
-Lend1: addq    $5,$3,$3
+.Lend1:        addq    $5,$3,$3
         cmpult  $3,$5,$5
         stq     $3,0($16)
         addq    $0,$5,$0
diff --git a/sysdeps/alpha/alphaev5/add_n.s b/sysdeps/alpha/alphaev5/add_n.s

index 2aaf041..66cf82b 100644 (file)
--- a/sysdeps/alpha/alphaev5/add_n.s
+++ b/sysdeps/alpha/alphaev5/add_n.s
@@ -35,84 +35,113 @@
  __mpn_add_n:
         .frame  $30,0,$26,0
  
-       ldq     $3,0($17)
-       ldq     $4,0($18)
-
-       subq    $19,1,$19
-       and     $19,4-1,$2      # number of limbs in first loop
-       bis     $31,$31,$0
-       beq     $2,.L0          # if multiple of 4 limbs, skip first loop
-
-       subq    $19,$2,$19
-
-.Loop0:        subq    $2,1,$2
+       or      $31,$31,$25             # clear cy
+       subq    $19,4,$19               # decr loop cnt
+       blt     $19,.Lend2              # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+       ldq     $0,0($18)
+       ldq     $1,8($18)
+       ldq     $4,0($17)
         ldq     $5,8($17)
-       addq    $4,$0,$4
-       ldq     $6,8($18)
-       cmpult  $4,$0,$1
-       addq    $3,$4,$4
-       cmpult  $4,$3,$0
-       stq     $4,0($16)
-       or      $0,$1,$0
-
-       addq    $17,8,$17
-       addq    $18,8,$18
-       bis     $5,$5,$3
-       bis     $6,$6,$4
-       addq    $16,8,$16
-       bne     $2,.Loop0
-
-.L0:   beq     $19,.Lend
-
+       addq    $17,32,$17              # update s1_ptr
+       ldq     $2,16($18)
+       addq    $0,$4,$20               # 1st main add
+       ldq     $3,24($18)
+       subq    $19,4,$19               # decr loop cnt
+       ldq     $6,-16($17)
+       cmpult  $20,$0,$25              # compute cy from last add
+       ldq     $7,-8($17)
+       addq    $1,$25,$28              # cy add
+       addq    $18,32,$18              # update s2_ptr
+       addq    $5,$28,$21              # 2nd main add
+       cmpult  $28,$25,$8              # compute cy from last add
+       blt     $19,.Lend1              # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
         .align  4
-.Loop: subq    $19,4,$19
-       unop
-
-       ldq     $6,8($18)
-       addq    $4,$0,$0
+.Loop: cmpult  $21,$28,$25             # compute cy from last add
+       ldq     $0,0($18)
+       or      $8,$25,$25              # combine cy from the two adds
+       ldq     $1,8($18)
+       addq    $2,$25,$28              # cy add
+       ldq     $4,0($17)
+       addq    $28,$6,$22              # 3rd main add
         ldq     $5,8($17)
-       cmpult  $0,$4,$1
-       ldq     $4,16($18)
-       addq    $3,$0,$20
-       cmpult  $20,$3,$0
-       ldq     $3,16($17)
-       or      $0,$1,$0
-       addq    $6,$0,$0
-       cmpult  $0,$6,$1
-       ldq     $6,24($18)
-       addq    $5,$0,$21
-       cmpult  $21,$5,$0
-       ldq     $5,24($17)
-       or      $0,$1,$0
-       addq    $4,$0,$0
-       cmpult  $0,$4,$1
-       ldq     $4,32($18)
-       addq    $3,$0,$22
-       cmpult  $22,$3,$0
-       ldq     $3,32($17)
-       or      $0,$1,$0
-       addq    $6,$0,$0
-       cmpult  $0,$6,$1
-       addq    $5,$0,$23
-       cmpult  $23,$5,$0
-       or      $0,$1,$0
-
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $22,$28,$25             # compute cy from last add
         stq     $20,0($16)
+       or      $8,$25,$25              # combine cy from the two adds
         stq     $21,8($16)
-       stq     $22,16($16)
-       stq     $23,24($16)
-
-       addq    $17,32,$17
-       addq    $18,32,$18
-       addq    $16,32,$16
-       bne     $19,.Loop
+       addq    $3,$25,$28              # cy add
+       addq    $28,$7,$23              # 4th main add
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $23,$28,$25             # compute cy from last add
+       addq    $17,32,$17              # update s1_ptr
+       or      $8,$25,$25              # combine cy from the two adds
+       addq    $16,32,$16              # update res_ptr
+       addq    $0,$25,$28              # cy add
+       ldq     $2,16($18)
+       addq    $4,$28,$20              # 1st main add
+       ldq     $3,24($18)
+       cmpult  $28,$25,$8              # compute cy from last add
+       ldq     $6,-16($17)
+       cmpult  $20,$28,$25             # compute cy from last add
+       ldq     $7,-8($17)
+       or      $8,$25,$25              # combine cy from the two adds
+       subq    $19,4,$19               # decr loop cnt
+       stq     $22,-16($16)
+       addq    $1,$25,$28              # cy add
+       stq     $23,-8($16)
+       addq    $5,$28,$21              # 2nd main add
+       addq    $18,32,$18              # update s2_ptr
+       cmpult  $28,$25,$8              # compute cy from last add
+       bge     $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1:        cmpult  $21,$28,$25             # compute cy from last add
+       or      $8,$25,$25              # combine cy from the two adds
+       addq    $2,$25,$28              # cy add
+       addq    $28,$6,$22              # 3rd main add
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $22,$28,$25             # compute cy from last add
+       stq     $20,0($16)
+       or      $8,$25,$25              # combine cy from the two adds
+       stq     $21,8($16)
+       addq    $3,$25,$28              # cy add
+       addq    $28,$7,$23              # 4th main add
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $23,$28,$25             # compute cy from last add
+       or      $8,$25,$25              # combine cy from the two adds
+       addq    $16,32,$16              # update res_ptr
+       stq     $22,-16($16)
+       stq     $23,-8($16)
+.Lend2:        addq    $19,4,$19               # restore loop cnt
+       beq     $19,.Lret
+ # Start software pipeline for 2nd loop
+       ldq     $0,0($18)
+       ldq     $4,0($17)
+       subq    $19,1,$19
+       beq     $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+       .align  4
+.Loop0:        addq    $0,$25,$28              # cy add
+       ldq     $0,8($18)
+       addq    $4,$28,$20              # main add
+       ldq     $4,8($17)
+       addq    $18,8,$18
+       cmpult  $28,$25,$8              # compute cy from last add
+       addq    $17,8,$17
+       stq     $20,0($16)
+       cmpult  $20,$28,$25             # compute cy from last add
+       subq    $19,1,$19               # decr loop cnt
+       or      $8,$25,$25              # combine cy from the two adds
+       addq    $16,8,$16
+       bne     $19,.Loop0
+.Lend0:        addq    $0,$25,$28              # cy add
+       addq    $4,$28,$20              # main add
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $20,$28,$25             # compute cy from last add
+       stq     $20,0($16)
+       or      $8,$25,$25              # combine cy from the two adds
  
-.Lend: addq    $4,$0,$4
-       cmpult  $4,$0,$1
-       addq    $3,$4,$4
-       cmpult  $4,$3,$0
-       stq     $4,0($16)
-       or      $0,$1,$0
+.Lret: or      $25,$31,$0              # return cy
         ret     $31,($26),1
-
         .end    __mpn_add_n
diff --git a/sysdeps/alpha/alphaev5/lshift.s b/sysdeps/alpha/alphaev5/lshift.s

index fdb0895..392b424 100644 (file)
--- a/sysdeps/alpha/alphaev5/lshift.s
+++ b/sysdeps/alpha/alphaev5/lshift.s
@@ -25,7 +25,7 @@
   # size                r18
   # cnt         r19
  
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
  
         .set    noreorder
         .set    noat
@@ -44,11 +44,11 @@ __mpn_lshift:
         and     $18,4-1,$28     # number of limbs in first loop
         srl     $4,$20,$0       # compute function result
  
-       beq     $28,L0
+       beq     $28,.L0
         subq    $18,$28,$18
  
         .align  3
-Loop0: ldq     $3,-16($17)
+.Loop0:        ldq     $3,-16($17)
         subq    $16,8,$16
         sll     $4,$19,$5
         subq    $17,8,$17
@@ -57,17 +57,17 @@ Loop0:      ldq     $3,-16($17)
         or      $3,$3,$4
         or      $5,$6,$8
         stq     $8,0($16)
-       bne     $28,Loop0
+       bne     $28,.Loop0
  
-L0:    sll     $4,$19,$24
-       beq     $18,Lend
+.L0:   sll     $4,$19,$24
+       beq     $18,.Lend
   # warm up phase 1
         ldq     $1,-16($17)
         subq    $18,4,$18
         ldq     $2,-24($17)
         ldq     $3,-32($17)
         ldq     $4,-40($17)
-       beq     $18,Lcool1
+       beq     $18,.Lend1
   # warm up phase 2
         srl     $1,$20,$7
         sll     $1,$19,$21
@@ -84,10 +84,10 @@ L0: sll     $4,$19,$24
         sll     $4,$19,$24
         ldq     $4,-72($17)
         subq    $18,4,$18
-       beq     $18,Lcool1
+       beq     $18,.Lend2
         .align  4
   # main loop
-Loop:  stq     $7,-8($16)
+.Loop: stq     $7,-8($16)
         or      $5,$22,$5
         stq     $8,-16($16)
         or      $6,$23,$6
@@ -113,16 +113,14 @@ Loop:     stq     $7,-8($16)
         subq    $16,32,$16
  
         srl     $4,$20,$6
-       ldq     $3,-96($17
+       ldq     $3,-96($17)
         sll     $4,$19,$24
         ldq     $4,-104($17)
  
         subq    $17,32,$17
-       bne     $18,Loop
-       unop
-       unop
+       bne     $18,.Loop
   # cool down phase 2/1
-Lcool1:        stq     $7,-8($16)
+.Lend2:        stq     $7,-8($16)
         or      $5,$22,$5
         stq     $8,-16($16)
         or      $6,$23,$6
@@ -150,7 +148,7 @@ Lcool1:     stq     $7,-8($16)
         ret     $31,($26),1
  
   # cool down phase 1/1
-Lcool1:        srl     $1,$20,$7
+.Lend1:        srl     $1,$20,$7
         sll     $1,$19,$21
         srl     $2,$20,$8
         sll     $2,$19,$22
@@ -170,6 +168,6 @@ Lcool1:     srl     $1,$20,$7
         stq     $24,-40($16)
         ret     $31,($26),1
  
-Lend   stq     $24,-8($16)
+.Lend: stq     $24,-8($16)
         ret     $31,($26),1
         .end    __mpn_lshift
diff --git a/sysdeps/alpha/alphaev5/rshift.s b/sysdeps/alpha/alphaev5/rshift.s

index 1da9960..d20dde3 100644 (file)
--- a/sysdeps/alpha/alphaev5/rshift.s
+++ b/sysdeps/alpha/alphaev5/rshift.s
@@ -25,7 +25,7 @@
   # size                r18
   # cnt         r19
  
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
  
         .set    noreorder
         .set    noat
@@ -42,11 +42,11 @@ __mpn_rshift:
         and     $18,4-1,$28     # number of limbs in first loop
         sll     $4,$20,$0       # compute function result
  
-       beq     $28,L0
+       beq     $28,.L0
         subq    $18,$28,$18
  
         .align  3
-Loop0: ldq     $3,8($17)
+.Loop0:        ldq     $3,8($17)
         addq    $16,8,$16
         srl     $4,$19,$5
         addq    $17,8,$17
@@ -55,17 +55,17 @@ Loop0:      ldq     $3,8($17)
         or      $3,$3,$4
         or      $5,$6,$8
         stq     $8,-8($16)
-       bne     $28,Loop0
+       bne     $28,.Loop0
  
-L0:    srl     $4,$19,$24
-       beq     $18,Lend
+.L0:   srl     $4,$19,$24
+       beq     $18,.Lend
   # warm up phase 1
         ldq     $1,8($17)
         subq    $18,4,$18
         ldq     $2,16($17)
         ldq     $3,24($17)
         ldq     $4,32($17)
-       beq     $18,Lcool1
+       beq     $18,.Lend1
   # warm up phase 2
         sll     $1,$20,$7
         srl     $1,$19,$21
@@ -82,10 +82,10 @@ L0: srl     $4,$19,$24
         srl     $4,$19,$24
         ldq     $4,64($17)
         subq    $18,4,$18
-       beq     $18,Lcool2
+       beq     $18,.Lend2
         .align  4
   # main loop
-Loop:  stq     $7,0($16)
+.Loop: stq     $7,0($16)
         or      $5,$22,$5
         stq     $8,8($16)
         or      $6,$23,$6
@@ -116,11 +116,9 @@ Loop:      stq     $7,0($16)
         ldq     $4,96($17)
  
         addq    $17,32,$17
-       bne     $18,Loop
-       unop
-       unop
+       bne     $18,.Loop
   # cool down phase 2/1
-Lcool2:        stq     $7,0($16)
+.Lend2:        stq     $7,0($16)
         or      $5,$22,$5
         stq     $8,8($16)
         or      $6,$23,$6
@@ -148,7 +146,7 @@ Lcool2:     stq     $7,0($16)
         ret     $31,($26),1
  
   # cool down phase 1/1
-Lcool1:        sll     $1,$20,$7
+.Lend1:        sll     $1,$20,$7
         srl     $1,$19,$21
         sll     $2,$20,$8
         srl     $2,$19,$22
@@ -168,6 +166,6 @@ Lcool1:     sll     $1,$20,$7
         stq     $24,32($16)
         ret     $31,($26),1
  
-Lend:  stq     $24,0($16)
+.Lend: stq     $24,0($16)
         ret     $31,($26),1
         .end    __mpn_rshift
diff --git a/sysdeps/alpha/alphaev5/sub_n.s b/sysdeps/alpha/alphaev5/sub_n.s

new file mode 100644 (file)

index 0000000..c9f3a4e
--- /dev/null
+++ b/sysdeps/alpha/alphaev5/sub_n.s
@@ -0,0 +1,148 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr     $16
+ # s1_ptr      $17
+ # s2_ptr      $18
+ # size                $19
+
+       .set    noreorder
+       .set    noat
+.text
+       .align  3
+       .globl  __mpn_sub_n
+       .ent    __mpn_sub_n
+__mpn_sub_n:
+       .frame  $30,0,$26,0
+
+       or      $31,$31,$25             # clear cy
+       subq    $19,4,$19               # decr loop cnt
+       blt     $19,.Lend2              # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+       ldq     $0,0($18)               # s2 limb 0
+       ldq     $1,8($18)               # s2 limb 1
+       ldq     $4,0($17)               # s1 limb 0
+       ldq     $5,8($17)               # s1 limb 1
+       addq    $17,32,$17              # update s1_ptr
+       ldq     $2,16($18)              # s2 limb 2
+       subq    $4,$0,$20               # 1st main sub
+       ldq     $3,24($18)              # s2 limb 3
+       subq    $19,4,$19               # decr loop cnt
+       ldq     $6,-16($17)             # s1 limb 2 (s1_ptr already updated)
+       cmpult  $4,$20,$25              # compute cy from last sub
+       ldq     $7,-8($17)              # s1 limb 3 (s1_ptr already updated)
+       addq    $1,$25,$28              # cy add
+       addq    $18,32,$18              # update s2_ptr
+       subq    $5,$28,$21              # 2nd main sub
+       cmpult  $28,$25,$8              # compute cy from last add
+       blt     $19,.Lend1              # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+       .align  4
+.Loop: cmpult  $5,$21,$25              # compute cy from last sub
+       ldq     $0,0($18)               # s2 limb 0 of next group
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       ldq     $1,8($18)               # s2 limb 1 of next group
+       addq    $2,$25,$28              # cy add
+       ldq     $4,0($17)               # s1 limb 0 of next group
+       subq    $6,$28,$22              # 3rd main sub
+       ldq     $5,8($17)               # s1 limb 1 of next group
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $6,$22,$25              # compute cy from last sub
+       stq     $20,0($16)              # store 1st difference limb
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       stq     $21,8($16)              # store 2nd difference limb
+       addq    $3,$25,$28              # cy add
+       subq    $7,$28,$23              # 4th main sub
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $7,$23,$25              # compute cy from last sub
+       addq    $17,32,$17              # update s1_ptr
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       addq    $16,32,$16              # update res_ptr
+       addq    $0,$25,$28              # cy add
+       ldq     $2,16($18)              # s2 limb 2 of next group
+       subq    $4,$28,$20              # 1st main sub
+       ldq     $3,24($18)              # s2 limb 3 of next group
+       cmpult  $28,$25,$8              # compute cy from last add
+       ldq     $6,-16($17)             # s1 limb 2 of next group
+       cmpult  $4,$20,$25              # compute cy from last sub
+       ldq     $7,-8($17)              # s1 limb 3 of next group
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       subq    $19,4,$19               # decr loop cnt
+       stq     $22,-16($16)            # store 3rd difference limb (res_ptr already updated)
+       addq    $1,$25,$28              # cy add
+       stq     $23,-8($16)             # store 4th difference limb (res_ptr already updated)
+       subq    $5,$28,$21              # 2nd main sub
+       addq    $18,32,$18              # update s2_ptr
+       cmpult  $28,$25,$8              # compute cy from last add
+       bge     $19,.Loop               # loop while another group of 4 limbs remains
+ # Finish software pipeline for 1st loop
+.Lend1:        cmpult  $5,$21,$25              # compute cy from last sub
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       addq    $2,$25,$28              # cy add
+       subq    $6,$28,$22              # 3rd main sub
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $6,$22,$25              # compute cy from last sub
+       stq     $20,0($16)              # store 1st difference limb
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       stq     $21,8($16)              # store 2nd difference limb
+       addq    $3,$25,$28              # cy add
+       subq    $7,$28,$23              # 4th main sub
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $7,$23,$25              # compute cy from last sub
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       addq    $16,32,$16              # update res_ptr
+       stq     $22,-16($16)            # store 3rd difference limb (res_ptr already updated)
+       stq     $23,-8($16)             # store 4th difference limb (res_ptr already updated)
+.Lend2:        addq    $19,4,$19               # restore loop cnt
+       beq     $19,.Lret               # no limbs left, just return cy
+ # Start software pipeline for 2nd loop
+       ldq     $0,0($18)               # first remaining s2 limb
+       ldq     $4,0($17)               # first remaining s1 limb
+       subq    $19,1,$19               # decr loop cnt
+       beq     $19,.Lend0              # only one limb left, skip the 2nd loop
+ # 2nd loop handles remaining 1-3 limbs
+       .align  4
+.Loop0:        addq    $0,$25,$28              # cy add
+       ldq     $0,8($18)               # next s2 limb
+       subq    $4,$28,$20              # main sub
+       ldq     $1,8($17)               # next s1 limb, parked in $1 while $4 is still needed
+       addq    $18,8,$18               # update s2_ptr
+       cmpult  $28,$25,$8              # compute cy from last add
+       addq    $17,8,$17               # update s1_ptr
+       stq     $20,0($16)              # store difference limb
+       cmpult  $4,$20,$25              # compute cy from last sub
+       subq    $19,1,$19               # decr loop cnt
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+       addq    $16,8,$16               # update res_ptr
+       or      $1,$31,$4               # move next s1 limb into $4
+       bne     $19,.Loop0              # loop until one limb remains
+.Lend0:        addq    $0,$25,$28              # cy add
+       subq    $4,$28,$20              # main sub
+       cmpult  $28,$25,$8              # compute cy from last add
+       cmpult  $4,$20,$25              # compute cy from last sub
+       stq     $20,0($16)              # store last difference limb
+       or      $8,$25,$25              # combine cy from the cy add and the sub
+
+.Lret: or      $25,$31,$0              # return cy
+       ret     $31,($26),1
+       .end    __mpn_sub_n
diff --git a/sysdeps/alpha/lshift.s b/sysdeps/alpha/lshift.s

index c284349..aa8417b 100644 (file)
--- a/sysdeps/alpha/lshift.s
+++ b/sysdeps/alpha/lshift.s
@@ -53,11 +53,11 @@ __mpn_lshift:
         and     $18,4-1,$20     # number of limbs in first loop
         srl     $4,$7,$0        # compute function result
  
-       beq     $20,L0
+       beq     $20,.L0
         subq    $18,$20,$18
  
         .align  3
-Loop0:
+.Loop0:
         ldq     $3,-8($17)
         subq    $16,8,$16
         subq    $17,8,$17
@@ -67,12 +67,12 @@ Loop0:
         bis     $3,$3,$4
         bis     $5,$6,$8
         stq     $8,0($16)
-       bne     $20,Loop0
+       bne     $20,.Loop0
  
-L0:    beq     $18,Lend
+.L0:   beq     $18,.Lend
  
         .align  3
-Loop:  ldq     $3,-8($17)
+.Loop: ldq     $3,-8($17)
         subq    $16,32,$16
         subq    $18,4,$18
         sll     $4,$19,$5
@@ -100,9 +100,9 @@ Loop:       ldq     $3,-8($17)
         bis     $1,$2,$8
         stq     $8,0($16)
  
-       bgt     $18,Loop
+       bgt     $18,.Loop
  
-Lend:  sll     $4,$19,$8
+.Lend: sll     $4,$19,$8
         stq     $8,-8($16)
         ret     $31,($26),1
         .end    __mpn_lshift
diff --git a/sysdeps/alpha/mul_1.s b/sysdeps/alpha/mul_1.s

index 3ef194d..58a63df 100644 (file)
--- a/sysdeps/alpha/mul_1.s
+++ b/sysdeps/alpha/mul_1.s
@@ -1,7 +1,7 @@
   # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
   # the result in a second limb vector.
  
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
   # This file is part of the GNU MP Library.
  
diff --git a/sysdeps/alpha/rshift.s b/sysdeps/alpha/rshift.s

index 74eab04..037b776 100644 (file)
--- a/sysdeps/alpha/rshift.s
+++ b/sysdeps/alpha/rshift.s
@@ -34,7 +34,7 @@
   # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
   # 2. Only aligned instruction pairs can be paired.
   # 3. The store buffer or silo might not be able to deal with the bandwidth.
-      
+
         .set    noreorder
         .set    noat
  .text
@@ -51,11 +51,11 @@ __mpn_rshift:
         and     $18,4-1,$20     # number of limbs in first loop
         sll     $4,$7,$0        # compute function result
  
-       beq     $20,L0
+       beq     $20,.L0
         subq    $18,$20,$18
  
         .align  3
-Loop0:
+.Loop0:
         ldq     $3,0($17)
         addq    $16,8,$16
         addq    $17,8,$17
@@ -65,12 +65,12 @@ Loop0:
         bis     $3,$3,$4
         bis     $5,$6,$8
         stq     $8,-8($16)
-       bne     $20,Loop0
+       bne     $20,.Loop0
  
-L0:    beq     $18,Lend
+.L0:   beq     $18,.Lend
  
         .align  3
-Loop:  ldq     $3,0($17)
+.Loop: ldq     $3,0($17)
         addq    $16,32,$16
         subq    $18,4,$18
         srl     $4,$19,$5
@@ -98,9 +98,9 @@ Loop: ldq     $3,0($17)
         bis     $1,$2,$8
         stq     $8,-8($16)
  
-       bgt     $18,Loop
+       bgt     $18,.Loop
  
-Lend:  srl     $4,$19,$8
+.Lend: srl     $4,$19,$8
         stq     $8,0($16)
         ret     $31,($26),1
         .end    __mpn_rshift
diff --git a/sysdeps/alpha/submul_1.s b/sysdeps/alpha/submul_1.s

index acaa11c..292b2c1 100644 (file)
--- a/sysdeps/alpha/submul_1.s
+++ b/sysdeps/alpha/submul_1.s
@@ -26,16 +26,7 @@
   # size                r18
   # s2_limb     r19
  
- # This code runs at 42 cycles/limb on the 21064.
-
- # To improve performance for long multiplications, we would use
- # 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
- # these instructions without slowing down the general code: 1. We can
- # only have two prefetches in operation at any time in the Alpha
- # architecture.  2. There will seldom be any special alignment
- # between RES_PTR and S1_PTR.  Maybe we can simply divide the current
- # loop into an inner and outer loop, having the inner loop handle
- # exactly one prefetch block?
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
  
         .set    noreorder
         .set    noat
@@ -52,7 +43,7 @@ __mpn_submul_1:
         mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         umulh   $2,$19,$0       # $0 = prod_high
-       beq     $18,Lend1       # jump if size was == 1
+       beq     $18,.Lend1      # jump if size was == 1
         ldq     $2,0($17)       # $2 = s1_limb
         addq    $17,8,$17       # s1_ptr++
         subq    $18,1,$18       # size--
@@ -60,10 +51,10 @@ __mpn_submul_1:
         cmpult  $5,$3,$4
         stq     $3,0($16)
         addq    $16,8,$16       # res_ptr++
-       beq     $18,Lend2       # jump if size was == 2
+       beq     $18,.Lend2      # jump if size was == 2
  
         .align  3
-Loop:  mulq    $2,$19,$3       # $3 = prod_low
+.Loop: mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
         subq    $18,1,$18       # size--
@@ -77,9 +68,9 @@ Loop: mulq    $2,$19,$3       # $3 = prod_low
         stq     $3,0($16)
         addq    $16,8,$16       # res_ptr++
         addq    $5,$0,$0        # combine carries
-       bne     $18,Loop
+       bne     $18,.Loop
  
-Lend2: mulq    $2,$19,$3       # $3 = prod_low
+.Lend2:        mulq    $2,$19,$3       # $3 = prod_low
         ldq     $5,0($16)       # $5 = *res_ptr
         addq    $4,$0,$0        # cy_limb = cy_limb + 'cy'
         umulh   $2,$19,$4       # $4 = cy_limb
@@ -91,7 +82,7 @@ Lend2:        mulq    $2,$19,$3       # $3 = prod_low
         addq    $5,$0,$0        # combine carries
         addq    $4,$0,$0        # cy_limb = prod_high + cy
         ret     $31,($26),1
-Lend1: subq    $5,$3,$3
+.Lend1:        subq    $5,$3,$3
         cmpult  $5,$3,$5
         stq     $3,0($16)
         addq    $0,$5,$0
diff --git a/sysdeps/alpha/udiv_qrnnd.S b/sysdeps/alpha/udiv_qrnnd.S

index bafafd6..ce590ed 100644 (file)
--- a/sysdeps/alpha/udiv_qrnnd.S
+++ b/sysdeps/alpha/udiv_qrnnd.S
@@ -1,6 +1,6 @@
   # Alpha 21064 __udiv_qrnnd
  
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
   # This file is part of the GNU MP Library.
  
@@ -21,13 +21,11 @@
  
          .set noreorder
          .set noat
-
  .text
-        .align 3
-        .globl __udiv_qrnnd
-        .ent __udiv_qrnnd 0
+        .align 3
+        .globl __udiv_qrnnd
+        .ent   __udiv_qrnnd
  __udiv_qrnnd:
-__udiv_qrnnd..ng:
          .frame $30,0,$26,0
          .prologue 0
  #define cnt    $2
@@ -39,9 +37,9 @@ __udiv_qrnnd..ng:
  #define qb     $20
  
         ldiq    cnt,16
-       blt     d,Largedivisor
+       blt     d,.Largedivisor
  
-Loop1: cmplt   n0,0,tmp
+.Loop1:        cmplt   n0,0,tmp
         addq    n1,n1,n1
         bis     n1,tmp,n1
         addq    n0,n0,n0
@@ -74,12 +72,12 @@ Loop1:      cmplt   n0,0,tmp
         cmovne  qb,tmp,n1
         bis     n0,qb,n0
         subq    cnt,1,cnt
-       bgt     cnt,Loop1
+       bgt     cnt,.Loop1
         stq     n1,0(rem_ptr)
         bis     $31,n0,$0
         ret     $31,($26),1
  
-Largedivisor:
+.Largedivisor:
         and     n0,1,$4
  
         srl     n0,1,n0
@@ -91,7 +89,7 @@ Largedivisor:
         srl     d,1,$5
         addq    $5,$6,$5
  
-Loop2: cmplt   n0,0,tmp
+.Loop2:        cmplt   n0,0,tmp
         addq    n1,n1,n1
         bis     n1,tmp,n1
         addq    n0,n0,n0
@@ -124,27 +122,27 @@ Loop2:    cmplt   n0,0,tmp
         cmovne  qb,tmp,n1
         bis     n0,qb,n0
         subq    cnt,1,cnt
-       bgt     cnt,Loop2
+       bgt     cnt,.Loop2
  
         addq    n1,n1,n1
         addq    $4,n1,n1
-       bne     $6,Odd
+       bne     $6,.LOdd
         stq     n1,0(rem_ptr)
         bis     $31,n0,$0
         ret     $31,($26),1
  
-Odd:
+.LOdd:
         /* q' in n0. r' in n1 */
         addq    n1,n0,n1
         cmpult  n1,n0,tmp       # tmp := carry from addq
-       beq     tmp,LLp6
+       beq     tmp,.LLp6
         addq    n0,1,n0
         subq    n1,d,n1
-LLp6:  cmpult  n1,d,tmp
-       bne     tmp,LLp7
+.LLp6: cmpult  n1,d,tmp
+       bne     tmp,.LLp7
         addq    n0,1,n0
         subq    n1,d,n1
-LLp7:
+.LLp7:
         stq     n1,0(rem_ptr)
         bis     $31,n0,$0
         ret     $31,($26),1
diff --git a/sysdeps/generic/add_n.c b/sysdeps/generic/add_n.c

index 6989ab0..647548d 100644 (file)
--- a/sysdeps/generic/add_n.c
+++ b/sysdeps/generic/add_n.c
@@ -1,6 +1,6 @@
-/* __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
  
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
  #else
-__mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_srcptr s2_ptr;
diff --git a/sysdeps/generic/addmul_1.c b/sysdeps/generic/addmul_1.c

index fdf3541..6156cab 100644 (file)
--- a/sysdeps/generic/addmul_1.c
+++ b/sysdeps/generic/addmul_1.c
@@ -1,9 +1,9 @@
-/* __mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
     by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
     limb vector pointed to by RES_PTR.  Return the most significant limb of
     the product, adjusted for carry-out from the addition.
  
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #include "longlong.h"
  
  mp_limb
-__mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       mp_size_t s1_size;
diff --git a/sysdeps/generic/cmp.c b/sysdeps/generic/cmp.c

index 144c885..e499b1e 100644 (file)
--- a/sysdeps/generic/cmp.c
+++ b/sysdeps/generic/cmp.c
@@ -1,6 +1,6 @@
-/* __mpn_cmp -- Compare two low-level natural-number integers.
+/* mpn_cmp -- Compare two low-level natural-number integers.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,9 +28,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  int
  #if __STDC__
-__mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
  #else
-__mpn_cmp (op1_ptr, op2_ptr, size)
+mpn_cmp (op1_ptr, op2_ptr, size)
       mp_srcptr op1_ptr;
       mp_srcptr op2_ptr;
       mp_size_t size;
diff --git a/sysdeps/generic/divmod_1.c b/sysdeps/generic/divmod_1.c

index 2989d36..c040327 100644 (file)
--- a/sysdeps/generic/divmod_1.c
+++ b/sysdeps/generic/divmod_1.c
@@ -1,4 +1,4 @@
-/* __mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
     Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
     Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
     Return the single-limb remainder.
@@ -6,7 +6,7 @@
  
     QUOT_PTR and DIVIDEND_PTR might point to the same limb.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,11 +41,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_divmod_1 (mp_ptr quot_ptr,
+mpn_divmod_1 (mp_ptr quot_ptr,
               mp_srcptr dividend_ptr, mp_size_t dividend_size,
               mp_limb divisor_limb)
  #else
-__mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
       mp_ptr quot_ptr;
       mp_srcptr dividend_ptr;
       mp_size_t dividend_size;
diff --git a/sysdeps/generic/lshift.c b/sysdeps/generic/lshift.c

index 1ba0903..35794e4 100644 (file)
--- a/sysdeps/generic/lshift.c
+++ b/sysdeps/generic/lshift.c
@@ -1,6 +1,6 @@
-/* __mpn_lshift -- Shift left low level.
+/* mpn_lshift -- Shift left low level.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_lshift (register mp_ptr wp,
+mpn_lshift (register mp_ptr wp,
             register mp_srcptr up, mp_size_t usize,
             register unsigned int cnt)
  #else
-__mpn_lshift (wp, up, usize, cnt)
+mpn_lshift (wp, up, usize, cnt)
       register mp_ptr wp;
       register mp_srcptr up;
       mp_size_t usize;
diff --git a/sysdeps/generic/mod_1.c b/sysdeps/generic/mod_1.c

index 8a49fb4..0842f6b 100644 (file)
--- a/sysdeps/generic/mod_1.c
+++ b/sysdeps/generic/mod_1.c
@@ -1,4 +1,4 @@
-/* __mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
     Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
     Return the single-limb remainder.
     There are no constraints on the value of the divisor.
@@ -38,10 +38,10 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
-            mp_limb divisor_limb)
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+          mp_limb divisor_limb)
  #else
-__mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
       mp_srcptr dividend_ptr;
       mp_size_t dividend_size;
       mp_limb divisor_limb;
diff --git a/sysdeps/generic/mul.c b/sysdeps/generic/mul.c

index cd2acb5..3f3f41e 100644 (file)
--- a/sysdeps/generic/mul.c
+++ b/sysdeps/generic/mul.c
@@ -1,6 +1,6 @@
-/* __mpn_mul -- Multiply two natural numbers.
+/* mpn_mul -- Multiply two natural numbers.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -43,11 +43,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_mul (mp_ptr prodp,
-         mp_srcptr up, mp_size_t usize,
-         mp_srcptr vp, mp_size_t vsize)
+mpn_mul (mp_ptr prodp,
+        mp_srcptr up, mp_size_t usize,
+        mp_srcptr vp, mp_size_t vsize)
  #else
-__mpn_mul (prodp, up, usize, vp, vsize)
+mpn_mul (prodp, up, usize, vp, vsize)
       mp_ptr prodp;
       mp_srcptr up;
       mp_size_t usize;
@@ -58,6 +58,7 @@ __mpn_mul (prodp, up, usize, vp, vsize)
    mp_ptr prod_endp = prodp + usize + vsize - 1;
    mp_limb cy;
    mp_ptr tspace;
+  TMP_DECL (marker);
  
    if (vsize < KARATSUBA_THRESHOLD)
      {
@@ -86,7 +87,7 @@ __mpn_mul (prodp, up, usize, vp, vsize)
           cy_limb = 0;
         }
        else
-       cy_limb = __mpn_mul_1 (prodp, up, usize, v_limb);
+       cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
  
        prodp[usize] = cy_limb;
        prodp++;
@@ -100,10 +101,10 @@ __mpn_mul (prodp, up, usize, vp, vsize)
             {
               cy_limb = 0;
               if (v_limb == 1)
-               cy_limb = __mpn_add_n (prodp, prodp, up, usize);
+               cy_limb = mpn_add_n (prodp, prodp, up, usize);
             }
           else
-           cy_limb = __mpn_addmul_1 (prodp, up, usize, v_limb);
+           cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
  
           prodp[usize] = cy_limb;
           prodp++;
@@ -111,7 +112,9 @@ __mpn_mul (prodp, up, usize, vp, vsize)
        return cy_limb;
      }
  
-  tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+  TMP_MARK (marker);
+
+  tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
    MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
  
    prodp += vsize;
@@ -119,12 +122,12 @@ __mpn_mul (prodp, up, usize, vp, vsize)
    usize -= vsize;
    if (usize >= vsize)
      {
-      mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+      mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
        do
         {
           MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
-         cy = __mpn_add_n (prodp, prodp, tp, vsize);
-         __mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
+         cy = mpn_add_n (prodp, prodp, tp, vsize);
+         mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
           prodp += vsize;
           up += vsize;
           usize -= vsize;
@@ -138,10 +141,11 @@ __mpn_mul (prodp, up, usize, vp, vsize)
  
    if (usize != 0)
      {
-      __mpn_mul (tspace, vp, vsize, up, usize);
-      cy = __mpn_add_n (prodp, prodp, tspace, vsize);
-      __mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
+      mpn_mul (tspace, vp, vsize, up, usize);
+      cy = mpn_add_n (prodp, prodp, tspace, vsize);
+      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
      }
  
+  TMP_FREE (marker);
    return *prod_endp;
  }
diff --git a/sysdeps/generic/mul_1.c b/sysdeps/generic/mul_1.c

index 37dbc33..01fdbbb 100644 (file)
--- a/sysdeps/generic/mul_1.c
+++ b/sysdeps/generic/mul_1.c
@@ -1,7 +1,7 @@
-/* __mpn_mul_1 -- Multiply a limb vector with a single limb and
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
     store the product in a second limb vector.
  
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +24,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #include "longlong.h"
  
  mp_limb
-__mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       mp_size_t s1_size;
diff --git a/sysdeps/generic/mul_n.c b/sysdeps/generic/mul_n.c

index e37c5d8..049f63d 100644 (file)
--- a/sysdeps/generic/mul_n.c
+++ b/sysdeps/generic/mul_n.c
@@ -1,6 +1,6 @@
-/* __mpn_mul_n -- Multiply two natural numbers of length n.
+/* mpn_mul_n -- Multiply two natural numbers of length n.
  
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,13 +41,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #define KARATSUBA_THRESHOLD 2
  #endif
  
-void
-#if __STDC__
-____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
-#else
-____mpn_mul_n ();
-#endif
-
  /* Handle simple cases with traditional multiplication.
  
     This is the most critical code of multiplication.  All multiplies rely
@@ -57,9 +50,9 @@ ____mpn_mul_n ();
  
  void
  #if __STDC__
-____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  #else
-____mpn_mul_n_basecase (prodp, up, vp, size)
+impn_mul_n_basecase (prodp, up, vp, size)
       mp_ptr prodp;
       mp_srcptr up;
       mp_srcptr vp;
@@ -82,7 +75,7 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
        cy_limb = 0;
      }
    else
-    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  
    prodp[size] = cy_limb;
    prodp++;
@@ -96,10 +89,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
         {
           cy_limb = 0;
           if (v_limb == 1)
-           cy_limb = __mpn_add_n (prodp, prodp, up, size);
+           cy_limb = mpn_add_n (prodp, prodp, up, size);
         }
        else
-       cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+       cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  
        prodp[size] = cy_limb;
        prodp++;
@@ -108,10 +101,10 @@ ____mpn_mul_n_basecase (prodp, up, vp, size)
  
  void
  #if __STDC__
-____mpn_mul_n (mp_ptr prodp,
+impn_mul_n (mp_ptr prodp,
              mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
  #else
-____mpn_mul_n (prodp, up, vp, size, tspace)
+impn_mul_n (prodp, up, vp, size, tspace)
       mp_ptr prodp;
       mp_srcptr up;
       mp_srcptr vp;
@@ -135,9 +128,9 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
        mp_limb cy_limb;
  
        MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
-      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
+      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
        prodp[esize + esize] = cy_limb;
-      cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
+      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
  
        prodp[esize + size] = cy_limb;
      }
@@ -170,24 +163,24 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
  
        /*** Product M.   ________________
                         |_(U1-U0)(V0-V1)_|  */
-      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+      if (mpn_cmp (up + hsize, up, hsize) >= 0)
         {
-         __mpn_sub_n (prodp, up + hsize, up, hsize);
+         mpn_sub_n (prodp, up + hsize, up, hsize);
           negflg = 0;
         }
        else
         {
-         __mpn_sub_n (prodp, up, up + hsize, hsize);
+         mpn_sub_n (prodp, up, up + hsize, hsize);
           negflg = 1;
         }
-      if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
+      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
         {
-         __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
+         mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
           negflg ^= 1;
         }
        else
         {
-         __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
+         mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
           /* No change of NEGFLG.  */
         }
        /* Read temporary operands from low part of PROD.
@@ -197,13 +190,13 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
  
        /*** Add/copy product H.  */
        MPN_COPY (prodp + hsize, prodp + size, hsize);
-      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
  
        /*** Add product M (if NEGFLG M is a negative number).  */
        if (negflg)
-       cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
        else
-       cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
  
        /*** Product L.   ________________  ________________
                         |________________||____U0 x V0_____|  */
@@ -214,22 +207,22 @@ ____mpn_mul_n (prodp, up, vp, size, tspace)
  
        /*** Add/copy Product L (twice).  */
  
-      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
        if (cy)
-       __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+       mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
  
        MPN_COPY (prodp, tspace, hsize);
-      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if (cy)
-       __mpn_add_1 (prodp + size, prodp + size, size, 1);
+       mpn_add_1 (prodp + size, prodp + size, size, 1);
      }
  }
  
  void
  #if __STDC__
-____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
+impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
  #else
-____mpn_sqr_n_basecase (prodp, up, size)
+impn_sqr_n_basecase (prodp, up, size)
       mp_ptr prodp;
       mp_srcptr up;
       mp_size_t size;
@@ -251,7 +244,7 @@ ____mpn_sqr_n_basecase (prodp, up, size)
        cy_limb = 0;
      }
    else
-    cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  
    prodp[size] = cy_limb;
    prodp++;
@@ -265,10 +258,10 @@ ____mpn_sqr_n_basecase (prodp, up, size)
         {
           cy_limb = 0;
           if (v_limb == 1)
-           cy_limb = __mpn_add_n (prodp, prodp, up, size);
+           cy_limb = mpn_add_n (prodp, prodp, up, size);
         }
        else
-       cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+       cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  
        prodp[size] = cy_limb;
        prodp++;
@@ -277,10 +270,10 @@ ____mpn_sqr_n_basecase (prodp, up, size)
  
  void
  #if __STDC__
-____mpn_sqr_n (mp_ptr prodp,
+impn_sqr_n (mp_ptr prodp,
              mp_srcptr up, mp_size_t size, mp_ptr tspace)
  #else
-____mpn_sqr_n (prodp, up, size, tspace)
+impn_sqr_n (prodp, up, size, tspace)
       mp_ptr prodp;
       mp_srcptr up;
       mp_size_t size;
@@ -303,9 +296,9 @@ ____mpn_sqr_n (prodp, up, size, tspace)
        mp_limb cy_limb;
  
        MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
-      cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
+      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
        prodp[esize + esize] = cy_limb;
-      cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
+      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
  
        prodp[esize + size] = cy_limb;
      }
@@ -322,13 +315,13 @@ ____mpn_sqr_n (prodp, up, size, tspace)
  
        /*** Product M.   ________________
                         |_(U1-U0)(U0-U1)_|  */
-      if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+      if (mpn_cmp (up + hsize, up, hsize) >= 0)
         {
-         __mpn_sub_n (prodp, up + hsize, up, hsize);
+         mpn_sub_n (prodp, up + hsize, up, hsize);
         }
        else
         {
-         __mpn_sub_n (prodp, up, up + hsize, hsize);
+         mpn_sub_n (prodp, up, up + hsize, hsize);
         }
  
        /* Read temporary operands from low part of PROD.
@@ -338,10 +331,10 @@ ____mpn_sqr_n (prodp, up, size, tspace)
  
        /*** Add/copy product H.  */
        MPN_COPY (prodp + hsize, prodp + size, hsize);
-      cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
  
        /*** Add product M (if NEGFLG M is a negative number).  */
-      cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
  
        /*** Product L.   ________________  ________________
                         |________________||____U0 x U0_____|  */
@@ -352,53 +345,56 @@ ____mpn_sqr_n (prodp, up, size, tspace)
  
        /*** Add/copy Product L (twice).  */
  
-      cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
        if (cy)
-       __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+       mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
  
        MPN_COPY (prodp, tspace, hsize);
-      cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
        if (cy)
-       __mpn_add_1 (prodp + size, prodp + size, size, 1);
+       mpn_add_1 (prodp + size, prodp + size, size, 1);
      }
  }
  
  /* This should be made into an inline function in gmp.h.  */
  inline void
  #if __STDC__
-__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  #else
-__mpn_mul_n (prodp, up, vp, size)
+mpn_mul_n (prodp, up, vp, size)
       mp_ptr prodp;
       mp_srcptr up;
       mp_srcptr vp;
       mp_size_t size;
  #endif
  {
+  TMP_DECL (marker);
+  TMP_MARK (marker);
    if (up == vp)
      {
        if (size < KARATSUBA_THRESHOLD)
         {
-         ____mpn_sqr_n_basecase (prodp, up, size);
+         impn_sqr_n_basecase (prodp, up, size);
         }
        else
         {
           mp_ptr tspace;
-         tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
-         ____mpn_sqr_n (prodp, up, size, tspace);
+         tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+         impn_sqr_n (prodp, up, size, tspace);
         }
      }
    else
      {
        if (size < KARATSUBA_THRESHOLD)
         {
-         ____mpn_mul_n_basecase (prodp, up, vp, size);
+         impn_mul_n_basecase (prodp, up, vp, size);
         }
        else
         {
           mp_ptr tspace;
-         tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
-         ____mpn_mul_n (prodp, up, vp, size, tspace);
+         tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+         impn_mul_n (prodp, up, vp, size, tspace);
         }
      }
+  TMP_FREE (marker);
  }
diff --git a/sysdeps/generic/rshift.c b/sysdeps/generic/rshift.c

index 966cc7b..7ce02e0 100644 (file)
--- a/sysdeps/generic/rshift.c
+++ b/sysdeps/generic/rshift.c
@@ -1,6 +1,6 @@
-/* __mpn_rshift -- Shift right a low-level natural-number integer.
+/* mpn_rshift -- Shift right a low-level natural-number integer.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,11 +32,11 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_rshift (register mp_ptr wp,
+mpn_rshift (register mp_ptr wp,
             register mp_srcptr up, mp_size_t usize,
             register unsigned int cnt)
  #else
-__mpn_rshift (wp, up, usize, cnt)
+mpn_rshift (wp, up, usize, cnt)
       register mp_ptr wp;
       register mp_srcptr up;
       mp_size_t usize;
diff --git a/sysdeps/generic/sub_n.c b/sysdeps/generic/sub_n.c

index 6b33e66..f3c83d1 100644 (file)
--- a/sysdeps/generic/sub_n.c
+++ b/sysdeps/generic/sub_n.c
@@ -1,6 +1,6 @@
-/* __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
  
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,9 +23,9 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  mp_limb
  #if __STDC__
-__mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
  #else
-__mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       register mp_srcptr s2_ptr;
diff --git a/sysdeps/generic/submul_1.c b/sysdeps/generic/submul_1.c

index 855dd3f..57122a5 100644 (file)
--- a/sysdeps/generic/submul_1.c
+++ b/sysdeps/generic/submul_1.c
@@ -1,9 +1,9 @@
-/* __mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
     by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
     from the limb vector pointed to by RES_PTR.  Return the most significant
     limb of the product, adjusted for carry-out from the subtraction.
  
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,7 +26,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #include "longlong.h"
  
  mp_limb
-__mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
       register mp_ptr res_ptr;
       register mp_srcptr s1_ptr;
       mp_size_t s1_size;
diff --git a/sysdeps/i386/gmp-mparam.h b/sysdeps/i386/gmp-mparam.h

index 687f12a..ddc308a 100644 (file)
--- a/sysdeps/i386/gmp-mparam.h
+++ b/sysdeps/i386/gmp-mparam.h
@@ -1,6 +1,6 @@
  /* gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/sysdeps/i386/i586/add_n.S b/sysdeps/i386/i586/add_n.S

index f52f9c6..f214c8c 100644 (file)
--- a/sysdeps/i386/i586/add_n.S
+++ b/sysdeps/i386/i586/add_n.S
@@ -1,7 +1,7 @@
  /* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
     sum in a third limb vector.
  
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #include "sysdep.h"
  #include "asm-syntax.h"
  
-#define t1     %eax
-#define t2     %edx
-#define src1   %esi
-#define src2   %ebp
-#define dst    %edi
-#define x      %ebx
-
  .text
         ALIGN (3)
         .globl C_SYMBOL_NAME(__mpn_add_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_add_n:)
         pushl   %ebx
         pushl   %ebp
  
-       movl    20(%esp),dst            /* res_ptr */
-       movl    24(%esp),src1           /* s1_ptr */
-       movl    28(%esp),src2           /* s2_ptr */
+       movl    20(%esp),%edi           /* res_ptr */
+       movl    24(%esp),%esi           /* s1_ptr */
+       movl    28(%esp),%ebp           /* s2_ptr */
         movl    32(%esp),%ecx           /* size */
  
-       movl    (src2),x
+       movl    (%ebp),%ebx
  
         decl    %ecx
-       movl    %ecx,t2
+       movl    %ecx,%edx
         shrl    $3,%ecx
-       andl    $7,t2
+       andl    $7,%edx
         testl   %ecx,%ecx               /* zero carry flag */
         jz      Lend
-       pushl   t2
+       pushl   %edx
  
         ALIGN (3)
-Loop:  movl    28(dst),%eax            /* fetch destination cache line */
-       leal    32(dst),dst
-
-L1:    movl    (src1),t1
-       movl    4(src1),t2
-       adcl    x,t1
-       movl    4(src2),x
-       adcl    x,t2
-       movl    8(src2),x
-       movl    t1,-32(dst)
-       movl    t2,-28(dst)
-
-L2:    movl    8(src1),t1
-       movl    12(src1),t2
-       adcl    x,t1
-       movl    12(src2),x
-       adcl    x,t2
-       movl    16(src2),x
-       movl    t1,-24(dst)
-       movl    t2,-20(dst)
-
-L3:    movl    16(src1),t1
-       movl    20(src1),t2
-       adcl    x,t1
-       movl    20(src2),x
-       adcl    x,t2
-       movl    24(src2),x
-       movl    t1,-16(dst)
-       movl    t2,-12(dst)
-
-L4:    movl    24(src1),t1
-       movl    28(src1),t2
-       adcl    x,t1
-       movl    28(src2),x
-       adcl    x,t2
-       movl    32(src2),x
-       movl    t1,-8(dst)
-       movl    t2,-4(dst)
-
-       leal    32(src1),src1
-       leal    32(src2),src2
+Loop:  movl    28(%edi),%eax           /* fetch destination cache line */
+       leal    32(%edi),%edi
+
+L1:    movl    (%esi),%eax
+       movl    4(%esi),%edx
+       adcl    %ebx,%eax
+       movl    4(%ebp),%ebx
+       adcl    %ebx,%edx
+       movl    8(%ebp),%ebx
+       movl    %eax,-32(%edi)
+       movl    %edx,-28(%edi)
+
+L2:    movl    8(%esi),%eax
+       movl    12(%esi),%edx
+       adcl    %ebx,%eax
+       movl    12(%ebp),%ebx
+       adcl    %ebx,%edx
+       movl    16(%ebp),%ebx
+       movl    %eax,-24(%edi)
+       movl    %edx,-20(%edi)
+
+L3:    movl    16(%esi),%eax
+       movl    20(%esi),%edx
+       adcl    %ebx,%eax
+       movl    20(%ebp),%ebx
+       adcl    %ebx,%edx
+       movl    24(%ebp),%ebx
+       movl    %eax,-16(%edi)
+       movl    %edx,-12(%edi)
+
+L4:    movl    24(%esi),%eax
+       movl    28(%esi),%edx
+       adcl    %ebx,%eax
+       movl    28(%ebp),%ebx
+       adcl    %ebx,%edx
+       movl    32(%ebp),%ebx
+       movl    %eax,-8(%edi)
+       movl    %edx,-4(%edi)
+
+       leal    32(%esi),%esi
+       leal    32(%ebp),%ebp
         decl    %ecx
         jnz     Loop
  
-       popl    t2
+       popl    %edx
  Lend:
-       decl    t2                      /* test t2 w/o clobbering carry */
+       decl    %edx                    /* test %edx w/o clobbering carry */
         js      Lend2
-       incl    t2
+       incl    %edx
  Loop2:
-       leal    4(dst),dst
-       movl    (src1),t1
-       adcl    x,t1
-       movl    4(src2),x
-       movl    t1,-4(dst)
-       leal    4(src1),src1
-       leal    4(src2),src2
-       decl    t2
+       leal    4(%edi),%edi
+       movl    (%esi),%eax
+       adcl    %ebx,%eax
+       movl    4(%ebp),%ebx
+       movl    %eax,-4(%edi)
+       leal    4(%esi),%esi
+       leal    4(%ebp),%ebp
+       decl    %edx
         jnz     Loop2
  Lend2:
-       movl    (src1),t1
-       adcl    x,t1
-       movl    t1,(dst)
+       movl    (%esi),%eax
+       adcl    %ebx,%eax
+       movl    %eax,(%edi)
  
         sbbl    %eax,%eax
         negl    %eax
diff --git a/sysdeps/i386/i586/addmul_1.S b/sysdeps/i386/i586/addmul_1.S

index b222840..5bf2603 100644 (file)
--- a/sysdeps/i386/i586/addmul_1.S
+++ b/sysdeps/i386/i586/addmul_1.S
@@ -1,7 +1,7 @@
  /* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
     the result to a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,12 +32,12 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  
  #define res_ptr edi
  #define s1_ptr esi
+#define size ecx
  #define s2_limb ebp
  
         TEXT
         ALIGN (3)
         GLOBL   C_SYMBOL_NAME(__mpn_addmul_1)
-       .type   C_SYMBOL_NAME(__mpn_addmul_1),@function
  C_SYMBOL_NAME(__mpn_addmul_1:)
  
         INSN1(push,l    ,R(edi))
@@ -47,38 +47,36 @@ C_SYMBOL_NAME(__mpn_addmul_1:)
  
         INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
         INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
-       INSN2(mov,l     ,R(ecx),MEM_DISP(esp,28))
+       INSN2(mov,l     ,R(size),MEM_DISP(esp,28))
         INSN2(mov,l     ,R(s2_limb),MEM_DISP(esp,32))
  
-       INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,ecx,4))
-       INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,ecx,4))
-       INSN1(neg,l     ,R(ecx))
-       INSN2(xor,l     ,R(edx),R(edx))
+       INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+       INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+       INSN1(neg,l     ,R(size))
+       INSN2(xor,l     ,R(ebx),R(ebx))
         ALIGN (3)
-Loop:
-       INSN2(mov,l     ,R(ebx),R(edx))
-       INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,ecx,4))
+
+Loop:  INSN2(adc,l     ,R(ebx),$0)
+       INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
  
         INSN1(mul,l     ,R(s2_limb))
  
         INSN2(add,l     ,R(eax),R(ebx))
-       INSN2(mov,l     ,R(ebx),MEM_INDEX(res_ptr,ecx,4))
+       INSN2(mov,l     ,R(ebx),MEM_INDEX(res_ptr,size,4))
  
         INSN2(adc,l     ,R(edx),$0)
         INSN2(add,l     ,R(ebx),R(eax))
  
-       INSN2(adc,l     ,R(edx),$0)
-       INSN2(mov,l     ,MEM_INDEX(res_ptr,ecx,4),R(ebx))
+       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
+       INSN1(inc,l     ,R(size))
  
-       INSN1(inc,l     ,R(ecx))
+       INSN2(mov,l     ,R(ebx),R(edx))
         INSN1(jnz,      ,Loop)
  
-
-       INSN2(mov,l     ,R(eax),R(edx))
+       INSN2(adc,l     ,R(ebx),$0)
+       INSN2(mov,l     ,R(eax),R(ebx))
         INSN1(pop,l     ,R(ebp))
         INSN1(pop,l     ,R(ebx))
         INSN1(pop,l     ,R(esi))
         INSN1(pop,l     ,R(edi))
         ret
-Lfe1:
-       .size   C_SYMBOL_NAME(__mpn_addmul_1),Lfe1-C_SYMBOL_NAME(__mpn_addmul_1)
diff --git a/sysdeps/i386/i586/mul_1.S b/sysdeps/i386/i586/mul_1.S

index 2b7258e..048c060 100644 (file)
--- a/sysdeps/i386/i586/mul_1.S
+++ b/sysdeps/i386/i586/mul_1.S
@@ -1,7 +1,7 @@
  /* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
     the result in a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -53,24 +53,24 @@ C_SYMBOL_NAME(__mpn_mul_1:)
         INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
         INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
         INSN1(neg,l     ,R(size))
-       INSN2(xor,l     ,R(edx),R(edx))
+       INSN2(xor,l     ,R(ebx),R(ebx))
         ALIGN (3)
-Loop:
-       INSN2(mov,l     ,R(ebx),R(edx))
+
+Loop:  INSN2(adc,l     ,R(ebx),$0)
         INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
  
         INSN1(mul,l     ,R(s2_limb))
  
-       INSN2(add,l     ,R(eax),R(ebx))
-
-       INSN2(adc,l     ,R(edx),$0)
-       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(eax))
+       INSN2(add,l     ,R(ebx),R(eax))
  
+       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
         INSN1(inc,l     ,R(size))
-       INSN1(jnz,      ,Loop)
  
+       INSN2(mov,l     ,R(ebx),R(edx))
+       INSN1(jnz,      ,Loop)
  
-       INSN2(mov,l     ,R(eax),R(edx))
+       INSN2(adc,l     ,R(ebx),$0)
+       INSN2(mov,l     ,R(eax),R(ebx))
         INSN1(pop,l     ,R(ebp))
         INSN1(pop,l     ,R(ebx))
         INSN1(pop,l     ,R(esi))
diff --git a/sysdeps/i386/i586/sub_n.S b/sysdeps/i386/i586/sub_n.S

index 9c964a8..cd158a5 100644 (file)
--- a/sysdeps/i386/i586/sub_n.S
+++ b/sysdeps/i386/i586/sub_n.S
@@ -1,7 +1,7 @@
  /* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
     and store difference in a third limb vector.
  
-Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -30,13 +30,6 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
  #include "sysdep.h"
  #include "asm-syntax.h"
  
-#define t1     %eax
-#define t2     %edx
-#define src1   %esi
-#define src2   %ebp
-#define dst    %edi
-#define x      %ebx
-
  .text
         ALIGN (3)
         .globl C_SYMBOL_NAME(__mpn_sub_n)
@@ -46,85 +39,85 @@ C_SYMBOL_NAME(__mpn_sub_n:)
         pushl   %ebx
         pushl   %ebp
  
-       movl    20(%esp),dst            /* res_ptr */
-       movl    24(%esp),src1           /* s1_ptr */
-       movl    28(%esp),src2           /* s2_ptr */
+       movl    20(%esp),%edi           /* res_ptr */
+       movl    24(%esp),%esi           /* s1_ptr */
+       movl    28(%esp),%ebp           /* s2_ptr */
         movl    32(%esp),%ecx           /* size */
  
-       movl    (src2),x
+       movl    (%ebp),%ebx
  
         decl    %ecx
-       movl    %ecx,t2
+       movl    %ecx,%edx
         shrl    $3,%ecx
-       andl    $7,t2
+       andl    $7,%edx
         testl   %ecx,%ecx               /* zero carry flag */
         jz      Lend
-       pushl   t2
+       pushl   %edx
  
         ALIGN (3)
-Loop:  movl    28(dst),%eax            /* fetch destination cache line */
-       leal    32(dst),dst
-
-L1:    movl    (src1),t1
-       movl    4(src1),t2
-       sbbl    x,t1
-       movl    4(src2),x
-       sbbl    x,t2
-       movl    8(src2),x
-       movl    t1,-32(dst)
-       movl    t2,-28(dst)
-
-L2:    movl    8(src1),t1
-       movl    12(src1),t2
-       sbbl    x,t1
-       movl    12(src2),x
-       sbbl    x,t2
-       movl    16(src2),x
-       movl    t1,-24(dst)
-       movl    t2,-20(dst)
-
-L3:    movl    16(src1),t1
-       movl    20(src1),t2
-       sbbl    x,t1
-       movl    20(src2),x
-       sbbl    x,t2
-       movl    24(src2),x
-       movl    t1,-16(dst)
-       movl    t2,-12(dst)
-
-L4:    movl    24(src1),t1
-       movl    28(src1),t2
-       sbbl    x,t1
-       movl    28(src2),x
-       sbbl    x,t2
-       movl    32(src2),x
-       movl    t1,-8(dst)
-       movl    t2,-4(dst)
-
-       leal    32(src1),src1
-       leal    32(src2),src2
+Loop:  movl    28(%edi),%eax           /* fetch destination cache line */
+       leal    32(%edi),%edi
+
+L1:    movl    (%esi),%eax
+       movl    4(%esi),%edx
+       sbbl    %ebx,%eax
+       movl    4(%ebp),%ebx
+       sbbl    %ebx,%edx
+       movl    8(%ebp),%ebx
+       movl    %eax,-32(%edi)
+       movl    %edx,-28(%edi)
+
+L2:    movl    8(%esi),%eax
+       movl    12(%esi),%edx
+       sbbl    %ebx,%eax
+       movl    12(%ebp),%ebx
+       sbbl    %ebx,%edx
+       movl    16(%ebp),%ebx
+       movl    %eax,-24(%edi)
+       movl    %edx,-20(%edi)
+
+L3:    movl    16(%esi),%eax
+       movl    20(%esi),%edx
+       sbbl    %ebx,%eax
+       movl    20(%ebp),%ebx
+       sbbl    %ebx,%edx
+       movl    24(%ebp),%ebx
+       movl    %eax,-16(%edi)
+       movl    %edx,-12(%edi)
+
+L4:    movl    24(%esi),%eax
+       movl    28(%esi),%edx
+       sbbl    %ebx,%eax
+       movl    28(%ebp),%ebx
+       sbbl    %ebx,%edx
+       movl    32(%ebp),%ebx
+       movl    %eax,-8(%edi)
+       movl    %edx,-4(%edi)
+
+       leal    32(%esi),%esi
+       leal    32(%ebp),%ebp
         decl    %ecx
         jnz     Loop
  
-       popl    t2
+       popl    %edx
  Lend:
-       decl    t2                      /* test t2 w/o clobbering carry */
+       decl    %edx                    /* test %edx w/o clobbering carry */
         js      Lend2
-       incl    t2
+       incl    %edx
  Loop2:
-       leal    4(dst),dst
-       movl    (src1),t1
-       sbbl    x,t1
-       movl    4(src2),x
-       movl    t1,-4(dst)
-       leal    4(src1),src1
-       leal    4(src2),src2
-       decl    t2
+       leal    4(%edi),%edi
+       movl    (%esi),%eax
+       sbbl    %ebx,%eax
+       movl    4(%ebp),%ebx
+       movl    %eax,-4(%edi)
+       leal    4(%esi),%esi
+       leal    4(%ebp),%ebp
+       decl    %edx
         jnz     Loop2
  Lend2:
-       movl    (src1),t1
-       sbbl    x,t1
-       movl    t1,(dst)
+       movl    (%esi),%eax
+       sbbl    %ebx,%eax
+       movl    %eax,(%edi)
  
         sbbl    %eax,%eax
         negl    %eax
diff --git a/sysdeps/i386/i586/submul_1.S b/sysdeps/i386/i586/submul_1.S

index 14bfe54..440f64f 100644 (file)
--- a/sysdeps/i386/i586/submul_1.S
+++ b/sysdeps/i386/i586/submul_1.S
@@ -1,7 +1,7 @@
  /* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
     the result from a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -53,10 +53,10 @@ C_SYMBOL_NAME(__mpn_submul_1:)
         INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
         INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
         INSN1(neg,l     ,R(size))
-       INSN2(xor,l     ,R(edx),R(edx))
+       INSN2(xor,l     ,R(ebx),R(ebx))
         ALIGN (3)
-Loop:
-       INSN2(mov,l     ,R(ebx),R(edx))
+
+Loop:  INSN2(adc,l     ,R(ebx),$0)
         INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
  
         INSN1(mul,l     ,R(s2_limb))
@@ -67,14 +67,14 @@ Loop:
         INSN2(adc,l     ,R(edx),$0)
         INSN2(sub,l     ,R(ebx),R(eax))
  
-       INSN2(adc,l     ,R(edx),$0)
         INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
-
         INSN1(inc,l     ,R(size))
-       INSN1(jnz,      ,Loop)
  
+       INSN2(mov,l     ,R(ebx),R(edx))
+       INSN1(jnz,      ,Loop)
  
-       INSN2(mov,l     ,R(eax),R(edx))
+       INSN2(adc,l     ,R(ebx),$0)
+       INSN2(mov,l     ,R(eax),R(ebx))
         INSN1(pop,l     ,R(ebp))
         INSN1(pop,l     ,R(ebx))
         INSN1(pop,l     ,R(esi))
diff --git a/sysdeps/m68k/add_n.S b/sysdeps/m68k/add_n.S

index ea7a445..754af9f 100644 (file)
--- a/sysdeps/m68k/add_n.S
+++ b/sysdeps/m68k/add_n.S
@@ -1,7 +1,7 @@
  /* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
     sum in a third limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
    size         (sp + 12)
  */
  
+#include "sysdep.h"
  #include "asm-syntax.h"
  
         TEXT
         ALIGN
-       GLOBL   ___mpn_add_n
+       GLOBL   C_SYMBOL_NAME(__mpn_add_n)
  
-LAB(___mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+PROLOG(__mpn_add_n)
  /* Save used registers on the stack.  */
-       INSN2(move,l    ,MEM_PREDEC(sp),d2)
-       INSN2(move,l    ,MEM_PREDEC(sp),a2)
+       movel   R(d2),MEM_PREDEC(sp)
+       movel   R(a2),MEM_PREDEC(sp)
  
  /* Copy the arguments to registers.  Better use movem?  */
-       INSN2(move,l    ,a2,MEM_DISP(sp,12))
-       INSN2(move,l    ,a0,MEM_DISP(sp,16))
-       INSN2(move,l    ,a1,MEM_DISP(sp,20))
-       INSN2(move,l    ,d2,MEM_DISP(sp,24))
-
-       INSN2(eor,w     ,d2,#1)
-       INSN2(lsr,l     ,d2,#1)
-       bcc L1
-       INSN2(subq,l    ,d2,#1)         /* clears cy as side effect */
-
-LAB(Loop)
-       INSN2(move,l    ,d0,MEM_POSTINC(a0))
-       INSN2(move,l    ,d1,MEM_POSTINC(a1))
-       INSN2(addx,l    ,d0,d1)
-       INSN2(move,l    ,MEM_POSTINC(a2),d0)
-LAB(L1)        INSN2(move,l    ,d0,MEM_POSTINC(a0))
-       INSN2(move,l    ,d1,MEM_POSTINC(a1))
-       INSN2(addx,l    ,d0,d1)
-       INSN2(move,l    ,MEM_POSTINC(a2),d0)
-
-       dbf d2,Loop                     /* loop until 16 lsb of %4 == -1 */
-       INSN2(subx,l    ,d0,d0)         /* d0 <= -cy; save cy as 0 or -1 in d0 */
-       INSN2(sub,l     ,d2,#0x10000)
-       bcs L2
-       INSN2(add,l     ,d0,d0)         /* restore cy */
-       bra Loop
-
-LAB(L2)
-       INSN1(neg,l     ,d0)
+       movel   MEM_DISP(sp,12),R(a2)
+       movel   MEM_DISP(sp,16),R(a0)
+       movel   MEM_DISP(sp,20),R(a1)
+       movel   MEM_DISP(sp,24),R(d2)
+
+       eorw    #1,R(d2)
+       lsrl    #1,R(d2)
+       bcc     L(L1)
+       subql   #1,R(d2)        /* clears cy as side effect */
+
+L(Loop:)
+       movel   MEM_POSTINC(a0),R(d0)
+       movel   MEM_POSTINC(a1),R(d1)
+       addxl   R(d1),R(d0)
+       movel   R(d0),MEM_POSTINC(a2)
+L(L1:) movel   MEM_POSTINC(a0),R(d0)
+       movel   MEM_POSTINC(a1),R(d1)
+       addxl   R(d1),R(d0)
+       movel   R(d0),MEM_POSTINC(a2)
+
+       dbf     R(d2),L(Loop)           /* loop until 16 lsb of %4 == -1 */
+       subxl   R(d0),R(d0)     /* d0 <= -cy; save cy as 0 or -1 in d0 */
+       subl    #0x10000,R(d2)
+       bcs     L(L2)
+       addl    R(d0),R(d0)     /* restore cy */
+       bra     L(Loop)
+
+L(L2:)
+       negl    R(d0)
  
  /* Restore used registers from stack frame.  */
-       INSN2(move,l    ,a2,MEM_POSTINC(sp))
-       INSN2(move,l    ,d2,MEM_POSTINC(sp))
+       movel   MEM_POSTINC(sp),R(a2)
+       movel   MEM_POSTINC(sp),R(d2)
  
         rts
+EPILOG(__mpn_add_n)
diff --git a/sysdeps/m68k/lshift.S b/sysdeps/m68k/lshift.S

new file mode 100644 (file)

index 0000000..c58594a
--- /dev/null
+++ b/sysdeps/m68k/lshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr      (sp + 4)
+  s_ptr                (sp + 8)
+  s_size       (sp + 12)
+  cnt          (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+       TEXT
+       ALIGN
+       GLOBL   C_SYMBOL_NAME(__mpn_lshift)
+
+C_SYMBOL_NAME(__mpn_lshift:)
+PROLOG(__mpn_lshift)
+
+/* Save used registers on the stack.  */
+       moveml  R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  */
+       movel   MEM_DISP(sp,28),R(res_ptr)
+       movel   MEM_DISP(sp,32),R(s_ptr)
+       movel   MEM_DISP(sp,36),R(s_size)
+       movel   MEM_DISP(sp,40),R(cnt)
+
+       moveql  #1,R(d5)
+       cmpl    R(d5),R(cnt)
+       bne     L(Lnormal)              /* only cnt == 1 may use Lspecial */
+       cmpl    R(s_ptr),R(res_ptr)
+       bls     L(Lspecial)             /* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+       lea     MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+       movel   R(s_size),R(d0)
+       asll    #2,R(d0)                /* d0 = s_size * 4 (byte count) */
+       lea     MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+       cmpl    R(res_ptr),R(a2)
+       bls     L(Lspecial)             /* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+       moveql  #32,R(d5)
+       subl    R(cnt),R(d5)            /* d5 = 32 - cnt */
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+       lea     MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+       lea     MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68020 */
+       movel   R(s_size),R(d0)
+       asll    #2,R(d0)                /* d0 = s_size * 4 (byte count) */
+       addl    R(d0),R(s_ptr)          /* point past the last source limb */
+       addl    R(d0),R(res_ptr)        /* point past the last result limb */
+#endif
+       movel   MEM_PREDEC(s_ptr),R(d2)
+       movel   R(d2),R(d0)
+       lsrl    R(d5),R(d0)             /* compute carry limb */
+
+       lsll    R(cnt),R(d2)
+       movel   R(d2),R(d1)
+       subql   #1,R(s_size)
+       beq     L(Lend)
+       lsrl    #1,R(s_size)
+       bcs     L(L1)
+       subql   #1,R(s_size)
+
+L(Loop:)
+       movel   MEM_PREDEC(s_ptr),R(d2)
+       movel   R(d2),R(d3)
+       lsrl    R(d5),R(d3)
+       orl     R(d3),R(d1)
+       movel   R(d1),MEM_PREDEC(res_ptr)
+       lsll    R(cnt),R(d2)
+L(L1:)
+       movel   MEM_PREDEC(s_ptr),R(d1)
+       movel   R(d1),R(d3)
+       lsrl    R(d5),R(d3)
+       orl     R(d3),R(d2)
+       movel   R(d2),MEM_PREDEC(res_ptr)
+       lsll    R(cnt),R(d1)
+
+       dbf     R(s_size),L(Loop)
+       subl    #0x10000,R(s_size)
+       bcc     L(Loop)
+
+L(Lend:)
+       movel   R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame.  */
+       moveml  MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+       rts
+
+/* We loop from least significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.  */
+
+L(Lspecial:)
+       clrl    R(d0)                   /* initialize carry */
+       eorw    #1,R(s_size)
+       lsrl    #1,R(s_size)
+       bcc     L(LL1)
+       subql   #1,R(s_size)
+
+L(LLoop:)
+       movel   MEM_POSTINC(s_ptr),R(d2)
+       addxl   R(d2),R(d2)
+       movel   R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+       movel   MEM_POSTINC(s_ptr),R(d2)
+       addxl   R(d2),R(d2)
+       movel   R(d2),MEM_POSTINC(res_ptr)
+
+       dbf     R(s_size),L(LLoop)
+       addxl   R(d0),R(d0)             /* save cy in lsb */
+       subl    #0x10000,R(s_size)
+       bcs     L(LLend)
+       lsrl    #1,R(d0)                /* restore cy */
+       bra     L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame.  */
+       moveml  MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+       rts
+EPILOG(__mpn_lshift)
diff --git a/sysdeps/m68k/m68020/addmul_1.S b/sysdeps/m68k/m68020/addmul_1.S

index 3f244c4..169f113 100644 (file)
--- a/sysdeps/m68k/m68020/addmul_1.S
+++ b/sysdeps/m68k/m68020/addmul_1.S
@@ -1,7 +1,7 @@
  /* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
     the result to a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
    INPUT PARAMETERS
    res_ptr      (sp + 4)
    s1_ptr       (sp + 8)
-  size         (sp + 12)
+  s1_size      (sp + 12)
    s2_limb      (sp + 16)
  */
  
+#include "sysdep.h"
  #include "asm-syntax.h"
  
         TEXT
         ALIGN
-       GLOBL   ___mpn_addmul_1
+       GLOBL   C_SYMBOL_NAME(__mpn_addmul_1)
  
-LAB(___mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+PROLOG(__mpn_addmul_1)
  
  #define res_ptr a0
  #define s1_ptr a1
-#define size d2
+#define s1_size d2
  #define s2_limb d4
  
  /* Save used registers on the stack.  */
-       INSN2(movem,l   ,MEM_PREDEC(sp),d2-d5)
+       moveml  R(d2)-R(d5),MEM_PREDEC(sp)
  
  /* Copy the arguments to registers.  Better use movem?  */
-       INSN2(move,l    ,res_ptr,MEM_DISP(sp,20))
-       INSN2(move,l    ,s1_ptr,MEM_DISP(sp,24))
-       INSN2(move,l    ,size,MEM_DISP(sp,28))
-       INSN2(move,l    ,s2_limb,MEM_DISP(sp,32))
-
-       INSN2(eor,w     ,size,#1)
-       INSN1(clr,l     ,d1)
-       INSN1(clr,l     ,d5)
-       INSN2(lsr,l     ,size,#1)
-       bcc     L1
-       INSN2(subq,l    ,size,#1)
-       INSN2(sub,l     ,d0,d0)         /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
-       INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d1:d3,s2_limb)
-       INSN2(addx,l    ,d3,d0)
-       INSN2(addx,l    ,d1,d5)
-       INSN2(add,l     ,MEM_POSTINC(res_ptr),d3)
-LAB(L1)        INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d0:d3,s2_limb)
-       INSN2(addx,l    ,d3,d1)
-       INSN2(addx,l    ,d0,d5)
-       INSN2(add,l     ,MEM_POSTINC(res_ptr),d3)
-
-       dbf     size,Loop
-       INSN2(addx,l    ,d0,d5)
-       INSN2(sub,l     ,size,#0x10000)
-       bcc     Loop
+       movel   MEM_DISP(sp,20),R(res_ptr)
+       movel   MEM_DISP(sp,24),R(s1_ptr)
+       movel   MEM_DISP(sp,28),R(s1_size)
+       movel   MEM_DISP(sp,32),R(s2_limb)
+
+       eorw    #1,R(s1_size)
+       clrl    R(d1)
+       clrl    R(d5)
+       lsrl    #1,R(s1_size)
+       bcc     L(L1)
+       subql   #1,R(s1_size)
+       subl    R(d0),R(d0)             /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+       movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d1):R(d3)
+       addxl   R(d0),R(d3)
+       addxl   R(d5),R(d1)
+       addl    R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d0):R(d3)
+       addxl   R(d1),R(d3)
+       addxl   R(d5),R(d0)
+       addl    R(d3),MEM_POSTINC(res_ptr)
+
+       dbf     R(s1_size),L(Loop)
+       addxl   R(d5),R(d0)
+       subl    #0x10000,R(s1_size)
+       bcc     L(Loop)
  
  /* Restore used registers from stack frame.  */
-       INSN2(movem,l   ,d2-d5,MEM_POSTINC(sp))
+       moveml  MEM_POSTINC(sp),R(d2)-R(d5)
  
         rts
+EPILOG(__mpn_addmul_1)
diff --git a/sysdeps/m68k/m68020/mul_1.S b/sysdeps/m68k/m68020/mul_1.S

index 548ca00..4db1cca 100644 (file)
--- a/sysdeps/m68k/m68020/mul_1.S
+++ b/sysdeps/m68k/m68020/mul_1.S
@@ -1,7 +1,7 @@
  /* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
     the result in a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,65 +23,68 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
    INPUT PARAMETERS
    res_ptr      (sp + 4)
    s1_ptr       (sp + 8)
-  size         (sp + 12)
+  s1_size      (sp + 12)
    s2_limb      (sp + 16)
  */
  
+#include "sysdep.h"
  #include "asm-syntax.h"
  
         TEXT
         ALIGN
-       GLOBL   ___mpn_mul_1
+       GLOBL   C_SYMBOL_NAME(__mpn_mul_1)
  
-LAB(___mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+PROLOG(__mpn_mul_1)
  
  #define res_ptr a0
  #define s1_ptr a1
-#define size d2
+#define s1_size d2
  #define s2_limb d4
  
  /* Save used registers on the stack.  */
-       INSN2(movem,l   ,MEM_PREDEC(sp),d2-d4)
+       moveml  R(d2)-R(d4),MEM_PREDEC(sp)
  #if 0
-       INSN2(move,l    ,MEM_PREDEC(sp),d2)
-       INSN2(move,l    ,MEM_PREDEC(sp),d3)
-       INSN2(move,l    ,MEM_PREDEC(sp),d4)
+       movel   R(d2),MEM_PREDEC(sp)
+       movel   R(d3),MEM_PREDEC(sp)
+       movel   R(d4),MEM_PREDEC(sp)
  #endif
  
  /* Copy the arguments to registers.  Better use movem?  */
-       INSN2(move,l    ,res_ptr,MEM_DISP(sp,16))
-       INSN2(move,l    ,s1_ptr,MEM_DISP(sp,20))
-       INSN2(move,l    ,size,MEM_DISP(sp,24))
-       INSN2(move,l    ,s2_limb,MEM_DISP(sp,28))
-
-       INSN2(eor,w     ,size,#1)
-       INSN1(clr,l     ,d1)
-       INSN2(lsr,l     ,size,#1)
-       bcc     L1
-       INSN2(subq,l    ,size,#1)
-       INSN2(sub,l     ,d0,d0)         /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
-       INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d1:d3,s2_limb)
-       INSN2(addx,l    ,d3,d0)
-       INSN2(move,l    ,MEM_POSTINC(res_ptr),d3)
-LAB(L1)        INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d0:d3,s2_limb)
-       INSN2(addx,l    ,d3,d1)
-       INSN2(move,l    ,MEM_POSTINC(res_ptr),d3)
-
-       dbf     size,Loop
-       INSN1(clr,l     ,d3)
-       INSN2(addx,l    ,d0,d3)
-       INSN2(sub,l     ,size,#0x10000)
-       bcc     Loop
+       movel   MEM_DISP(sp,16),R(res_ptr)
+       movel   MEM_DISP(sp,20),R(s1_ptr)
+       movel   MEM_DISP(sp,24),R(s1_size)
+       movel   MEM_DISP(sp,28),R(s2_limb)
+
+       eorw    #1,R(s1_size)
+       clrl    R(d1)
+       lsrl    #1,R(s1_size)
+       bcc     L(L1)
+       subql   #1,R(s1_size)
+       subl    R(d0),R(d0)     /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+       movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d1):R(d3)
+       addxl   R(d0),R(d3)
+       movel   R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d0):R(d3)
+       addxl   R(d1),R(d3)
+       movel   R(d3),MEM_POSTINC(res_ptr)
+
+       dbf     R(s1_size),L(Loop)
+       clrl    R(d3)
+       addxl   R(d3),R(d0)
+       subl    #0x10000,R(s1_size)
+       bcc     L(Loop)
  
  /* Restore used registers from stack frame.  */
-       INSN2(movem,l   ,d2-d4,MEM_POSTINC(sp))
+       moveml  MEM_POSTINC(sp),R(d2)-R(d4)
  #if 0
-       INSN2(move,l    ,d4,MEM_POSTINC(sp))
-       INSN2(move,l    ,d3,MEM_POSTINC(sp))
-       INSN2(move,l    ,d2,MEM_POSTINC(sp))
+       movel   MEM_POSTINC(sp),R(d4)
+       movel   MEM_POSTINC(sp),R(d3)
+       movel   MEM_POSTINC(sp),R(d2)
  #endif
         rts
+EPILOG(__mpn_mul_1)
diff --git a/sysdeps/m68k/m68020/submul_1.S b/sysdeps/m68k/m68020/submul_1.S

index ef7f39d..cf30029 100644 (file)
--- a/sysdeps/m68k/m68020/submul_1.S
+++ b/sysdeps/m68k/m68020/submul_1.S
@@ -1,7 +1,7 @@
  /* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
     the result from a second limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
    INPUT PARAMETERS
    res_ptr      (sp + 4)
    s1_ptr       (sp + 8)
-  size         (sp + 12)
+  s1_size      (sp + 12)
    s2_limb      (sp + 16)
  */
  
+#include "sysdep.h"
  #include "asm-syntax.h"
  
         TEXT
         ALIGN
-       GLOBL   ___mpn_submul_1
+       GLOBL   C_SYMBOL_NAME(__mpn_submul_1)
  
-LAB(___mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+PROLOG(__mpn_submul_1)
  
  #define res_ptr a0
  #define s1_ptr a1
-#define size d2
+#define s1_size d2
  #define s2_limb d4
  
  /* Save used registers on the stack.  */
-       INSN2(movem,l   ,MEM_PREDEC(sp),d2-d5)
+       moveml  R(d2)-R(d5),MEM_PREDEC(sp)
  
  /* Copy the arguments to registers.  Better use movem?  */
-       INSN2(move,l    ,res_ptr,MEM_DISP(sp,20))
-       INSN2(move,l    ,s1_ptr,MEM_DISP(sp,24))
-       INSN2(move,l    ,size,MEM_DISP(sp,28))
-       INSN2(move,l    ,s2_limb,MEM_DISP(sp,32))
-
-       INSN2(eor,w     ,size,#1)
-       INSN1(clr,l     ,d1)
-       INSN1(clr,l     ,d5)
-       INSN2(lsr,l     ,size,#1)
-       bcc     L1
-       INSN2(subq,l    ,size,#1)
-       INSN2(sub,l     ,d0,d0)         /* (d0,cy) <= (0,0) */
-
-LAB(Loop)
-       INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d1:d3,s2_limb)
-       INSN2(addx,l    ,d3,d0)
-       INSN2(addx,l    ,d1,d5)
-       INSN2(sub,l     ,MEM_POSTINC(res_ptr),d3)
-LAB(L1)        INSN2(move,l    ,d3,MEM_POSTINC(s1_ptr))
-       INSN2(mulu,l    ,d0:d3,s2_limb)
-       INSN2(addx,l    ,d3,d1)
-       INSN2(addx,l    ,d0,d5)
-       INSN2(sub,l     ,MEM_POSTINC(res_ptr),d3)
-
-       dbf     size,Loop
-       INSN2(addx,l    ,d0,d5)
-       INSN2(sub,l     ,size,#0x10000)
-       bcc     Loop
+       movel   MEM_DISP(sp,20),R(res_ptr)
+       movel   MEM_DISP(sp,24),R(s1_ptr)
+       movel   MEM_DISP(sp,28),R(s1_size)
+       movel   MEM_DISP(sp,32),R(s2_limb)
+
+       eorw    #1,R(s1_size)
+       clrl    R(d1)
+       clrl    R(d5)
+       lsrl    #1,R(s1_size)
+       bcc     L(L1)
+       subql   #1,R(s1_size)
+       subl    R(d0),R(d0)     /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+       movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d1):R(d3)
+       addxl   R(d0),R(d3)
+       addxl   R(d5),R(d1)
+       subl    R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel   MEM_POSTINC(s1_ptr),R(d3)
+       mulul   R(s2_limb),R(d0):R(d3)
+       addxl   R(d1),R(d3)
+       addxl   R(d5),R(d0)
+       subl    R(d3),MEM_POSTINC(res_ptr)
+
+       dbf     R(s1_size),L(Loop)
+       addxl   R(d5),R(d0)
+       subl    #0x10000,R(s1_size)
+       bcc     L(Loop)
  
  /* Restore used registers from stack frame.  */
-       INSN2(movem,l   ,d2-d5,MEM_POSTINC(sp))
+       moveml  MEM_POSTINC(sp),R(d2)-R(d5)
  
         rts
+EPILOG(__mpn_submul_1)
diff --git a/sysdeps/m68k/rshift.S b/sysdeps/m68k/rshift.S

new file mode 100644 (file)

index 0000000..494dfcb
--- /dev/null
+++ b/sysdeps/m68k/rshift.S
@@ -0,0 +1,149 @@
+/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr      (sp + 4)
+  s_ptr                (sp + 8)
+  s_size       (sp + 12)
+  cnt          (sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+       TEXT
+       ALIGN
+       GLOBL   C_SYMBOL_NAME(__mpn_rshift)
+
+C_SYMBOL_NAME(__mpn_rshift:)
+PROLOG(__mpn_rshift)
+/* Save used registers on the stack.  */
+       moveml  R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  */
+       movel   MEM_DISP(sp,28),R(res_ptr)
+       movel   MEM_DISP(sp,32),R(s_ptr)
+       movel   MEM_DISP(sp,36),R(s_size)
+       movel   MEM_DISP(sp,40),R(cnt)
+
+       moveql  #1,R(d5)
+       cmpl    R(d5),R(cnt)
+       bne     L(Lnormal)              /* only cnt == 1 may use Lspecial */
+       cmpl    R(res_ptr),R(s_ptr)
+       bls     L(Lspecial)             /* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+       lea     MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+       movel   R(s_size),R(d0)
+       asll    #2,R(d0)                /* d0 = s_size * 4 (byte count) */
+       lea     MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+       cmpl    R(s_ptr),R(a2)
+       bls     L(Lspecial)             /* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+       moveql  #32,R(d5)
+       subl    R(cnt),R(d5)            /* d5 = 32 - cnt */
+       movel   MEM_POSTINC(s_ptr),R(d2)
+       movel   R(d2),R(d0)
+       lsll    R(d5),R(d0)             /* compute carry limb */
+   
+       lsrl    R(cnt),R(d2)
+       movel   R(d2),R(d1)
+       subql   #1,R(s_size)
+       beq     L(Lend)
+       lsrl    #1,R(s_size)
+       bcs     L(L1)
+       subql   #1,R(s_size)
+
+L(Loop:)
+       movel   MEM_POSTINC(s_ptr),R(d2)
+       movel   R(d2),R(d3)
+       lsll    R(d5),R(d3)
+       orl     R(d3),R(d1)
+       movel   R(d1),MEM_POSTINC(res_ptr)
+       lsrl    R(cnt),R(d2)
+L(L1:)
+       movel   MEM_POSTINC(s_ptr),R(d1)
+       movel   R(d1),R(d3)
+       lsll    R(d5),R(d3)
+       orl     R(d3),R(d2)
+       movel   R(d2),MEM_POSTINC(res_ptr)
+       lsrl    R(cnt),R(d1)
+
+       dbf     R(s_size),L(Loop)
+       subl    #0x10000,R(s_size)
+       bcc     L(Loop)
+
+L(Lend:)
+       movel   R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame.  */
+       moveml  MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+       rts
+
+/* We loop from most significant end of the arrays, which is only
+   permissible if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.  */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+       lea     MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+       lea     MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68020 */
+       movel   R(s_size),R(d0)
+       asll    #2,R(d0)                /* d0 = s_size * 4 (byte count) */
+       addl    R(d0),R(s_ptr)          /* point past the last source limb */
+       addl    R(d0),R(res_ptr)        /* point past the last result limb */
+#endif
+
+       clrl    R(d0)                   /* initialize carry */
+       eorw    #1,R(s_size)
+       lsrl    #1,R(s_size)
+       bcc     L(LL1)
+       subql   #1,R(s_size)
+
+L(LLoop:)
+       movel   MEM_PREDEC(s_ptr),R(d2)
+       roxrl   #1,R(d2)
+       movel   R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+       movel   MEM_PREDEC(s_ptr),R(d2)
+       roxrl   #1,R(d2)
+       movel   R(d2),MEM_PREDEC(res_ptr)
+
+       dbf     R(s_size),L(LLoop)
+       roxrl   #1,R(d0)                /* save cy in msb */
+       subl    #0x10000,R(s_size)
+       bcs     L(LLend)
+       addl    R(d0),R(d0)             /* restore cy */
+       bra     L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame.  */
+       moveml  MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+       rts
+EPILOG(__mpn_rshift)
diff --git a/sysdeps/m68k/sub_n.S b/sysdeps/m68k/sub_n.S

index 19f0ec1..39f5161 100644 (file)
--- a/sysdeps/m68k/sub_n.S
+++ b/sysdeps/m68k/sub_n.S
@@ -1,7 +1,7 @@
  /* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
     store difference in a third limb vector.
  
-Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
    size         (sp + 12)
  */
  
+#include "sysdep.h"
  #include "asm-syntax.h"
  
         TEXT
         ALIGN
-       GLOBL   ___mpn_sub_n
+       GLOBL   C_SYMBOL_NAME(__mpn_sub_n)
  
-LAB(___mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+PROLOG(__mpn_sub_n)
  /* Save used registers on the stack.  */
-       INSN2(move,l    ,MEM_PREDEC(sp),d2)
-       INSN2(move,l    ,MEM_PREDEC(sp),a2)
+       movel   R(d2),MEM_PREDEC(sp)
+       movel   R(a2),MEM_PREDEC(sp)
  
  /* Copy the arguments to registers.  Better use movem?  */
-       INSN2(move,l    ,a2,MEM_DISP(sp,12))
-       INSN2(move,l    ,a0,MEM_DISP(sp,16))
-       INSN2(move,l    ,a1,MEM_DISP(sp,20))
-       INSN2(move,l    ,d2,MEM_DISP(sp,24))
-
-       INSN2(eor,w     ,d2,#1)
-       INSN2(lsr,l     ,d2,#1)
-       bcc L1
-       INSN2(subq,l    ,d2,#1)         /* clears cy as side effect */
-
-LAB(Loop)
-       INSN2(move,l    ,d0,MEM_POSTINC(a0))
-       INSN2(move,l    ,d1,MEM_POSTINC(a1))
-       INSN2(subx,l    ,d0,d1)
-       INSN2(move,l    ,MEM_POSTINC(a2),d0)
-LAB(L1)        INSN2(move,l    ,d0,MEM_POSTINC(a0))
-       INSN2(move,l    ,d1,MEM_POSTINC(a1))
-       INSN2(subx,l    ,d0,d1)
-       INSN2(move,l    ,MEM_POSTINC(a2),d0)
-
-       dbf d2,Loop                     /* loop until 16 lsb of %4 == -1 */
-       INSN2(subx,l    ,d0,d0)         /* d0 <= -cy; save cy as 0 or -1 in d0 */
-       INSN2(sub,l     ,d2,#0x10000)
-       bcs L2
-       INSN2(add,l     ,d0,d0)         /* restore cy */
-       bra Loop
-
-LAB(L2)
-       INSN1(neg,l     ,d0)
+       movel   MEM_DISP(sp,12),R(a2)
+       movel   MEM_DISP(sp,16),R(a0)
+       movel   MEM_DISP(sp,20),R(a1)
+       movel   MEM_DISP(sp,24),R(d2)
+
+       eorw    #1,R(d2)
+       lsrl    #1,R(d2)
+       bcc     L(L1)
+       subql   #1,R(d2)        /* clears cy as side effect */
+
+L(Loop:)
+       movel   MEM_POSTINC(a0),R(d0)
+       movel   MEM_POSTINC(a1),R(d1)
+       subxl   R(d1),R(d0)
+       movel   R(d0),MEM_POSTINC(a2)
+L(L1:) movel   MEM_POSTINC(a0),R(d0)
+       movel   MEM_POSTINC(a1),R(d1)
+       subxl   R(d1),R(d0)
+       movel   R(d0),MEM_POSTINC(a2)
+
+       dbf     R(d2),L(Loop)           /* loop until 16 lsb of d2 == -1 */
+       subxl   R(d0),R(d0)     /* d0 <= -cy; save cy as 0 or -1 in d0 */
+       subl    #0x10000,R(d2)
+       bcs     L(L2)
+       addl    R(d0),R(d0)     /* restore cy */
+       bra     L(Loop)
+
+L(L2:)
+       negl    R(d0)
  
  /* Restore used registers from stack frame.  */
-       INSN2(move,l    ,a2,MEM_POSTINC(sp))
-       INSN2(move,l    ,d2,MEM_POSTINC(sp))
+       movel   MEM_POSTINC(sp),R(a2)
+       movel   MEM_POSTINC(sp),R(d2)
  
         rts
+EPILOG(__mpn_sub_n)
diff --git a/sysdeps/m88k/add_n.s b/sysdeps/m88k/add_n.s

index 7e4cccc..d564479 100644 (file)
--- a/sysdeps/m88k/add_n.s
+++ b/sysdeps/m88k/add_n.s
@@ -1,7 +1,7 @@
  ; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
  ; sum in a third limb vector.
  
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  ; This file is part of the GNU MP Library.
  
diff --git a/sysdeps/m88k/m88110/add_n.S b/sysdeps/m88k/m88110/add_n.S

new file mode 100644 (file)

index 0000000..ab20630
--- /dev/null
+++ b/sysdeps/m88k/m88110/add_n.S
@@ -0,0 +1,199 @@
+; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr        r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size   r5
+
+#include "sysdep.h"
+
+       text
+       align   16
+       global  C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+       addu.co  r0,r0,r0               ; clear cy flag
+       xor      r12,s2_ptr,res_ptr     ; bit 2 of r12 set if the two alignments differ
+       bb1      2,r12,L1               ; they differ: try pairing s1_ptr with res_ptr
+; **  V1a: s2_ptr and res_ptr have the same alignment  **
+L0:    bb0      2,res_ptr,L_v1         ; branch if res_ptr is aligned (bit 2 clear)
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       addu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1:  cmp      r12,size,2             ; r12 is reused as scratch for the compare
+       bb1      lt,r12,Lend2           ; fewer than 2 limbs left: handle 0 or 1 at Lend2
+
+       ld       r10,s1_ptr,0           ; preload two s1 limbs into r10 and r12
+       ld       r12,s1_ptr,4
+       ld.d     r8,s2_ptr,0            ; preload two s2 limbs into r8,r9 with one load
+       subu     size,size,10           ; 2 preloaded limbs + 8 for a full Loop1 pass
+       bcnd     lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop1: subu     size,size,8
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,16
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,20
+       ld.d     r8,s2_ptr,16
+       st.d     r6,res_ptr,8
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,24
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,28
+       ld.d     r8,s2_ptr,24
+       st.d     r6,res_ptr,16
+       addu.cio r6,r10,r8
+       ld       r10,s1_ptr,32
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,36
+       addu     s1_ptr,s1_ptr,32
+       ld.d     r8,s2_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1
+
+Lfin1: addu     size,size,8-2          ; rebias the count for the 2-limb loop
+       bcnd     lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:        addu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       addu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1
+Lend1: addu.cio r6,r10,r8              ; add the two limb pairs still held in registers
+       addu.cio r7,r12,r9
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1           ; bit 0 of size clear: no odd limb remains
+/* Add last limb */
+       ld       r10,s1_ptr,8
+       ld       r8,s2_ptr,8
+       addu.cio r6,r10,r8
+       st       r6,res_ptr,8
+
+Lret1: jmp.n    r1                     ; return; the next insn runs in the delay slot
+       addu.ci  r2,r0,r0               ; return carry-out from most sign. limb
+
+L1:    xor      r12,s1_ptr,res_ptr     ; compare alignment of s1_ptr and res_ptr
+       bb1      2,r12,L2               ; these differ too: use the V2 code
+; **  V1b: s1_ptr and res_ptr have the same alignment  **
+       or       r12,r0,s2_ptr          ; swap s1_ptr and s2_ptr (r12 is the temporary);
+       or       s2_ptr,r0,s1_ptr       ; the sum does not depend on operand order
+       or       s1_ptr,r0,r12
+       br       L0                     ; continue with the V1a code
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:    cmp      r12,size,1
+       bb1      eq,r12,Ljone           ; exactly one limb: add it directly
+       bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       addu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+
+L_v2:  subu     size,size,8
+       bcnd     lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop2: subu     size,size,8
+       ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,0           ; single-word stores: res_ptr is aligned differently
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       ld.d     r8,s1_ptr,8
+       ld.d     r6,s2_ptr,8
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,8
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,12
+       ld.d     r8,s1_ptr,16
+       ld.d     r6,s2_ptr,16
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,16
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,20
+       ld.d     r8,s1_ptr,24
+       ld.d     r6,s2_ptr,24
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,24
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,28
+       addu     s1_ptr,s1_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop2
+
+Lfin2: addu     size,size,8-2          ; rebias the count for the 2-limb loop
+       bcnd     lt0,size,Lend2
+Loope2:        ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       addu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       addu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope2
+Lend2: bb0      0,size,Lret2           ; bit 0 of size clear: no odd limb remains
+/* Add last limb */
+Ljone: ld       r10,s1_ptr,0
+       ld       r8,s2_ptr,0
+       addu.cio r6,r10,r8
+       st       r6,res_ptr,0
+
+Lret2: jmp.n    r1                     ; return; the next insn runs in the delay slot
+       addu.ci  r2,r0,r0               ; return carry-out from most sign. limb
diff --git a/sysdeps/m88k/m88110/addmul_1.s b/sysdeps/m88k/m88110/addmul_1.s

new file mode 100644 (file)

index 0000000..1a4dfa1
--- /dev/null
+++ b/sysdeps/m88k/m88110/addmul_1.s
@@ -0,0 +1,60 @@
+; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright (C) 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr      r2
+; s1_ptr       r3
+; size         r4
+; s2_limb      r5
+
+       text
+       align   16
+       global  ___mpn_addmul_1
+___mpn_addmul_1:
+       lda      r3,r3[r4]              ; S1_PTR now points at the end of its block
+       lda      r8,r2[r4]              ; RES_PTR in r8 since r2 is retval
+       subu     r4,r0,r4               ; r4 = -SIZE, counts up towards zero
+       addu.co  r2,r0,r0               ; r2 = cy = 0
+
+       ld       r6,r3[r4]              ; first s1 limb
+       addu     r4,r4,1
+       subu     r8,r8,4                ; bias r8 so r8[r4] addresses the limb being updated
+       bcnd.n   eq0,r4,Lend            ; one limb only: skip the loop (delay slot below)
+        mulu.d  r10,r6,r5              ; r10 = high, r11 = low word of s1 limb * S2_LIMB
+
+Loop:  ld       r7,r8[r4]              ; res limb that receives the pending product
+       ld       r6,r3[r4]              ; next s1 limb
+       addu.cio r9,r11,r2              ; low product word + carry limb + cy
+       addu.ci  r2,r10,r0              ; new carry limb = high product word + cy
+       addu.co  r9,r9,r7               ; add the res limb; its cy feeds the next addu.cio
+       st       r9,r8[r4]
+       addu     r4,r4,1
+       mulu.d   r10,r6,r5              ; start the next product
+       bcnd     ne0,r4,Loop
+
+Lend:  ld       r7,r8,0                ; last res limb
+       addu.cio r9,r11,r2
+       addu.ci  r2,r10,r0
+       addu.co  r9,r9,r7
+       st       r9,r8,0
+       jmp.n    r1                     ; return; the next insn runs in the delay slot
+        addu.ci r2,r2,r0               ; fold the final cy into the returned carry limb
diff --git a/sysdeps/m88k/m88110/mul_1.s b/sysdeps/m88k/m88110/mul_1.s

index 08c3ca0..b1352ce 100644 (file)
--- a/sysdeps/m88k/m88110/mul_1.s
+++ b/sysdeps/m88k/m88110/mul_1.s
@@ -1,7 +1,7 @@
  ; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
  ; store the product in a second limb vector.
  
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  ; This file is part of the GNU MP Library.
  
@@ -56,29 +56,3 @@ Lend:        addu.cio r9,r11,r2
         st       r9,r8,4
         jmp.n    r1
          addu.ci r2,r10,r0
-
-; This is the Right Way to do this on '110.  4 cycles / 64-bit limb.
-;      ld.d    r10,
-;      mulu.d
-;      addu.cio
-;      addu.cio
-;      st.d
-;      mulu.d  ,r11,r5
-;      ld.d    r12,
-;      mulu.d  ,r10,r5
-;      addu.cio
-;      addu.cio
-;      st.d
-;      mulu.d
-;      ld.d    r10,
-;      mulu.d
-;      addu.cio
-;      addu.cio
-;      st.d
-;      mulu.d
-;      ld.d    r10,
-;      mulu.d
-;      addu.cio
-;      addu.cio
-;      st.d
-;      mulu.d
diff --git a/sysdeps/m88k/m88110/sub_n.S b/sysdeps/m88k/m88110/sub_n.S

new file mode 100644 (file)

index 0000000..74ee0ae
--- /dev/null
+++ b/sysdeps/m88k/m88110/sub_n.S
@@ -0,0 +1,275 @@
+; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr        r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size   r5
+
+#include "sysdep.h"
+
+       text
+       align   16
+       global  C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+       subu.co  r0,r0,r0               ; set cy flag
+       xor      r12,s2_ptr,res_ptr     ; bit 2 of r12 set if the two alignments differ
+       bb1      2,r12,L1               ; they differ: try pairing s1_ptr with res_ptr
+; **  V1a: s2_ptr and res_ptr have the same alignment  **
+L0:    bb0      2,res_ptr,L_v1         ; branch if res_ptr is aligned
+/* Subtract least significant limb separately to align res_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       subu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1:  cmp      r12,size,2             ; r12 is reused as scratch for the compare
+       bb1      lt,r12,Lend2           ; fewer than 2 limbs left: handle 0 or 1 at Lend2
+
+       ld       r10,s1_ptr,0           ; preload two s1 limbs into r10 and r12
+       ld       r12,s1_ptr,4
+       ld.d     r8,s2_ptr,0            ; preload two s2 limbs into r8,r9 with one load
+       subu     size,size,10           ; 2 preloaded limbs + 8 for a full Loop1 pass
+       bcnd     lt0,size,Lfin1
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop1: subu     size,size,8
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,16
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,20
+       ld.d     r8,s2_ptr,16
+       st.d     r6,res_ptr,8
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,24
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,28
+       ld.d     r8,s2_ptr,24
+       st.d     r6,res_ptr,16
+       subu.cio r6,r10,r8
+       ld       r10,s1_ptr,32
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,36
+       addu     s1_ptr,s1_ptr,32
+       ld.d     r8,s2_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1
+
+Lfin1: addu     size,size,8-2          ; rebias the count for the 2-limb loop
+       bcnd     lt0,size,Lend1
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+Loope1:        subu.cio r6,r10,r8
+       ld       r10,s1_ptr,8
+       subu.cio r7,r12,r9
+       ld       r12,s1_ptr,12
+       ld.d     r8,s2_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1
+Lend1: subu.cio r6,r10,r8              ; finish the two limb pairs still held in registers
+       subu.cio r7,r12,r9
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1           ; bit 0 of size clear: no odd limb remains
+/* Subtract last limb */
+       ld       r10,s1_ptr,8
+       ld       r8,s2_ptr,8
+       subu.cio r6,r10,r8
+       st       r6,res_ptr,8
+
+Lret1: addu.ci r2,r0,r0                ; r2 = cy flag after the most sign. limb
+       jmp.n    r1                     ; return; the next insn runs in the delay slot
+        xor    r2,r2,1                 ; invert cy (set on entry) to form the return value
+
+L1:    xor      r12,s1_ptr,res_ptr     ; compare alignment of s1_ptr and res_ptr
+       bb1      2,r12,L2               ; these differ too: use the V2 code
+; **  V1b: s1_ptr and res_ptr have the same alignment  **
+       bb0      2,res_ptr,L_v1b        ; branch if res_ptr is aligned
+/* Subtract least significant limb separately to align res_ptr and s1_ptr */
+       ld       r10,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       ld       r8,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       subu     size,size,1
+       subu.co  r6,r8,r10
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+L_v1b: cmp      r12,size,2             ; r12 is reused as scratch for the compare
+       bb1      lt,r12,Lend2           ; fewer than 2 limbs left: handle 0 or 1 at Lend2
+
+       ld       r10,s2_ptr,0           ; here the single loads come from s2_ptr ...
+       ld       r12,s2_ptr,4
+       ld.d     r8,s1_ptr,0            ; ... and the double load from s1_ptr
+       subu     size,size,10           ; 2 preloaded limbs + 8 for a full Loop1b pass
+       bcnd     lt0,size,Lfin1b
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop1b:        subu     size,size,8
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,8
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,12
+       ld.d     r8,s1_ptr,8
+       st.d     r6,res_ptr,0
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,16
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,20
+       ld.d     r8,s1_ptr,16
+       st.d     r6,res_ptr,8
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,24
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,28
+       ld.d     r8,s1_ptr,24
+       st.d     r6,res_ptr,16
+       subu.cio r6,r8,r10
+       ld       r10,s2_ptr,32
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,36
+       addu     s2_ptr,s2_ptr,32
+       ld.d     r8,s1_ptr,32
+       addu     s1_ptr,s1_ptr,32
+       st.d     r6,res_ptr,24
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop1b
+
+Lfin1b:        addu     size,size,8-2          ; rebias the count for the 2-limb loop
+       bcnd     lt0,size,Lend1b
+/* Subtract blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+       ld       r10,s2_ptr,8
+       subu.cio r7,r9,r12
+       ld       r12,s2_ptr,12
+       ld.d     r8,s1_ptr,8
+       st.d     r6,res_ptr,0
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope1b
+Lend1b:        subu.cio r6,r8,r10              ; finish the two limb pairs still held in registers
+       subu.cio r7,r9,r12
+       st.d     r6,res_ptr,0
+
+       bb0      0,size,Lret1b          ; bit 0 of size clear: no odd limb remains
+/* Subtract last limb */
+       ld       r10,s2_ptr,8
+       ld       r8,s1_ptr,8
+       subu.cio r6,r8,r10
+       st       r6,res_ptr,8
+
+Lret1b:        addu.ci r2,r0,r0                ; r2 = cy flag after the most sign. limb
+       jmp.n    r1                     ; return; the next insn runs in the delay slot
+        xor    r2,r2,1                 ; invert cy (set on entry) to form the return value
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:    cmp      r12,size,1
+       bb1      eq,r12,Ljone           ; exactly one limb: subtract it directly
+       bb0      2,s1_ptr,L_v2          ; branch if s1_ptr is aligned
+/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
+       ld       r10,s1_ptr,0
+       addu     s1_ptr,s1_ptr,4
+       ld       r8,s2_ptr,0
+       addu     s2_ptr,s2_ptr,4
+       subu     size,size,1
+       subu.co  r6,r10,r8
+       st       r6,res_ptr,0
+       addu     res_ptr,res_ptr,4
+
+L_v2:  subu     size,size,8
+       bcnd     lt0,size,Lfin2
+/* Subtract blocks of 8 limbs until less than 8 limbs remain */
+       align    8
+Loop2: subu     size,size,8
+       ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,0           ; single-word stores: res_ptr is aligned differently
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       ld.d     r8,s1_ptr,8
+       ld.d     r6,s2_ptr,8
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,8
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,12
+       ld.d     r8,s1_ptr,16
+       ld.d     r6,s2_ptr,16
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,16
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,20
+       ld.d     r8,s1_ptr,24
+       ld.d     r6,s2_ptr,24
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,24
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,28
+       addu     s1_ptr,s1_ptr,32
+       addu     s2_ptr,s2_ptr,32
+       addu     res_ptr,res_ptr,32
+       bcnd     ge0,size,Loop2
+
+Lfin2: addu     size,size,8-2          ; rebias the count for the 2-limb loop
+       bcnd     lt0,size,Lend2
+Loope2:        ld.d     r8,s1_ptr,0
+       ld.d     r6,s2_ptr,0
+       subu.cio r8,r8,r6
+       st       r8,res_ptr,0
+       subu.cio r9,r9,r7
+       st       r9,res_ptr,4
+       subu     size,size,2
+       addu     s1_ptr,s1_ptr,8
+       addu     s2_ptr,s2_ptr,8
+       addu     res_ptr,res_ptr,8
+       bcnd     ge0,size,Loope2
+Lend2: bb0      0,size,Lret2           ; bit 0 of size clear: no odd limb remains
+/* Subtract last limb */
+Ljone: ld       r10,s1_ptr,0
+       ld       r8,s2_ptr,0
+       subu.cio r6,r10,r8
+       st       r6,res_ptr,0
+
+Lret2: addu.ci r2,r0,r0                ; r2 = cy flag after the most sign. limb
+       jmp.n    r1                     ; return; the next insn runs in the delay slot
+        xor    r2,r2,1                 ; invert cy (set on entry) to form the return value
diff --git a/sysdeps/m88k/mul_1.s b/sysdeps/m88k/mul_1.s

index 35c238d..6b8492c 100644 (file)
--- a/sysdeps/m88k/mul_1.s
+++ b/sysdeps/m88k/mul_1.s
@@ -1,7 +1,7 @@
  ; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
  ; store the product in a second limb vector.
  
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  ; This file is part of the GNU MP Library.
  
@@ -55,14 +55,14 @@ ___mpn_mul_1:
         ; Make S1_PTR and RES_PTR point at the end of their blocks
         ; and negate SIZE.
         lda      r3,r3[r4]
-       lda      r6,r2[r4]              ; RES_PTR in r6 since r2 is retval
+       lda      r6,r2[r4]      ; RES_PTR in r6 since r2 is retval
         subu     r4,r0,r4
  
-       addu.co  r2,r0,r0               ; r2 = cy = 0
+       addu.co  r2,r0,r0       ; r2 = cy = 0
         ld       r9,r3[r4]
-       mask     r7,r5,0xffff           ; r7 = lo(S2_LIMB)
-       extu     r8,r5,16               ; r8 = hi(S2_LIMB)
-       bcnd.n   eq0,r8,Lsmall          ; jump if (hi(S2_LIMB) == 0)
+       mask     r7,r5,0xffff   ; r7 = lo(S2_LIMB)
+       extu     r8,r5,16       ; r8 = hi(S2_LIMB)
+       bcnd.n   eq0,r8,Lsmall  ; jump if (hi(S2_LIMB) == 0)
          subu    r6,r6,4
  
  ; General code for any value of S2_LIMB.
@@ -75,28 +75,27 @@ ___mpn_mul_1:
         br.n    L1
         addu     r4,r4,1
  
-Loop:
-       ld       r9,r3[r4]
+Loop:  ld       r9,r3[r4]
         st       r26,r6[r4]
-; bcnd ne0,r0,0                        ; bubble
+; bcnd ne0,r0,0                ; bubble
         addu     r4,r4,1
-L1:    mul      r26,r9,r5              ; low word of product   mul_1   WB ld
-       mask     r12,r9,0xffff          ; r12 = lo(s1_limb)     mask_1
-       mul      r11,r12,r7             ; r11 =  prod_0         mul_2   WB mask_1
-       mul      r10,r12,r8             ; r10 = prod_1a         mul_3
-       extu     r13,r9,16              ; r13 = hi(s1_limb)     extu_1  WB mul_1
-       mul      r12,r13,r7             ; r12 = prod_1b         mul_4   WB extu_1
-       mul      r25,r13,r8             ; r25  = prod_2         mul_5   WB mul_2
-       extu     r11,r11,16             ; r11 = hi(prod_0)      extu_2  WB mul_3
-       addu     r10,r10,r11            ;                       addu_1  WB extu_2
-; bcnd ne0,r0,0                        ; bubble                        WB addu_1
-       addu.co  r10,r10,r12            ;                               WB mul_4
-       mask.u   r10,r10,0xffff         ; move the 16 most significant bits...
-       addu.ci  r10,r10,r0             ; ...to the low half of the word...
-       rot      r10,r10,16             ; ...and put carry in pos 16.
-       addu.co  r26,r26,r2             ; add old carry limb
+L1:    mul      r26,r9,r5      ; low word of product   mul_1   WB ld
+       mask     r12,r9,0xffff  ; r12 = lo(s1_limb)     mask_1
+       mul      r11,r12,r7     ; r11 =  prod_0         mul_2   WB mask_1
+       mul      r10,r12,r8     ; r10 = prod_1a         mul_3
+       extu     r13,r9,16      ; r13 = hi(s1_limb)     extu_1  WB mul_1
+       mul      r12,r13,r7     ; r12 = prod_1b         mul_4   WB extu_1
+       mul      r25,r13,r8     ; r25  = prod_2         mul_5   WB mul_2
+       extu     r11,r11,16     ; r11 = hi(prod_0)      extu_2  WB mul_3
+       addu     r10,r10,r11    ;                       addu_1  WB extu_2
+; bcnd ne0,r0,0                ; bubble                        WB addu_1
+       addu.co  r10,r10,r12    ;                               WB mul_4
+       mask.u   r10,r10,0xffff ; move the 16 most significant bits...
+       addu.ci  r10,r10,r0     ; ...to the low half of the word...
+       rot      r10,r10,16     ; ...and put carry in pos 16.
+       addu.co  r26,r26,r2     ; add old carry limb
         bcnd.n   ne0,r4,Loop
-        addu.ci r2,r25,r10             ; compute new carry limb
+        addu.ci r2,r25,r10     ; compute new carry limb
  
         st       r26,r6[r4]
         ld.d     r25,r31,8
@@ -109,20 +108,19 @@ Lsmall:
         br.n    SL1
         addu     r4,r4,1
  
-SLoop:
-       ld       r9,r3[r4]              ;
-       st       r8,r6[r4]              ;
-       addu     r4,r4,1                ;
-SL1:   mul      r8,r9,r5               ; low word of product
-       mask     r12,r9,0xffff          ; r12 = lo(s1_limb)
-       extu     r13,r9,16              ; r13 = hi(s1_limb)
-       mul      r11,r12,r7             ; r11 =  prod_0
-       mul      r12,r13,r7             ; r12 = prod_1b
-       addu.cio r8,r8,r2               ; add old carry limb
-       extu     r10,r11,16             ; r11 = hi(prod_0)
-       addu     r10,r10,r12            ;
+SLoop: ld       r9,r3[r4]      ;
+       st       r8,r6[r4]      ;
+       addu     r4,r4,1        ;
+SL1:   mul      r8,r9,r5       ; low word of product
+       mask     r12,r9,0xffff  ; r12 = lo(s1_limb)
+       extu     r13,r9,16      ; r13 = hi(s1_limb)
+       mul      r11,r12,r7     ; r11 =  prod_0
+       mul      r12,r13,r7     ; r12 = prod_1b
+       addu.cio r8,r8,r2       ; add old carry limb
+       extu     r10,r11,16     ; r10 = hi(prod_0)
+       addu     r10,r10,r12    ;
         bcnd.n   ne0,r4,SLoop
-       extu     r2,r10,16              ; r2 = new carry limb
+       extu     r2,r10,16      ; r2 = new carry limb
  
         jmp.n    r1
         st       r8,r6[r4]
diff --git a/sysdeps/m88k/sub_n.s b/sysdeps/m88k/sub_n.s

index 3963cd5..cd0b791 100644 (file)
--- a/sysdeps/m88k/sub_n.s
+++ b/sysdeps/m88k/sub_n.s
@@ -1,7 +1,7 @@
  ; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
  ; store difference in a third limb vector.
  
-; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
  ; This file is part of the GNU MP Library.
  
@@ -41,9 +41,10 @@ ___mpn_sub_n:
         extu    r10,r5,3
         ld      r7,r4,0                 ; read first limb from s2_ptr
  
-       subu.co r5,r0,r5                ; (clear carry as side effect)
+       subu    r5,r0,r5
         mak     r5,r5,3<4>
-       bcnd    eq0,r5,Lzero
+       bcnd.n  eq0,r5,Lzero
+       subu.co r0,r0,r0                ; initialize carry
  
         or      r12,r0,lo16(Lbase)
         or.u    r12,r12,hi16(Lbase)
diff --git a/sysdeps/mips/addmul_1.s b/sysdeps/mips/addmul_1.s

index abc2fb8..917af1b 100644 (file)
--- a/sysdeps/mips/addmul_1.s
+++ b/sysdeps/mips/addmul_1.s
@@ -1,7 +1,7 @@
   # MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
   # add the product to a second limb vector.
  
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
   # This file is part of the GNU MP Library.
  
@@ -63,7 +63,7 @@ Loop: lw      $10,0($4)
         addu    $2,$2,$10
         sw      $3,0($4)
         addiu   $4,$4,4
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          addu   $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/mips/mips3/addmul_1.s b/sysdeps/mips/mips3/addmul_1.s

index 7af0172..7dbc9ad 100644 (file)
--- a/sysdeps/mips/mips3/addmul_1.s
+++ b/sysdeps/mips/mips3/addmul_1.s
@@ -63,7 +63,7 @@ Loop: ld      $10,0($4)
         daddu   $2,$2,$10
         sd      $3,0($4)
         daddiu  $4,$4,8
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          daddu  $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/mips/mips3/mul_1.s b/sysdeps/mips/mips3/mul_1.s

index 87954e5..8376a02 100644 (file)
--- a/sysdeps/mips/mips3/mul_1.s
+++ b/sysdeps/mips/mips3/mul_1.s
@@ -59,7 +59,7 @@ Loop: mflo    $10
         sltu    $2,$10,$2       # carry from previous addition -> $2
         sd      $10,0($4)
         daddiu  $4,$4,8
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          daddu  $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/mips/mips3/submul_1.s b/sysdeps/mips/mips3/submul_1.s

index f28c6a5..f041f6c 100644 (file)
--- a/sysdeps/mips/mips3/submul_1.s
+++ b/sysdeps/mips/mips3/submul_1.s
@@ -63,7 +63,7 @@ Loop: ld      $10,0($4)
         daddu   $2,$2,$10
         sd      $3,0($4)
         daddiu  $4,$4,8
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          daddu  $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/mips/mul_1.s b/sysdeps/mips/mul_1.s

index 01327e2..6f5324c 100644 (file)
--- a/sysdeps/mips/mul_1.s
+++ b/sysdeps/mips/mul_1.s
@@ -1,7 +1,7 @@
   # MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
   # store the product in a second limb vector.
  
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
   # This file is part of the GNU MP Library.
  
@@ -59,7 +59,7 @@ Loop: mflo    $10
         sltu    $2,$10,$2       # carry from previous addition -> $2
         sw      $10,0($4)
         addiu   $4,$4,4
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          addu   $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/mips/submul_1.s b/sysdeps/mips/submul_1.s

index 616dd1b..a78072a 100644 (file)
--- a/sysdeps/mips/submul_1.s
+++ b/sysdeps/mips/submul_1.s
@@ -1,7 +1,7 @@
   # MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
   # subtract the product from a second limb vector.
  
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
  
   # This file is part of the GNU MP Library.
  
@@ -63,7 +63,7 @@ Loop: lw      $10,0($4)
         addu    $2,$2,$10
         sw      $3,0($4)
         addiu   $4,$4,4
-       bne     $6,$0,Loop      # should be "bnel"
+       bne     $6,$0,Loop
          addu   $2,$9,$2        # add high product limb and carry from addition
  
   # cool down phase 1
diff --git a/sysdeps/rs6000/add_n.s b/sysdeps/rs6000/add_n.s

index 7090cf1..e2536d5 100644 (file)
--- a/sysdeps/rs6000/add_n.s
+++ b/sysdeps/rs6000/add_n.s
@@ -1,6 +1,6 @@
  # IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
  
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  # This file is part of the GNU MP Library.
  
diff --git a/sysdeps/rs6000/sub_n.s b/sysdeps/rs6000/sub_n.s

index 40fe7d6..c57675b 100644 (file)
--- a/sysdeps/rs6000/sub_n.s
+++ b/sysdeps/rs6000/sub_n.s
@@ -1,7 +1,7 @@
  # IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
  # store difference in a third limb vector.
  
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  # This file is part of the GNU MP Library.
  
diff --git a/sysdeps/sparc/add_n.S b/sysdeps/sparc/add_n.S

index 80c3b99..49b31fc 100644 (file)
--- a/sysdeps/sparc/add_n.S
+++ b/sysdeps/sparc/add_n.S
@@ -1,7 +1,7 @@
-! sparc __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
  ! sum in a third limb vector.
  
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
@@ -32,18 +32,14 @@
         .align  4
         .global C_SYMBOL_NAME(__mpn_add_n)
  C_SYMBOL_NAME(__mpn_add_n):
-       cmp     size,8
-       mov     0,%o4                   ! clear cy-save register
-       blt,a   Ltriv
-       addcc   size,-2,size
         xor     s2_ptr,res_ptr,%g1
         andcc   %g1,4,%g0
         bne     L1                      ! branch if alignment differs
         nop
+! **  V1a  **
  L0:    andcc   res_ptr,4,%g0           ! res_ptr unaligned? Side effect: cy=0
-       beq     L_v1                    ! if no, branch
+       be      L_v1                    ! if no, branch
         nop
-! **  V1a  **
  /* Add least significant limb separately to align res_ptr and s2_ptr */
         ld      [s1_ptr],%g4
         add     s1_ptr,4,s1_ptr
@@ -53,12 +49,15 @@ L0: andcc   res_ptr,4,%g0           ! res_ptr unaligned? Side effect: cy=0
         addcc   %g4,%g2,%o4
         st      %o4,[res_ptr]
         add     res_ptr,4,res_ptr
+L_v1:  addx    %g0,%g0,%o4             ! save cy in register
+       cmp     size,2                  ! if size < 2 ...
+       bl      Lend2                   ! ... branch to tail code
+       subcc   %g0,%o4,%g0             ! restore cy
  
-L_v1:  ld      [s1_ptr+0],%g4
+       ld      [s1_ptr+0],%g4
+       addcc   size,-10,size
         ld      [s1_ptr+4],%g1
         ldd     [s2_ptr+0],%g2
-       addx    %g0,%g0,%o4             ! save cy in register
-       addcc   size,-10,size
         blt     Lfin1
         subcc   %g0,%o4,%g0             ! restore cy
  /* Add blocks of 8 limbs until less than 8 limbs remain */
@@ -98,7 +97,7 @@ Lfin1:        addcc   size,8-2,size
         blt     Lend1
         subcc   %g0,%o4,%g0             ! restore cy
  /* Add blocks of 2 limbs until less than 2 limbs remain */
-Loop1b:        addxcc  %g4,%g2,%o4
+Loope1:        addxcc  %g4,%g2,%o4
         ld      [s1_ptr+8],%g4
         addxcc  %g1,%g3,%o5
         ld      [s1_ptr+12],%g1
@@ -109,7 +108,7 @@ Loop1b:     addxcc  %g4,%g2,%o4
         add     s1_ptr,8,s1_ptr
         add     s2_ptr,8,s2_ptr
         add     res_ptr,8,res_ptr
-       bge     Loop1b
+       bge     Loope1
         subcc   %g0,%o4,%g0             ! restore cy
  Lend1: addxcc  %g4,%g2,%o4
         addxcc  %g1,%g3,%o5
@@ -144,10 +143,13 @@ L1:       xor     s1_ptr,res_ptr,%g1
     things can be aligned (that we care about) we now know that the alignment
     of s1_ptr and s2_ptr are the same.  */
  
-L2:    andcc   s1_ptr,4,%g0            ! s1_ptr unaligned? Side effect: cy=0
-       beq     L_v2                    ! if no, branch
+L2:    cmp     size,1
+       be      Ljone
         nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
+       andcc   s1_ptr,4,%g0            ! s1_ptr unaligned? Side effect: cy=0
+       be      L_v2                    ! if no, branch
+       nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
         ld      [s1_ptr],%g4
         add     s1_ptr,4,s1_ptr
         ld      [s2_ptr],%g2
@@ -195,9 +197,9 @@ Loop2:      ldd     [s1_ptr+0],%g2
         subcc   %g0,%o4,%g0             ! restore cy
  
  Lfin2: addcc   size,8-2,size
-Ltriv: blt     Lend2
+       blt     Lend2
         subcc   %g0,%o4,%g0             ! restore cy
-Loop2b:        ldd     [s1_ptr+0],%g2
+Loope2:        ldd     [s1_ptr+0],%g2
         ldd     [s2_ptr+0],%o4
         addxcc  %g2,%o4,%g2
         st      %g2,[res_ptr+0]
@@ -208,13 +210,13 @@ Loop2b:   ldd     [s1_ptr+0],%g2
         add     s1_ptr,8,s1_ptr
         add     s2_ptr,8,s2_ptr
         add     res_ptr,8,res_ptr
-       bge     Loop2b
+       bge     Loope2
         subcc   %g0,%o4,%g0             ! restore cy
  Lend2: andcc   size,1,%g0
         be      Lret2
         subcc   %g0,%o4,%g0             ! restore cy
  /* Add last limb */
-       ld      [s1_ptr],%g4
+Ljone: ld      [s1_ptr],%g4
         ld      [s2_ptr],%g2
         addxcc  %g4,%g2,%o4
         st      %o4,[res_ptr]
diff --git a/sysdeps/sparc/lshift.S b/sysdeps/sparc/lshift.S

index 497272a..6844fa2 100644 (file)
--- a/sysdeps/sparc/lshift.S
+++ b/sysdeps/sparc/lshift.S
@@ -1,6 +1,6 @@
  ! sparc __mpn_lshift --
  
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
@@ -39,7 +39,7 @@ C_SYMBOL_NAME(__mpn_lshift):
         add     %o2,-1,%o2
         andcc   %o2,4-1,%g4     ! number of limbs in first loop
         srl     %g2,%o5,%g1     ! compute function result
-       beq     L0              ! if multiple of 4 limbs, skip first loop
+       be      L0              ! if multiple of 4 limbs, skip first loop
         st      %g1,[%sp+80]
  
         sub     %o2,%g4,%o2     ! adjust count for main loop
@@ -56,7 +56,7 @@ Loop0:        ld      [%o1-8],%g3
          st     %o4,[%o0+0]
  
  L0:    tst     %o2
-       beq     Lend
+       be      Lend
          nop
  
  Loop:  ld      [%o1-8],%g3
diff --git a/sysdeps/sparc/rshift.S b/sysdeps/sparc/rshift.S

index 3428cfe..5a47926 100644 (file)
--- a/sysdeps/sparc/rshift.S
+++ b/sysdeps/sparc/rshift.S
@@ -1,6 +1,6 @@
  ! sparc __mpn_rshift --
  
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
@@ -36,7 +36,7 @@ C_SYMBOL_NAME(__mpn_rshift):
         add     %o2,-1,%o2
         andcc   %o2,4-1,%g4     ! number of limbs in first loop
         sll     %g2,%o5,%g1     ! compute function result
-       beq     L0              ! if multiple of 4 limbs, skip first loop
+       be      L0              ! if multiple of 4 limbs, skip first loop
         st      %g1,[%sp+80]
  
         sub     %o2,%g4,%o2     ! adjust count for main loop
@@ -53,7 +53,7 @@ Loop0:        ld      [%o1+4],%g3
          st     %o4,[%o0-4]
  
  L0:    tst     %o2
-       beq     Lend
+       be      Lend
          nop
  
  Loop:  ld      [%o1+4],%g3
diff --git a/sysdeps/sparc/sparc64/add_n.s b/sysdeps/sparc/sparc64/add_n.s

new file mode 100644 (file)

index 0000000..104a89e
--- /dev/null
+++ b/sysdeps/sparc/sparc64/add_n.s
@@ -0,0 +1,57 @@
+! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      %o0
+! s1_ptr       %o1
+! s2_ptr       %o2
+! size         %o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_add_n
+       .type    __mpn_add_n,#function
+       .proc   04
+__mpn_add_n:                           ! res[] = s1[] + s2[]; carry-out is returned in %o0
+       sub %g0,%o3,%g3                 ! g3 = -size, loop counter that runs up to 0
+       sllx %o3,3,%g1                  ! g1 = size * 8, vector length in bytes
+       add %o1,%g1,%o1                 ! make s1_ptr point at end
+       add %o2,%g1,%o2                 ! make s2_ptr point at end
+       add %o0,%g1,%o0                 ! make res_ptr point at end
+       mov 0,%o4                       ! clear carry variable
+       sllx %g3,3,%o5                  ! compute initial address index
+
+.Loop: ldx [%o2+%o5],%g1               ! load s2 limb
+       add %g3,1,%g3                   ! increment loop count
+       ldx [%o1+%o5],%g2               ! load s1 limb
+       addcc %g1,%o4,%g1               ! add s2 limb and carry variable
+       movcc %xcc,0,%o4                ! no carry-out: clear o4; on carry-out o4 (= 1) is kept
+       addcc %g1,%g2,%g1               ! add s1 limb to sum
+       stx %g1,[%o0+%o5]               ! store result
+       add %o5,8,%o5                   ! increment address index
+       brnz,pt %g3,.Loop               ! loop until the counter reaches 0
+       movcs %xcc,1,%o4                ! if s1 add gave carry, record it (branch delay slot)
+
+       retl
+       mov %o4,%o0                     ! return the final carry (delay slot of retl)
+.LLfe1:
+       .size    __mpn_add_n,.LLfe1-__mpn_add_n
diff --git a/sysdeps/sparc/sparc64/addmul_1.s b/sysdeps/sparc/sparc64/addmul_1.s

new file mode 100644 (file)

index 0000000..ef013ee
--- /dev/null
+++ b/sysdeps/sparc/sparc64/addmul_1.s
@@ -0,0 +1,88 @@
+! SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+! add the product to a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      o0
+! s1_ptr       o1
+! size         o2
+! s2_limb      o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_addmul_1
+       .type    __mpn_addmul_1,#function
+       .proc   016
+__mpn_addmul_1:                        ! res[] += s1[] * s2_limb; returns the carry-out limb
+       !#PROLOGUE#     0
+       save    %sp,-160,%sp
+       !#PROLOGUE#     1
+       sub     %g0,%i2,%o7             ! o7 = -size, loop counter that runs up to 0
+       sllx    %o7,3,%g5               ! g5 = -size * 8
+       sub     %i1,%g5,%o3             ! o3 = s1_ptr + size * 8 (end of s1)
+       sub     %i0,%g5,%o4             ! o4 = res_ptr + size * 8 (end of res)
+       mov     0,%o0                   ! zero cy_limb
+
+       srl     %i3,0,%o1               ! extract low 32 bits of s2_limb
+       srlx    %i3,32,%i3              ! extract high 32 bits of s2_limb
+       mov     1,%o2
+       sllx    %o2,32,%o2              ! o2 = 0x100000000
+
+       !   hi   !
+             !  mid-1 !
+             !  mid-2 !
+                !   lo   !
+.Loop:
+       sllx    %o7,3,%g1               ! g1 = (negative) byte index of the current limb
+       ldx     [%o3+%g1],%g5           ! load s1 limb
+       srl     %g5,0,%i0               ! zero hi bits
+       srlx    %g5,32,%g5              ! g5 = high 32 bits of s1 limb
+       mulx    %o1,%i0,%i4             ! lo product
+       mulx    %i3,%i0,%i1             ! mid-1 product
+       mulx    %o1,%g5,%l2             ! mid-2 product
+       mulx    %i3,%g5,%i5             ! hi product
+       srlx    %i4,32,%i0              ! extract high 32 bits of lo product...
+       add     %i1,%i0,%i1             ! ...and add it to the mid-1 product
+       addcc   %i1,%l2,%i1             ! add mid products
+       mov     0,%l0                   ! we need the carry from that add...
+       movcs   %xcc,%o2,%l0            ! ...compute it and...
+       add     %i5,%l0,%i5             ! ...add to bit 32 of the hi product
+       sllx    %i1,32,%i0              ! align low bits of mid product
+       srl     %i4,0,%g5               ! zero high 32 bits of lo product
+       add     %i0,%g5,%i0             ! combine into low 64 bits of result
+       srlx    %i1,32,%i1              ! extract high bits of mid product...
+       add     %i5,%i1,%i1             ! ...and add them to the high result
+       addcc   %i0,%o0,%i0             ! add cy_limb to low 64 bits of result
+       add     %i1,1,%o0               ! new cy_limb = high result + 1 (add keeps the flags)...
+       movcc   %xcc,%i1,%o0            ! ...or just the high result if that add did not carry
+       add     %o7,1,%o7               ! increment loop count
+       ldx     [%o4+%g1],%l1           ! load res limb
+       addcc   %l1,%i0,%i0             ! add res limb to low 64 bits of result
+       add     %o0,1,%g5               ! this add can carry too, even when the previous one did,
+       movcs   %xcc,%g5,%o0            ! ...so count it on top instead of overwriting a flag
+       brnz    %o7,.Loop
+       stx     %i0,[%o4+%g1]           ! store result limb (branch delay slot, always executed)
+
+       mov     %o0,%i0                 ! hand cy_limb to the caller: %i0 becomes its %o0
+       ret
+       restore
+.LLfe1:
+       .size  __mpn_addmul_1,.LLfe1-__mpn_addmul_1
diff --git a/sysdeps/sparc/sparc64/gmp-mparam.h b/sysdeps/sparc/sparc64/gmp-mparam.h

new file mode 100644 (file)

index 0000000..05c893f
--- /dev/null
+++ b/sysdeps/sparc/sparc64/gmp-mparam.h
@@ -0,0 +1,26 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#define BITS_PER_MP_LIMB 64            /* a limb is one 64-bit word */
+#define BYTES_PER_MP_LIMB 8            /* BITS_PER_MP_LIMB / BITS_PER_CHAR */
+#define BITS_PER_LONGINT 64            /* presumably the width of `long' on this target */
+#define BITS_PER_INT 32                /* presumably the width of `int' */
+#define BITS_PER_SHORTINT 16           /* presumably the width of `short' */
+#define BITS_PER_CHAR 8                /* bits in one byte */
diff --git a/sysdeps/sparc/sparc64/lshift.s b/sysdeps/sparc/sparc64/lshift.s

new file mode 100644 (file)

index 0000000..bd7fa01
--- /dev/null
+++ b/sysdeps/sparc/sparc64/lshift.s
@@ -0,0 +1,95 @@
+! SPARC v9 __mpn_lshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      %o0
+! src_ptr      %o1
+! size         %o2
+! cnt          %o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_lshift
+       .type    __mpn_lshift,#function
+       .proc   04
+__mpn_lshift:                  ! res[] = src[] << cnt, walking from the high limb down
+       sllx    %o2,3,%g1       ! g1 = size * 8, vector length in bytes
+       add     %o1,%g1,%o1     ! make %o1 point at end of src
+       ldx     [%o1-8],%g2     ! load first limb
+       sub     %g0,%o3,%o5     ! negate shift count
+       add     %o0,%g1,%o0     ! make %o0 point at end of res
+       add     %o2,-1,%o2      ! size - 1 limbs remain after the one just loaded
+       and     %o2,4-1,%g4     ! number of limbs in first loop
+       srlx    %g2,%o5,%g1     ! compute function result
+       brz,pn  %g4,.L0         ! if multiple of 4 limbs, skip first loop
+       stx     %g1,[%sp+80]    ! park function result on the stack (branch delay slot)
+
+       sub     %o2,%g4,%o2     ! adjust count for main loop
+
+.Loop0:        ldx     [%o1-16],%g3    ! g3 = next lower limb
+       add     %o0,-8,%o0
+       add     %o1,-8,%o1
+       add     %g4,-1,%g4      ! one limb less for this loop
+       sllx    %g2,%o3,%o4     ! current limb shifted up...
+       srlx    %g3,%o5,%g1     ! ...joined with the bits shifted out of the lower limb
+       mov     %g3,%g2         ! lower limb becomes the current limb
+       or      %o4,%g1,%o4
+       brnz,pt %g4,.Loop0
+        stx    %o4,[%o0+0]     ! store result limb (branch delay slot)
+
+.L0:   brz,pn  %o2,.Lend       ! nothing left for the 4-way unrolled loop?
+        nop
+
+.Loop: ldx     [%o1-16],%g3    ! 4 limbs per pass; %g2 and %g3 alternate as the current limb
+       add     %o0,-32,%o0
+       add     %o2,-4,%o2
+       sllx    %g2,%o3,%o4
+       srlx    %g3,%o5,%g1
+
+       ldx     [%o1-24],%g2
+       sllx    %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       stx     %o4,[%o0+24]
+       srlx    %g2,%o5,%g1
+
+       ldx     [%o1-32],%g3
+       sllx    %g2,%o3,%o4
+       or      %g4,%g1,%g4
+       stx     %g4,[%o0+16]
+       srlx    %g3,%o5,%g1
+
+       ldx     [%o1-40],%g2
+       sllx    %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       stx     %o4,[%o0+8]
+       srlx    %g2,%o5,%g1
+
+       add     %o1,-32,%o1
+       or      %g4,%g1,%g4
+       brnz,pt %o2,.Loop
+        stx    %g4,[%o0+0]     ! store fourth result limb (branch delay slot)
+
+.Lend: sllx    %g2,%o3,%g2     ! lowest limb: nothing below it to bring in
+       stx     %g2,[%o0-8]
+       retl
+       ldx     [%sp+80],%o0    ! return parked result; NOTE(review): %sp+80 must be 8-byte aligned for ldx/stx -- confirm for a biased-%sp v9 ABI
+.LLfe1:
+       .size    __mpn_lshift,.LLfe1-__mpn_lshift
diff --git a/sysdeps/sparc/sparc64/mul_1.s b/sysdeps/sparc/sparc64/mul_1.s

new file mode 100644 (file)

index 0000000..41be370
--- /dev/null
+++ b/sysdeps/sparc/sparc64/mul_1.s
@@ -0,0 +1,85 @@
+! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      o0
+! s1_ptr       o1
+! size         o2
+! s2_limb      o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_mul_1
+       .type    __mpn_mul_1,#function
+       .proc   016
+__mpn_mul_1:                           ! res[] = s1[] * s2_limb; returns the carry-out limb
+       !#PROLOGUE#     0
+       save    %sp,-160,%sp
+       !#PROLOGUE#     1
+       sub     %g0,%i2,%o7             ! o7 = -size, loop counter that runs up to 0
+       sllx    %o7,3,%g5               ! g5 = -size * 8
+       sub     %i1,%g5,%o3             ! o3 = s1_ptr + size * 8 (end of s1)
+       sub     %i0,%g5,%o4             ! o4 = res_ptr + size * 8 (end of res)
+       mov     0,%o0                   ! zero cy_limb
+
+       srl     %i3,0,%o1               ! extract low 32 bits of s2_limb
+       srlx    %i3,32,%i3              ! extract high 32 bits of s2_limb
+       mov     1,%o2
+       sllx    %o2,32,%o2              ! o2 = 0x100000000
+
+       !   hi   !
+             !  mid-1 !
+             !  mid-2 !
+                !   lo   !
+.Loop:
+       sllx    %o7,3,%g1               ! g1 = (negative) byte index of the current limb
+       ldx     [%o3+%g1],%g5           ! load s1 limb
+       srl     %g5,0,%i0               ! zero hi bits
+       srlx    %g5,32,%g5              ! g5 = high 32 bits of s1 limb
+       mulx    %o1,%i0,%i4             ! lo product
+       mulx    %i3,%i0,%i1             ! mid-1 product
+       mulx    %o1,%g5,%l2             ! mid-2 product
+       mulx    %i3,%g5,%i5             ! hi product
+       srlx    %i4,32,%i0              ! extract high 32 bits of lo product...
+       add     %i1,%i0,%i1             ! ...and add it to the mid-1 product
+       addcc   %i1,%l2,%i1             ! add mid products
+       mov     0,%l0                   ! we need the carry from that add...
+       movcs   %xcc,%o2,%l0            ! ...compute it and...
+       add     %i5,%l0,%i5             ! ...add to bit 32 of the hi product
+       sllx    %i1,32,%i0              ! align low bits of mid product
+       srl     %i4,0,%g5               ! zero high 32 bits of lo product
+       add     %i0,%g5,%i0             ! combine into low 64 bits of result
+       srlx    %i1,32,%i1              ! extract high bits of mid product...
+       add     %i5,%i1,%i1             ! ...and add them to the high result
+       addcc   %i0,%o0,%i0             ! add cy_limb to low 64 bits of result
+       mov     0,%g5                   ! g5 = carry out of that add...
+       movcs   %xcc,1,%g5              ! ...1 if it carried, else 0
+       add     %o7,1,%o7               ! increment loop count
+       stx     %i0,[%o4+%g1]           ! store result limb
+       brnz    %o7,.Loop
+       add     %i1,%g5,%o0             ! compute new cy_limb
+
+       mov     %o0,%i0                 ! hand cy_limb to the caller: %i0 becomes its %o0
+       ret
+       restore
+.LLfe1:
+       .size  __mpn_mul_1,.LLfe1-__mpn_mul_1
diff --git a/sysdeps/sparc/sparc64/rshift.s b/sysdeps/sparc/sparc64/rshift.s

new file mode 100644 (file)

index 0000000..971deec
--- /dev/null
+++ b/sysdeps/sparc/sparc64/rshift.s
@@ -0,0 +1,92 @@
+! SPARC v9 __mpn_rshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      %o0
+! src_ptr      %o1
+! size         %o2
+! cnt          %o3
+
+.section       ".text"
+       .align  4
+       .global __mpn_rshift
+       .type   __mpn_rshift,#function
+       .proc   04
+__mpn_rshift:                  ! res[] = src[] >> cnt, walking from the low limb up
+       ldx     [%o1],%g2       ! load first limb
+       sub     %g0,%o3,%o5     ! negate shift count
+       add     %o2,-1,%o2      ! size - 1 limbs remain after the one just loaded
+       and     %o2,4-1,%g4     ! number of limbs in first loop
+       sllx    %g2,%o5,%g1     ! compute function result
+       brz,pn  %g4,.L0         ! if multiple of 4 limbs, skip first loop
+       stx     %g1,[%sp+80]    ! park function result on the stack (branch delay slot)
+
+       sub     %o2,%g4,%o2     ! adjust count for main loop
+
+.Loop0:        ldx     [%o1+8],%g3     ! g3 = next higher limb
+       add     %o0,8,%o0
+       add     %o1,8,%o1
+       add     %g4,-1,%g4      ! one limb less for this loop
+       srlx    %g2,%o3,%o4     ! current limb shifted down...
+       sllx    %g3,%o5,%g1     ! ...joined with the bits shifted out of the higher limb
+       mov     %g3,%g2         ! higher limb becomes the current limb
+       or      %o4,%g1,%o4
+       brnz,pt %g4,.Loop0
+        stx    %o4,[%o0-8]     ! store result limb (branch delay slot)
+
+.L0:   brz,pn  %o2,.Lend       ! nothing left for the 4-way unrolled loop?
+        nop
+
+.Loop: ldx     [%o1+8],%g3     ! 4 limbs per pass; %g2 and %g3 alternate as the current limb
+       add     %o0,32,%o0
+       add     %o2,-4,%o2
+       srlx    %g2,%o3,%o4
+       sllx    %g3,%o5,%g1
+
+       ldx     [%o1+16],%g2
+       srlx    %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       stx     %o4,[%o0-32]
+       sllx    %g2,%o5,%g1
+
+       ldx     [%o1+24],%g3
+       srlx    %g2,%o3,%o4
+       or      %g4,%g1,%g4
+       stx     %g4,[%o0-24]
+       sllx    %g3,%o5,%g1
+
+       ldx     [%o1+32],%g2
+       srlx    %g3,%o3,%g4
+       or      %o4,%g1,%o4
+       stx     %o4,[%o0-16]
+       sllx    %g2,%o5,%g1
+
+       add     %o1,32,%o1
+       or      %g4,%g1,%g4
+       brnz    %o2,.Loop
+        stx    %g4,[%o0-8]     ! store fourth result limb (branch delay slot)
+
+.Lend: srlx    %g2,%o3,%g2     ! highest limb: nothing above it to bring in
+       stx     %g2,[%o0-0]
+       retl
+       ldx     [%sp+80],%o0    ! return parked result; NOTE(review): %sp+80 must be 8-byte aligned for ldx/stx -- confirm for a biased-%sp v9 ABI
+.LLfe1:
+       .size   __mpn_rshift,.LLfe1-__mpn_rshift
diff --git a/sysdeps/sparc/sparc64/sub_n.s b/sysdeps/sparc/sparc64/sub_n.s

new file mode 100644 (file)

index 0000000..7099bf4
--- /dev/null
+++ b/sysdeps/sparc/sparc64/sub_n.s
@@ -0,0 +1,57 @@
+! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      %o0
+! s1_ptr       %o1
+! s2_ptr       %o2
+! size         %o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_sub_n
+       .type    __mpn_sub_n,#function
+       .proc   04
+__mpn_sub_n:                           ! res[] = s1[] - s2[]; borrow-out is returned in %o0
+       sub %g0,%o3,%g3                 ! g3 = -size, loop counter that runs up to 0
+       sllx %o3,3,%g1                  ! g1 = size * 8, vector length in bytes
+       add %o1,%g1,%o1                 ! make s1_ptr point at end
+       add %o2,%g1,%o2                 ! make s2_ptr point at end
+       add %o0,%g1,%o0                 ! make res_ptr point at end
+       mov 0,%o4                       ! clear carry variable
+       sllx %g3,3,%o5                  ! compute initial address index
+
+.Loop: ldx [%o2+%o5],%g1               ! load s2 limb
+       add %g3,1,%g3                   ! increment loop count
+       ldx [%o1+%o5],%g2               ! load s1 limb
+       addcc %g1,%o4,%g1               ! add s2 limb and carry variable
+       movcc %xcc,0,%o4                ! no carry-out: clear o4; on carry-out o4 (= 1) is kept
+       subcc %g2,%g1,%g1               ! subtract that sum from the s1 limb (s1 - s2 - borrow)
+       stx %g1,[%o0+%o5]               ! store result
+       add %o5,8,%o5                   ! increment address index
+       brnz,pt %g3,.Loop               ! loop until the counter reaches 0
+       movcs %xcc,1,%o4                ! if the subtract gave borrow, record it (delay slot)
+
+       retl
+       mov %o4,%o0                     ! return the final borrow (delay slot of retl)
+.LLfe1:
+       .size    __mpn_sub_n,.LLfe1-__mpn_sub_n
diff --git a/sysdeps/sparc/sparc64/submul_1.s b/sysdeps/sparc/sparc64/submul_1.s

new file mode 100644 (file)

index 0000000..f0df38c
--- /dev/null
+++ b/sysdeps/sparc/sparc64/submul_1.s
@@ -0,0 +1,88 @@
+! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+! subtract the product from a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr      o0
+! s1_ptr       o1
+! size         o2
+! s2_limb      o3
+
+.section       ".text"
+       .align 4
+       .global __mpn_submul_1
+       .type    __mpn_submul_1,#function
+       .proc   016
+__mpn_submul_1:                        ! res[] -= s1[] * s2_limb; returns the borrow-out limb
+       !#PROLOGUE#     0
+       save    %sp,-160,%sp
+       !#PROLOGUE#     1
+       sub     %g0,%i2,%o7             ! o7 = -size, loop counter that runs up to 0
+       sllx    %o7,3,%g5               ! g5 = -size * 8
+       sub     %i1,%g5,%o3             ! o3 = s1_ptr + size * 8 (end of s1)
+       sub     %i0,%g5,%o4             ! o4 = res_ptr + size * 8 (end of res)
+       mov     0,%o0                   ! zero cy_limb
+
+       srl     %i3,0,%o1               ! extract low 32 bits of s2_limb
+       srlx    %i3,32,%i3              ! extract high 32 bits of s2_limb
+       mov     1,%o2
+       sllx    %o2,32,%o2              ! o2 = 0x100000000
+
+       !   hi   !
+             !  mid-1 !
+             !  mid-2 !
+                !   lo   !
+.Loop:
+       sllx    %o7,3,%g1               ! g1 = (negative) byte index of the current limb
+       ldx     [%o3+%g1],%g5           ! load s1 limb
+       srl     %g5,0,%i0               ! zero hi bits
+       srlx    %g5,32,%g5              ! g5 = high 32 bits of s1 limb
+       mulx    %o1,%i0,%i4             ! lo product
+       mulx    %i3,%i0,%i1             ! mid-1 product
+       mulx    %o1,%g5,%l2             ! mid-2 product
+       mulx    %i3,%g5,%i5             ! hi product
+       srlx    %i4,32,%i0              ! extract high 32 bits of lo product...
+       add     %i1,%i0,%i1             ! ...and add it to the mid-1 product
+       addcc   %i1,%l2,%i1             ! add mid products
+       mov     0,%l0                   ! we need the carry from that add...
+       movcs   %xcc,%o2,%l0            ! ...compute it and...
+       add     %i5,%l0,%i5             ! ...add to bit 32 of the hi product
+       sllx    %i1,32,%i0              ! align low bits of mid product
+       srl     %i4,0,%g5               ! zero high 32 bits of lo product
+       add     %i0,%g5,%i0             ! combine into low 64 bits of result
+       srlx    %i1,32,%i1              ! extract high bits of mid product...
+       add     %i5,%i1,%i1             ! ...and add them to the high result
+       addcc   %i0,%o0,%i0             ! add cy_limb to low 64 bits of result
+       add     %i1,1,%o0               ! new cy_limb = high result + 1 (add keeps the flags)...
+       movcc   %xcc,%i1,%o0            ! ...or just the high result if that add did not carry
+       add     %o7,1,%o7               ! increment loop count
+       ldx     [%o4+%g1],%l1           ! load res limb
+       subcc   %l1,%i0,%i0             ! subtract low 64 bits of result from res limb
+       add     %o0,1,%g5               ! this can borrow too, even when the add above carried,
+       movcs   %xcc,%g5,%o0            ! ...so count it on top instead of overwriting a flag
+       brnz    %o7,.Loop
+       stx     %i0,[%o4+%g1]           ! store result limb (branch delay slot, always executed)
+
+       mov     %o0,%i0                 ! hand cy_limb to the caller: %i0 becomes its %o0
+       ret
+       restore
+.LLfe1:
+       .size  __mpn_submul_1,.LLfe1-__mpn_submul_1
diff --git a/sysdeps/sparc/sparc8/addmul_1.S b/sysdeps/sparc/sparc8/addmul_1.S

index d1de0c3..1cf5128 100644 (file)
--- a/sysdeps/sparc/sparc8/addmul_1.S
+++ b/sysdeps/sparc/sparc8/addmul_1.S
@@ -1,7 +1,7 @@
  ! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
  ! add the result to a second limb vector.
  
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
diff --git a/sysdeps/sparc/sparc8/mul_1.S b/sysdeps/sparc/sparc8/mul_1.S

index 42717be..d56394e 100644 (file)
--- a/sysdeps/sparc/sparc8/mul_1.S
+++ b/sysdeps/sparc/sparc8/mul_1.S
@@ -1,7 +1,7 @@
  ! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
  ! store the product in a second limb vector.
  
-! Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
diff --git a/sysdeps/sparc/sub_n.S b/sysdeps/sparc/sub_n.S

index 2e217ed..9ff5b7b 100644 (file)
--- a/sysdeps/sparc/sub_n.S
+++ b/sysdeps/sparc/sub_n.S
@@ -1,7 +1,7 @@
-! sparc __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
  ! store difference in a third limb vector.
  
-! Copyright (C) 1995 Free Software Foundation, Inc.
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
  
@@ -38,7 +38,7 @@ C_SYMBOL_NAME(__mpn_sub_n):
         nop
  ! **  V1a  **
         andcc   res_ptr,4,%g0           ! res_ptr unaligned? Side effect: cy=0
-       beq     L_v1                    ! if no, branch
+       be      L_v1                    ! if no, branch
         nop
  /* Add least significant limb separately to align res_ptr and s2_ptr */
         ld      [s1_ptr],%g4
@@ -133,9 +133,9 @@ L1: xor     s1_ptr,res_ptr,%g1
         nop
  ! **  V1b  **
         andcc   res_ptr,4,%g0           ! res_ptr unaligned? Side effect: cy=0
-       beq     L_v1b                   ! if no, branch
+       be      L_v1b                   ! if no, branch
         nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
+/* Add least significant limb separately to align res_ptr and s1_ptr */
         ld      [s2_ptr],%g4
         add     s2_ptr,4,s2_ptr
         ld      [s1_ptr],%g2
@@ -232,7 +232,7 @@ L2: cmp     size,1
         be      Ljone
         nop
         andcc   s1_ptr,4,%g0            ! s1_ptr unaligned? Side effect: cy=0
-       beq     L_v2                    ! if no, branch
+       be      L_v2                    ! if no, branch
         nop
  /* Add least significant limb separately to align s1_ptr and s2_ptr */
         ld      [s1_ptr],%g4
diff --git a/sysdeps/unix/sysv/linux/m68k/profil-counter.h b/sysdeps/unix/sysv/linux/m68k/profil-counter.h

new file mode 100644 (file)

index 0000000..4e7b132
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/m68k/profil-counter.h
@@ -0,0 +1,24 @@
+/* Machine-dependent SIGPROF signal handler.  Linux/m68k version.
+Copyright (C) 1996 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB.  If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA.  */
+  /* SIGPROF handler: passes SCP->sc_pc to profil_count; SIGNR and CODE are unused.  */
+static void
+profil_counter (int signr, int code, struct sigcontext *scp)
+{
+  profil_count ((void *) scp->sc_pc);
+}
diff --git a/sysdeps/vax/gmp-mparam.h b/sysdeps/vax/gmp-mparam.h

index 687f12a..ddc308a 100644 (file)
--- a/sysdeps/vax/gmp-mparam.h
+++ b/sysdeps/vax/gmp-mparam.h
@@ -1,6 +1,6 @@
  /* gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/sysdeps/z8000/mul_1.s b/sysdeps/z8000/mul_1.s

index 2075225..0150e85 100644 (file)
--- a/sysdeps/z8000/mul_1.s
+++ b/sysdeps/z8000/mul_1.s
@@ -1,7 +1,7 @@
  ! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
  ! the result in a second limb vector.
  
-! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
  
  ! This file is part of the GNU MP Library.
author	Roland McGrath <roland@gnu.org>
	Fri, 1 Mar 1996 18:45:35 +0000 (18:45 +0000)
committer	Roland McGrath <roland@gnu.org>
	Fri, 1 Mar 1996 18:45:35 +0000 (18:45 +0000)
ChangeLog		patch \| blob \| history
libc-symbols.h		patch \| blob \| history
stdlib/Makefile		patch \| blob \| history
stdlib/gmp-impl.h		patch \| blob \| history
stdlib/gmp.h		patch \| blob \| history
stdlib/strtod.c		patch \| blob \| history
sysdeps/alpha/addmul_1.s		patch \| blob \| history
sysdeps/alpha/alphaev5/add_n.s		patch \| blob \| history
sysdeps/alpha/alphaev5/lshift.s		patch \| blob \| history
sysdeps/alpha/alphaev5/rshift.s		patch \| blob \| history
sysdeps/alpha/alphaev5/sub_n.s	[new file with mode: 0644]	patch \| blob
sysdeps/alpha/lshift.s		patch \| blob \| history
sysdeps/alpha/mul_1.s		patch \| blob \| history
sysdeps/alpha/rshift.s		patch \| blob \| history
sysdeps/alpha/submul_1.s		patch \| blob \| history
sysdeps/alpha/udiv_qrnnd.S		patch \| blob \| history
sysdeps/generic/add_n.c		patch \| blob \| history
sysdeps/generic/addmul_1.c		patch \| blob \| history
sysdeps/generic/cmp.c		patch \| blob \| history
sysdeps/generic/divmod_1.c		patch \| blob \| history
sysdeps/generic/lshift.c		patch \| blob \| history
sysdeps/generic/mod_1.c		patch \| blob \| history
sysdeps/generic/mul.c		patch \| blob \| history
sysdeps/generic/mul_1.c		patch \| blob \| history
sysdeps/generic/mul_n.c		patch \| blob \| history
sysdeps/generic/rshift.c		patch \| blob \| history
sysdeps/generic/sub_n.c		patch \| blob \| history
sysdeps/generic/submul_1.c		patch \| blob \| history
sysdeps/i386/gmp-mparam.h		patch \| blob \| history
sysdeps/i386/i586/add_n.S		patch \| blob \| history
sysdeps/i386/i586/addmul_1.S		patch \| blob \| history
sysdeps/i386/i586/mul_1.S		patch \| blob \| history
sysdeps/i386/i586/sub_n.S		patch \| blob \| history
sysdeps/i386/i586/submul_1.S		patch \| blob \| history
sysdeps/m68k/add_n.S		patch \| blob \| history
sysdeps/m68k/lshift.S	[new file with mode: 0644]	patch \| blob
sysdeps/m68k/m68020/addmul_1.S		patch \| blob \| history
sysdeps/m68k/m68020/mul_1.S		patch \| blob \| history
sysdeps/m68k/m68020/submul_1.S		patch \| blob \| history
sysdeps/m68k/rshift.S	[new file with mode: 0644]	patch \| blob
sysdeps/m68k/sub_n.S		patch \| blob \| history
sysdeps/m88k/add_n.s		patch \| blob \| history
sysdeps/m88k/m88110/add_n.S	[new file with mode: 0644]	patch \| blob
sysdeps/m88k/m88110/addmul_1.s	[new file with mode: 0644]	patch \| blob
sysdeps/m88k/m88110/mul_1.s		patch \| blob \| history
sysdeps/m88k/m88110/sub_n.S	[new file with mode: 0644]	patch \| blob
sysdeps/m88k/mul_1.s		patch \| blob \| history
sysdeps/m88k/sub_n.s		patch \| blob \| history
sysdeps/mips/addmul_1.s		patch \| blob \| history
sysdeps/mips/mips3/addmul_1.s		patch \| blob \| history
sysdeps/mips/mips3/mul_1.s		patch \| blob \| history
sysdeps/mips/mips3/submul_1.s		patch \| blob \| history
sysdeps/mips/mul_1.s		patch \| blob \| history
sysdeps/mips/submul_1.s		patch \| blob \| history
sysdeps/rs6000/add_n.s		patch \| blob \| history
sysdeps/rs6000/sub_n.s		patch \| blob \| history
sysdeps/sparc/add_n.S		patch \| blob \| history
sysdeps/sparc/lshift.S		patch \| blob \| history
sysdeps/sparc/rshift.S		patch \| blob \| history
sysdeps/sparc/sparc64/add_n.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/addmul_1.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/gmp-mparam.h	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/lshift.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/mul_1.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/rshift.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/sub_n.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/submul_1.s	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc8/addmul_1.S		patch \| blob \| history
sysdeps/sparc/sparc8/mul_1.S		patch \| blob \| history
sysdeps/sparc/sub_n.S		patch \| blob \| history
sysdeps/unix/sysv/linux/m68k/profil-counter.h	[new file with mode: 0644]	patch \| blob
sysdeps/vax/gmp-mparam.h		patch \| blob \| history
sysdeps/z8000/mul_1.s		patch \| blob \| history