/* mpn/sparc64/sparc64.h -- UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


  25 #define LOW32(x)   ((x) & 0xFFFFFFFF)
  26 #define HIGH32(x)  ((x) >> 32)
  27
  28
  29 /* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
  30    Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
  31    effect of swapping the two halves in this case.  */
  32 #if HAVE_LIMB_BIG_ENDIAN
  33 #define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
  34 #endif
  35 #if HAVE_LIMB_LITTLE_ENDIAN
  36 #define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
  37 #endif
  38 #ifndef HALF_ENDIAN_ADJ
  39 Error, error, unknown limb endianness;
  40 #endif
  41
  42
  43 /* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
  44    of that product is equal to l.  dh and dl are the 32-bit halves of d.
  45
  46    |-----high----||----low-----|
  47    +------+------+
  48    |             |                 ph = qh * dh
  49    +------+------+
  50           +------+------+
  51           |             |          pm1 = ql * dh
  52           +------+------+
  53           +------+------+
  54           |             |          pm2 = qh * dl
  55           +------+------+
  56                  +------+------+
  57                  |             |   pl = ql * dl (not calculated)
  58                  +------+------+
  59
  60    Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
  61    + HIGH(pl) == HIGH(l).  The only thing we need from those product parts
  62    is whether they produce a carry into the high.
  63
  64    pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
  65    time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
  66    HIGH(l).  pl is never actually calculated.  */
  67
  68 #define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  69   do {                                          \
  70     mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
  71     ASSERT (dh == HIGH32(d));                   \
  72     ASSERT (dl == LOW32(d));                    \
  73     ASSERT (q*d == l);                          \
  74                                                 \
  75     ql = LOW32 (q);                             \
  76     qh = HIGH32 (q);                            \
  77                                                 \
  78     pm1 = ql * dh;                              \
  79     pm2 = qh * dl;                              \
  80     ph  = qh * dh;                              \
  81                                                 \
  82     pm_l = LOW32 (pm1) + LOW32 (pm2);           \
  83                                                 \
  84     (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
  85       + HIGH32 (pm_l) + ((pm_l << 32) > l);     \
  86                                                 \
  87     ASSERT_HIGH_PRODUCT (h, q, d);              \
  88   } while (0)
  89
  90
  91 /* Set h to the high of q*d, assuming the low limb of that product is equal
  92    to l, and that d fits in 32-bits.
  93
  94    |-----high----||----low-----|
  95           +------+------+
  96           |             |          pm = qh * dl
  97           +------+------+
  98                  +------+------+
  99                  |             |   pl = ql * dl (not calculated)
 100                  +------+------+
 101
 102    Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
 103    time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
 104    need to calculate pl to determine this.  */
 105
 106 #define umul_ppmm_half_lowequal(h, q, d, l)     \
 107   do {                                          \
 108     mp_limb_t pm;                               \
 109     ASSERT (q*d == l);                          \
 110     ASSERT (HIGH32(d) == 0);                    \
 111                                                 \
 112     pm = HIGH32(q) * d;                         \
 113     (h) = HIGH32(pm) + ((pm << 32) > l);        \
 114     ASSERT_HIGH_PRODUCT (h, q, d);              \
 115   } while (0)
 116
 117
 118 /* check that h is the high limb of x*y */
 119 #if WANT_ASSERT
 120 #define ASSERT_HIGH_PRODUCT(h, x, y)    \
 121   do {                                  \
 122     mp_limb_t  want_h, dummy;           \
 123     umul_ppmm (want_h, dummy, x, y);    \
 124     ASSERT (h == want_h);               \
 125   } while (0)
 126 #else
 127 #define ASSERT_HIGH_PRODUCT(h, q, d)    \
 128   do { } while (0)
 129 #endif
 130
 131
 132 /* Multiply u anv v, where v < 2^32.  */
 133 #define umul_ppmm_s(w1, w0, u, v)                                       \
 134   do {                                                                  \
 135     UWtype __x0, __x2;                                                  \
 136     UWtype __ul, __vl, __uh;                                            \
 137     UWtype __u = (u), __v = (v);                                        \
 138                                                                         \
 139     __ul = __ll_lowpart (__u);                                          \
 140     __uh = __ll_highpart (__u);                                         \
 141     __vl = __ll_lowpart (__v);                                          \
 142                                                                         \
 143     __x0 = (UWtype) __ul * __vl;                                        \
 144     __x2 = (UWtype) __uh * __vl;                                        \
 145                                                                         \
 146     (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;           \
 147     (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;                              \
 148   } while (0)
 149
 150 /* Count the leading zeros on a limb, but assuming it fits in 32 bits.
 151    The count returned will be in the range 32 to 63.
 152    This is the 32-bit generic C count_leading_zeros from longlong.h. */
 153 #define count_leading_zeros_32(count, x)                                      \
 154   do {                                                                        \
 155     mp_limb_t  __xr = (x);                                                    \
 156     unsigned   __a;                                                           \
 157     ASSERT ((x) != 0);                                                        \
 158     ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
 159     __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
 160       : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
 161                                                                               \
 162     (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
 163   } while (0)
 164
 165
 166 /* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
 167    32 bits and is normalized (high bit set).  */
 168 #define invert_half_limb(inv, d)                \
 169   do {                                          \
 170     mp_limb_t  _n;                              \
 171     ASSERT ((d) <= 0xFFFFFFFF);                 \
 172     ASSERT ((d) & 0x80000000);                  \
 173     _n = (((mp_limb_t) -(d)) << 32) - 1;        \
 174     (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
 175   } while (0)
 176
 177
 178 /* Divide nh:nl by d, setting q to the quotient and r to the remainder.
 179    q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
 180    dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  */
 181
 182 #define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
 183   do {                                                                  \
 184     unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
 185     mp_limb_t  _n, _x;                                                  \
 186     ASSERT (d_limb <= 0xFFFFFFFF);                                      \
 187     ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \
 188     ASSERT (d_limb & 0x80000000);                                       \
 189     ASSERT (nh < d_limb);                                               \
 190     _n10 = (nl);                                                        \
 191     _n2 = (nh);                                                         \
 192     _n1 = (int) _n10 >> 31;                                             \
 193     _nadj = _n10 + (_n1 & d_limb);                                      \
 194     _x = dinv_limb * (_n2 - _n1) + _nadj;                               \
 195     _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
 196     _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
 197     _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \
 198     _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \
 199     ASSERT (_xh == 0 || _xh == ~0);                                     \
 200     _r = _x + (d_limb & _xh);                 /* addback */             \
 201     _q = _xh - _q11n;                         /* q1+1-addback */        \
 202     ASSERT (_r < d_limb);                                               \
 203     ASSERT (d_limb * _q + _r == _n);                                    \
 204     (r) = _r;                                                           \
 205     (q) = _q;                                                           \
 206   } while (0)
 207
 208