mpn/sparc64/sparc64.h

   1 /* UltraSPARC 64 support macros.
   2
   3    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   4    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   5    FUTURE GNU MP RELEASES.
   6
   7 Copyright 2003 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of either:
  13
  14   * the GNU Lesser General Public License as published by the Free
  15     Software Foundation; either version 3 of the License, or (at your
  16     option) any later version.
  17
  18 or
  19
  20   * the GNU General Public License as published by the Free Software
  21     Foundation; either version 2 of the License, or (at your option) any
  22     later version.
  23
  24 or both in parallel, as here.
  25
  26 The GNU MP Library is distributed in the hope that it will be useful, but
  27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  29 for more details.
  30
  31 You should have received copies of the GNU General Public License and the
  32 GNU Lesser General Public License along with the GNU MP Library.  If not,
  33 see https://www.gnu.org/licenses/.  */
  34
  35
  36 #define LOW32(x)   ((x) & 0xFFFFFFFF)
  37 #define HIGH32(x)  ((x) >> 32)
  38
  39
  40 /* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
  41    Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
  42    effect of swapping the two halves in this case.  */
  43 #if HAVE_LIMB_BIG_ENDIAN
  44 #define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
  45 #endif
  46 #if HAVE_LIMB_LITTLE_ENDIAN
  47 #define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
  48 #endif
  49 #ifndef HALF_ENDIAN_ADJ
  50 Error, error, unknown limb endianness;
  51 #endif
  52
  53
  54 /* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
  55    of that product is equal to l.  dh and dl are the 32-bit halves of d.
  56
  57    |-----high----||----low-----|
  58    +------+------+
  59    |             |                 ph = qh * dh
  60    +------+------+
  61           +------+------+
  62           |             |          pm1 = ql * dh
  63           +------+------+
  64           +------+------+
  65           |             |          pm2 = qh * dl
  66           +------+------+
  67                  +------+------+
  68                  |             |   pl = ql * dl (not calculated)
  69                  +------+------+
  70
  71    Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
  72    + HIGH(pl) == HIGH(l).  The only thing we need from those product parts
  73    is whether they produce a carry into the high.
  74
  75    pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
  76    time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
  77    HIGH(l).  pl is never actually calculated.  */
  78
  79 #define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  80   do {                                          \
  81     mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
  82     ASSERT (dh == HIGH32(d));                   \
  83     ASSERT (dl == LOW32(d));                    \
  84     ASSERT (q*d == l);                          \
  85                                                 \
  86     ql = LOW32 (q);                             \
  87     qh = HIGH32 (q);                            \
  88                                                 \
  89     pm1 = ql * dh;                              \
  90     pm2 = qh * dl;                              \
  91     ph  = qh * dh;                              \
  92                                                 \
  93     pm_l = LOW32 (pm1) + LOW32 (pm2);           \
  94                                                 \
  95     (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
  96       + HIGH32 (pm_l) + ((pm_l << 32) > l);     \
  97                                                 \
  98     ASSERT_HIGH_PRODUCT (h, q, d);              \
  99   } while (0)
 100
 101
 102 /* Set h to the high of q*d, assuming the low limb of that product is equal
 103    to l, and that d fits in 32-bits.
 104
 105    |-----high----||----low-----|
 106           +------+------+
 107           |             |          pm = qh * dl
 108           +------+------+
 109                  +------+------+
 110                  |             |   pl = ql * dl (not calculated)
 111                  +------+------+
 112
 113    Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
 114    time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
 115    need to calculate pl to determine this.  */
 116
 117 #define umul_ppmm_half_lowequal(h, q, d, l)     \
 118   do {                                          \
 119     mp_limb_t pm;                               \
 120     ASSERT (q*d == l);                          \
 121     ASSERT (HIGH32(d) == 0);                    \
 122                                                 \
 123     pm = HIGH32(q) * d;                         \
 124     (h) = HIGH32(pm) + ((pm << 32) > l);        \
 125     ASSERT_HIGH_PRODUCT (h, q, d);              \
 126   } while (0)
 127
 128
 129 /* check that h is the high limb of x*y */
 130 #if WANT_ASSERT
 131 #define ASSERT_HIGH_PRODUCT(h, x, y)    \
 132   do {                                  \
 133     mp_limb_t  want_h, dummy;           \
 134     umul_ppmm (want_h, dummy, x, y);    \
 135     ASSERT (h == want_h);               \
 136   } while (0)
 137 #else
 138 #define ASSERT_HIGH_PRODUCT(h, q, d)    \
 139   do { } while (0)
 140 #endif
 141
 142
 143 /* Multiply u anv v, where v < 2^32.  */
 144 #define umul_ppmm_s(w1, w0, u, v)                                       \
 145   do {                                                                  \
 146     UWtype __x0, __x2;                                                  \
 147     UWtype __ul, __vl, __uh;                                            \
 148     UWtype __u = (u), __v = (v);                                        \
 149                                                                         \
 150     __ul = __ll_lowpart (__u);                                          \
 151     __uh = __ll_highpart (__u);                                         \
 152     __vl = __ll_lowpart (__v);                                          \
 153                                                                         \
 154     __x0 = (UWtype) __ul * __vl;                                        \
 155     __x2 = (UWtype) __uh * __vl;                                        \
 156                                                                         \
 157     (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;           \
 158     (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;                              \
 159   } while (0)
 160
 161 /* Count the leading zeros on a limb, but assuming it fits in 32 bits.
 162    The count returned will be in the range 32 to 63.
 163    This is the 32-bit generic C count_leading_zeros from longlong.h. */
 164 #define count_leading_zeros_32(count, x)                                      \
 165   do {                                                                        \
 166     mp_limb_t  __xr = (x);                                                    \
 167     unsigned   __a;                                                           \
 168     ASSERT ((x) != 0);                                                        \
 169     ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
 170     __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
 171       : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
 172                                                                               \
 173     (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
 174   } while (0)
 175
 176
 177 /* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
 178    32 bits and is normalized (high bit set).  */
 179 #define invert_half_limb(inv, d)                \
 180   do {                                          \
 181     mp_limb_t  _n;                              \
 182     ASSERT ((d) <= 0xFFFFFFFF);                 \
 183     ASSERT ((d) & 0x80000000);                  \
 184     _n = (((mp_limb_t) -(d)) << 32) - 1;        \
 185     (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
 186   } while (0)
 187
 188
 189 /* Divide nh:nl by d, setting q to the quotient and r to the remainder.
 190    q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
 191    dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  */
 192
 193 #define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
 194   do {                                                                  \
 195     unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
 196     mp_limb_t  _n, _x;                                                  \
 197     ASSERT (d_limb <= 0xFFFFFFFF);                                      \
 198     ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \
 199     ASSERT (d_limb & 0x80000000);                                       \
 200     ASSERT (nh < d_limb);                                               \
 201     _n10 = (nl);                                                        \
 202     _n2 = (nh);                                                         \
 203     _n1 = (int) _n10 >> 31;                                             \
 204     _nadj = _n10 + (_n1 & d_limb);                                      \
 205     _x = dinv_limb * (_n2 - _n1) + _nadj;                               \
 206     _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
 207     _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
 208     _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \
 209     _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \
 210     ASSERT (_xh == 0 || _xh == ~0);                                     \
 211     _r = _x + (d_limb & _xh);                 /* addback */             \
 212     _q = _xh - _q11n;                         /* q1+1-addback */        \
 213     ASSERT (_r < d_limb);                                               \
 214     ASSERT (d_limb * _q + _r == _n);                                    \
 215     (r) = _r;                                                           \
 216     (q) = _q;                                                           \
 217   } while (0)
 218
 219