tune/modlinv.c

   1 /* Alternate implementations of binvert_limb to compare speeds. */
   2
   3 /*
   4 Copyright 2000, 2002 Free Software Foundation, Inc.
   5
   6 This file is part of the GNU MP Library.
   7
   8 The GNU MP Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MP Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  20
  21 #include <stdio.h>
  22 #include "gmp.h"
  23 #include "gmp-impl.h"
  24 #include "longlong.h"
  25 #include "speed.h"
  26
  27
  28 /* Like the standard version in gmp-impl.h, but with the expressions using a
  29    "1-" form.  This has the same number of steps, but "1-" is on the
  30    dependent chain, whereas the "2*" in the standard version isn't.
  31    Depending on the CPU this should be the same or a touch slower.  */
  32
  33 #if GMP_LIMB_BITS <= 32
  34 #define binvert_limb_mul1(inv,n)                                \
  35   do {                                                          \
  36     mp_limb_t  __n = (n);                                       \
  37     mp_limb_t  __inv;                                           \
  38     ASSERT ((__n & 1) == 1);                                    \
  39     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
  40     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
  41     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
  42     ASSERT (__inv * __n == 1);                                  \
  43     (inv) = __inv;                                              \
  44   } while (0)
  45 #endif
  46
  47 #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
  48 #define binvert_limb_mul1(inv,n)                                \
  49   do {                                                          \
  50     mp_limb_t  __n = (n);                                       \
  51     mp_limb_t  __inv;                                           \
  52     ASSERT ((__n & 1) == 1);                                    \
  53     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
  54     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
  55     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
  56     __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
  57     ASSERT (__inv * __n == 1);                                  \
  58     (inv) = __inv;                                              \
  59   } while (0)
  60 #endif
  61
  62
  63 /* The loop based version used in GMP 3.0 and earlier.  Usually slower than
  64    multiplying, due to the number of steps that must be performed.  Much
  65    slower when the processor has a good multiply.  */
  66
  67 #define binvert_limb_loop(inv,n)                \
  68   do {                                          \
  69     mp_limb_t  __v = (n);                       \
  70     mp_limb_t  __v_orig = __v;                  \
  71     mp_limb_t  __make_zero = 1;                 \
  72     mp_limb_t  __two_i = 1;                     \
  73     mp_limb_t  __v_inv = 0;                     \
  74                                                 \
  75     ASSERT ((__v & 1) == 1);                    \
  76                                                 \
  77     do                                          \
  78       {                                         \
  79         while ((__two_i & __make_zero) == 0)    \
  80           __two_i <<= 1, __v <<= 1;             \
  81         __v_inv += __two_i;                     \
  82         __make_zero -= __v;                     \
  83       }                                         \
  84     while (__make_zero);                        \
  85                                                 \
  86     ASSERT (__v_orig * __v_inv == 1);           \
  87     (inv) = __v_inv;                            \
  88   } while (0)
  89
  90
  91 /* Another loop based version with conditionals, but doing a fixed number of
  92    steps. */
  93
  94 #define binvert_limb_cond(inv,n)                \
  95   do {                                          \
  96     mp_limb_t  __n = (n);                       \
  97     mp_limb_t  __rem = (1 - __n) >> 1;          \
  98     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
  99     int        __count;                         \
 100                                                 \
 101     ASSERT ((__n & 1) == 1);                    \
 102                                                 \
 103     __count = GMP_LIMB_BITS-1;               \
 104     do                                          \
 105       {                                         \
 106         __inv >>= 1;                            \
 107         if (__rem & 1)                          \
 108           {                                     \
 109             __inv |= GMP_LIMB_HIGHBIT;          \
 110             __rem -= __n;                       \
 111           }                                     \
 112         __rem >>= 1;                            \
 113       }                                         \
 114     while (-- __count);                         \
 115                                                 \
 116     ASSERT (__inv * __n == 1);                  \
 117     (inv) = __inv;                              \
 118   } while (0)
 119
 120
 121 /* Another loop based bitwise version, but purely arithmetic, no
 122    conditionals. */
 123
 124 #define binvert_limb_arith(inv,n)                                       \
 125   do {                                                                  \
 126     mp_limb_t  __n = (n);                                               \
 127     mp_limb_t  __rem = (1 - __n) >> 1;                                  \
 128     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
 129     mp_limb_t  __lowbit;                                                \
 130     int        __count;                                                 \
 131                                                                         \
 132     ASSERT ((__n & 1) == 1);                                            \
 133                                                                         \
 134     __count = GMP_LIMB_BITS-1;                                       \
 135     do                                                                  \
 136       {                                                                 \
 137         __lowbit = __rem & 1;                                           \
 138         __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
 139         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
 140       }                                                                 \
 141     while (-- __count);                                                 \
 142                                                                         \
 143     ASSERT (__inv * __n == 1);                                          \
 144     (inv) = __inv;                                                      \
 145   } while (0)
 146
 147
 148 double
 149 speed_binvert_limb_mul1 (struct speed_params *s)
 150 {
 151   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
 152 }
 153 double
 154 speed_binvert_limb_loop (struct speed_params *s)
 155 {
 156   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
 157 }
 158 double
 159 speed_binvert_limb_cond (struct speed_params *s)
 160 {
 161   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
 162 }
 163 double
 164 speed_binvert_limb_arith (struct speed_params *s)
 165 {
 166   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
 167 }