tests/rand/stat.c

   1 /* stat.c -- statistical tests of random number sequences. */
   2
   3 /*
   4 Copyright 1999, 2000 Free Software Foundation, Inc.
   5
   6 This file is part of the GNU MP Library.
   7
   8 The GNU MP Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MP Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  20
  21 /* Examples:
  22
  23   $ gen 1000 | stat
  24 Test 1000 real numbers.
  25
  26   $ gen 30000 | stat -2 1000
  27 Test 1000 real numbers 30 times and then test the 30 results in a
  28 ``second level''.
  29
  30   $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
  31 Test 1000 integers 0 <= X <= 2^32-1.
  32
  33   $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
  34 Test 1000 integers 0 <= X <= 2^34-1.
  35
  36 */
  37
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <unistd.h>
  41 #include <math.h>
  42 #include "gmp.h"
  43 #include "gmpstat.h"
  44
  45 #if !HAVE_DECL_OPTARG
  46 extern char *optarg;
  47 extern int optind, opterr;
  48 #endif
  49
  50 #define FVECSIZ (100000L)
  51
  52 int g_debug = 0;
  53
  54 static void
  55 print_ks_results (mpf_t f_p, mpf_t f_p_prob,
  56                   mpf_t f_m, mpf_t f_m_prob,
  57                   FILE *fp)
  58 {
  59   double p, pp, m, mp;
  60
  61   p = mpf_get_d (f_p);
  62   m = mpf_get_d (f_m);
  63   pp = mpf_get_d (f_p_prob);
  64   mp = mpf_get_d (f_m_prob);
  65
  66   fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
  67   fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
  68 }
  69
  70 static void
  71 print_x2_table (unsigned int v, FILE *fp)
  72 {
  73   double t[7];
  74   int f;
  75
  76
  77   fprintf (fp, "Chi-square table for v=%u\n", v);
  78   fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
  79   x2_table (t, v);
  80   for (f = 0; f < 7; f++)
  81     fprintf (fp, "%.2f\t", t[f]);
  82   fputs ("\n", fp);
  83 }
  84
  85
  86
  87 /* Pks () -- Distribution function for KS results with a big n (like 1000
  88    or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
  89 /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
  90
  91 static void
  92 Pks (mpf_t p, mpf_t x)
  93 {
  94   double dt;                    /* temp double */
  95
  96   mpf_set (p, x);
  97   mpf_mul (p, p, p);            /* p = x^2 */
  98   mpf_mul_ui (p, p, 2);         /* p = 2*x^2 */
  99   mpf_neg (p, p);               /* p = -2*x^2 */
 100   /* No pow() in gmp.  Use doubles. */
 101   /* FIXME: Use exp()? */
 102   dt = pow (M_E, mpf_get_d (p));
 103   mpf_set_d (p, dt);
 104   mpf_ui_sub (p, 1, p);
 105 }
 106
 107 /* f_freq() -- frequency test on real numbers 0<=f<1*/
 108 static void
 109 f_freq (const unsigned l1runs, const unsigned l2runs,
 110         mpf_t fvec[], const unsigned long n)
 111 {
 112   unsigned f;
 113   mpf_t f_p, f_p_prob;
 114   mpf_t f_m, f_m_prob;
 115   mpf_t *l1res;                 /* level 1 result array */
 116
 117   mpf_init (f_p);  mpf_init (f_m);
 118   mpf_init (f_p_prob);  mpf_init (f_m_prob);
 119
 120
 121   /* Allocate space for 1st level results. */
 122   l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
 123   if (NULL == l1res)
 124     {
 125       fprintf (stderr, "stat: malloc failure\n");
 126       exit (1);
 127     }
 128
 129   printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
 130   printf ("\tKp\t\tKm\n");
 131
 132   for (f = 0; f < l2runs; f++)
 133     {
 134       /*  f_printvec (fvec, n); */
 135       mpf_freqt (f_p, f_m, fvec + f * n, n);
 136
 137       /* what's the probability of getting these results? */
 138       ks_table (f_p_prob, f_p, n);
 139       ks_table (f_m_prob, f_m, n);
 140
 141       if (l1runs == 0)
 142         {
 143           /*printf ("%u:\t", f + 1);*/
 144           print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
 145         }
 146       else
 147         {
 148           /* save result */
 149           mpf_init_set (l1res[f], f_p);
 150           mpf_init_set (l1res[f + l2runs], f_m);
 151         }
 152     }
 153
 154   /* Now, apply the KS test on the results from the 1st level rounds
 155      with the distribution
 156      F(x) = 1 - pow(e, -2*x^2)  [Knuth, vol 2, p.51] */
 157
 158   if (l1runs != 0)
 159     {
 160       /*printf ("-------------------------------------\n");*/
 161
 162       /* The Kp's. */
 163       ks (f_p, f_m, l1res, Pks, l2runs);
 164       ks_table (f_p_prob, f_p, l2runs);
 165       ks_table (f_m_prob, f_m, l2runs);
 166       printf ("Kp:\t");
 167       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
 168
 169       /* The Km's. */
 170       ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
 171       ks_table (f_p_prob, f_p, l2runs);
 172       ks_table (f_m_prob, f_m, l2runs);
 173       printf ("Km:\t");
 174       print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
 175     }
 176
 177   mpf_clear (f_p);  mpf_clear (f_m);
 178   mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
 179   free (l1res);
 180 }
 181
 182 /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
 183    0<=z<=MAX */
 184 static void
 185 z_freq (const unsigned l1runs,
 186         const unsigned l2runs,
 187         mpz_t zvec[],
 188         const unsigned long n,
 189         unsigned int max)
 190 {
 191   mpf_t V;                      /* result */
 192   double d_V;                   /* result as a double */
 193
 194   mpf_init (V);
 195
 196
 197   printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
 198   print_x2_table (max, stdout);
 199
 200   mpz_freqt (V, zvec, max, n);
 201
 202   d_V = mpf_get_d (V);
 203   printf ("V = %.2f (n = %lu)\n", d_V, n);
 204
 205   mpf_clear (V);
 206 }
 207
 208 unsigned int stat_debug = 0;
 209
 210 int
 211 main (argc, argv)
 212      int argc;
 213      char *argv[];
 214 {
 215   const char usage[] =
 216     "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
 217     "       file     filename\n" \
 218     "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
 219     "       -d       increase debugging level\n" \
 220     "       -i max   input is integers 0 <= Z <= MAX\n" \
 221     "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
 222     "                maximum value when converting real numbers to integers\n" \
 223     "";
 224
 225   mpf_t fvec[FVECSIZ];
 226   mpz_t zvec[FVECSIZ];
 227   unsigned long int f, n, vecentries;
 228   char *filen;
 229   FILE *fp;
 230   int c;
 231   int omitoutput = 0;
 232   int realinput = -1;           /* 1: input is real numbers 0<=R<1;
 233                                    0: input is integers 0 <= Z <= MAX. */
 234   long l1runs = 0,              /* 1st level runs */
 235     l2runs = 1;                 /* 2nd level runs */
 236   mpf_t f_temp;
 237   mpz_t z_imax;                 /* max value when converting between
 238                                    real number and integer. */
 239   mpf_t f_imax_plus1;           /* f_imax + 1 stored in an mpf_t for
 240                                    convenience */
 241   mpf_t f_imax_minus1;          /* f_imax - 1 stored in an mpf_t for
 242                                    convenience */
 243
 244
 245   mpf_init (f_temp);
 246   mpz_init_set_ui (z_imax, 0x7fffffff);
 247   mpf_init (f_imax_plus1);
 248   mpf_init (f_imax_minus1);
 249
 250   while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
 251     switch (c)
 252       {
 253       case '2':
 254         l1runs = atol (optarg);
 255         l2runs = -1;            /* set later on */
 256         break;
 257       case 'd':                 /* increase debug level */
 258         stat_debug++;
 259         break;
 260       case 'i':
 261         if (1 == realinput)
 262           {
 263             fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
 264             exit (1);
 265           }
 266         if (mpz_set_str (z_imax, optarg, 0))
 267           {
 268             fprintf (stderr, "stat: bad max value %s\n", optarg);
 269             exit (1);
 270           }
 271         realinput = 0;
 272         break;
 273       case 'r':
 274         if (0 == realinput)
 275           {
 276             fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
 277             exit (1);
 278           }
 279         if (mpz_set_str (z_imax, optarg, 0))
 280           {
 281             fprintf (stderr, "stat: bad max value %s\n", optarg);
 282             exit (1);
 283           }
 284         realinput = 1;
 285         break;
 286       case 'o':
 287         omitoutput = atoi (optarg);
 288         break;
 289       case '?':
 290       default:
 291         fputs (usage, stderr);
 292         exit (1);
 293       }
 294   argc -= optind;
 295   argv += optind;
 296
 297   if (argc < 1)
 298     fp = stdin;
 299   else
 300     filen = argv[0];
 301
 302   if (fp != stdin)
 303     if (NULL == (fp = fopen (filen, "r")))
 304       {
 305         perror (filen);
 306         exit (1);
 307       }
 308
 309   if (-1 == realinput)
 310     realinput = 1;              /* default is real numbers */
 311
 312   /* read file and fill appropriate vec */
 313   if (1 == realinput)           /* real input */
 314     {
 315       for (f = 0; f < FVECSIZ ; f++)
 316         {
 317           mpf_init (fvec[f]);
 318           if (!mpf_inp_str (fvec[f], fp, 10))
 319             break;
 320         }
 321     }
 322   else                          /* integer input */
 323     {
 324       for (f = 0; f < FVECSIZ ; f++)
 325         {
 326           mpz_init (zvec[f]);
 327           if (!mpz_inp_str (zvec[f], fp, 10))
 328             break;
 329         }
 330     }
 331   vecentries = n = f;           /* number of entries read */
 332   fclose (fp);
 333
 334   if (FVECSIZ == f)
 335     fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
 336              "of only %ld entries.  sorry.\n", FVECSIZ);
 337
 338   printf ("Got %lu numbers.\n", n);
 339
 340   /* convert and fill the other vec */
 341   /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
 342      0<=z<=imax and we are truncating all fractions when
 343      converting float to int, we have to add 1 to imax.*/
 344   mpf_set_z (f_imax_plus1, z_imax);
 345   mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
 346   if (1 == realinput)           /* fill zvec[] */
 347     {
 348       for (f = 0; f < n; f++)
 349         {
 350           mpf_mul (f_temp, fvec[f], f_imax_plus1);
 351           mpz_init (zvec[f]);
 352           mpz_set_f (zvec[f], f_temp); /* truncating fraction */
 353           if (stat_debug > 1)
 354             {
 355               mpz_out_str (stderr, 10, zvec[f]);
 356               fputs ("\n", stderr);
 357             }
 358         }
 359     }
 360   else                          /* integer input; fill fvec[] */
 361     {
 362       /*    mpf_set_z (f_imax_minus1, z_imax);
 363             mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
 364       for (f = 0; f < n; f++)
 365         {
 366           mpf_init (fvec[f]);
 367           mpf_set_z (fvec[f], zvec[f]);
 368           mpf_div (fvec[f], fvec[f], f_imax_plus1);
 369           if (stat_debug > 1)
 370             {
 371               mpf_out_str (stderr, 10, 0, fvec[f]);
 372               fputs ("\n", stderr);
 373             }
 374         }
 375     }
 376
 377   /* 2 levels? */
 378   if (1 != l2runs)
 379     {
 380       l2runs = n / l1runs;
 381       printf ("Doing %ld second level rounds "\
 382               "with %ld entries in each round", l2runs, l1runs);
 383       if (n % l1runs)
 384         printf (" (discarding %ld entr%s)", n % l1runs,
 385                 n % l1runs == 1 ? "y" : "ies");
 386       puts (".");
 387       n = l1runs;
 388     }
 389
 390 #ifndef DONT_FFREQ
 391   f_freq (l1runs, l2runs, fvec, n);
 392 #endif
 393 #ifdef DO_ZFREQ
 394   z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
 395 #endif
 396
 397   mpf_clear (f_temp); mpz_clear (z_imax);
 398   mpf_clear (f_imax_plus1);
 399   mpf_clear (f_imax_minus1);
 400   for (f = 0; f < vecentries; f++)
 401     {
 402       mpf_clear (fvec[f]);
 403       mpz_clear (zvec[f]);
 404     }
 405
 406   return 0;
 407 }