libs/math/doc/internals/test_data.qbk

   1 [section:test_data Graphing, Profiling, and Generating Test Data for Special Functions]
   2
   3 The class `test_data` and associated helper functions are designed so that in just
   4 a few lines of code you should be able to:
   5
   6 * Profile a continued fraction, or infinite series for convergence and accuracy.
   7 * Generate csv data from a special function that can be imported into your favorite
   8 graphing program (or spreadsheet) for further analysis.
   9 * Generate high precision test data.
  10
  11 [h4 Synopsis]
  12
  13    #include <boost/math/tools/test_data.hpp>
  14
  15 [important
  16 This is a non-core Boost.Math header that is predominantly used for internal
  17 maintenance of the library: as a result the library is located under
  18 `libs/math/include_private` and you will need to add that directory to
  19 your include path in order to use this feature.
  20 ]
  21
  22    namespace boost{ namespace math{ namespace tools{
  23
  24    enum parameter_type
  25    {
  26       random_in_range = 0,
  27       periodic_in_range = 1,
  28       power_series = 2,
  29       dummy_param = 0x80,
  30    };
  31
  32    template <class T>
  33    struct parameter_info;
  34
  35    template <class T>
  36    parameter_info<T> make_random_param(T start_range, T end_range, int n_points);
  37
  38    template <class T>
  39    parameter_info<T> make_periodic_param(T start_range, T end_range, int n_points);
  40
  41    template <class T>
  42    parameter_info<T> make_power_param(T basis, int start_exponent, int end_exponent);
  43
  44    template <class T>
  45    bool get_user_parameter_info(parameter_info<T>& info, const char* param_name);
  46
  47    template <class T>
  48    class test_data
  49    {
  50    public:
  51       typedef std::vector<T> row_type;
  52       typedef row_type value_type;
  53    private:
  54       typedef std::set<row_type> container_type;
  55    public:
  56       typedef typename container_type::reference reference;
  57       typedef typename container_type::const_reference const_reference;
  58       typedef typename container_type::iterator iterator;
  59       typedef typename container_type::const_iterator const_iterator;
  60       typedef typename container_type::difference_type difference_type;
  61       typedef typename container_type::size_type size_type;
  62
  63       // creation:
  64       test_data(){}
  65       template <class F>
  66       test_data(F func, const parameter_info<T>& arg1);
  67
  68       // insertion:
  69       template <class F>
  70       test_data& insert(F func, const parameter_info<T>& arg1);
  71
  72       template <class F>
  73       test_data& insert(F func, const parameter_info<T>& arg1,
  74                         const parameter_info<T>& arg2);
  75
  76       template <class F>
  77       test_data& insert(F func, const parameter_info<T>& arg1,
  78                         const parameter_info<T>& arg2,
  79                         const parameter_info<T>& arg3);
  80
  81       void clear();
  82
  83       // access:
  84       iterator begin();
  85       iterator end();
  86       const_iterator begin()const;
  87       const_iterator end()const;
  88       bool operator==(const test_data& d)const;
  89       bool operator!=(const test_data& d)const;
  90       void swap(test_data& other);
  91       size_type size()const;
  92       size_type max_size()const;
  93       bool empty()const;
  94
  95       bool operator < (const test_data& dat)const;
  96       bool operator <= (const test_data& dat)const;
  97       bool operator > (const test_data& dat)const;
  98       bool operator >= (const test_data& dat)const;
  99    };
 100
 101    template <class charT, class traits, class T>
 102    std::basic_ostream<charT, traits>& write_csv(
 103                std::basic_ostream<charT, traits>& os,
 104                const test_data<T>& data);
 105
 106    template <class charT, class traits, class T>
 107    std::basic_ostream<charT, traits>& write_csv(
 108                std::basic_ostream<charT, traits>& os,
 109                const test_data<T>& data,
 110                const charT* separator);
 111
 112    template <class T>
 113    std::ostream& write_code(std::ostream& os,
 114                             const test_data<T>& data,
 115                             const char* name);
 116
 117    }}} // namespaces
 118
 119 [h4 Description]
 120
 121 This tool is best illustrated with the following series of examples.
 122
 123 The functionality of test_data is split into the following parts:
 124
 125 * A functor that implements the function for which data is being generated:
 126 this is the bit you have to write.
 127 * One of more parameters that are to be passed to the functor, these are
 128 described in fairly abstract terms: give me N points distributed like /this/ etc.
 129 * The class test_data, that takes the functor and descriptions of the parameters
 130 and computes how ever many output points have been requested, these are stored
 131 in a sorted container.
 132 * Routines to iterate over the test_data container and output the data in either
 133 csv format, or as C++ source code (as a table using Boost.Array).
 134
 135 [h5 Example 1: Output Data for Graph Plotting]
 136
 137 For example, lets say we want to graph the lgamma function between -3 and 100,
 138 one could do this like so:
 139
 140    #include <boost/math/tools/test_data.hpp>
 141    #include <boost/math/special_functions/gamma.hpp>
 142
 143    int main()
 144    {
 145       using namespace boost::math::tools;
 146
 147       // create an object to hold the data:
 148       test_data<double> data;
 149
 150       // insert 500 points at uniform intervals between just after -3 and 100:
 151       double (*pf)(double) = boost::math::lgamma;
 152       data.insert(pf, make_periodic_param(-3.0 + 0.00001, 100.0, 500));
 153
 154       // print out in csv format:
 155       write_csv(std::cout, data, ", ");
 156       return 0;
 157    }
 158
 159 Which, when plotted, results in:
 160
 161 [graph lgamma]
 162
 163 [h5 Example 2: Creating Test Data]
 164
 165 As a second example, let's suppose we want to create highly accurate test
 166 data for a special function.  Since many special functions have two or
 167 more independent parameters, it's very hard to effectively cover all of
 168 the possible parameter space without generating gigabytes of data at
 169 great computational expense.  A second best approach is to provide the tools
 170 by which a user (or the library maintainer) can quickly generate more data
 171 on demand to probe the function over a particular domain of interest.
 172
 173 In this example we'll generate test data for the beta function using
 174 [@http://shoup.net/ntl/doc/RR.txt NTL::RR] at 1000 bit precision.
 175 Rather than call our generic
 176 version of the beta function, we'll implement a deliberately naive version
 177 of the beta function using lgamma, and rely on the high precision of the
 178 data type used to get results accurate to at least 128-bit precision. In this
 179 way our test data is independent of whatever clever tricks we may wish to
 180 use inside the our beta function.
 181
 182 To start with then, here's the function object that creates the test data:
 183
 184    #include <boost/math/tools/ntl.hpp>
 185    #include <boost/math/special_functions/gamma.hpp>
 186    #include <boost/math/tools/test_data.hpp>
 187    #include <fstream>
 188
 189    #include <boost/math/tools/test_data.hpp>
 190
 191    using namespace boost::math::tools;
 192
 193    struct beta_data_generator
 194    {
 195       NTL::RR operator()(NTL::RR a, NTL::RR b)
 196       {
 197          //
 198          // If we throw a domain error then test_data will
 199          // ignore this input point. We'll use this to filter
 200          // out all cases where a < b since the beta function
 201          // is symmetrical in a and b:
 202          //
 203          if(a < b)
 204             throw std::domain_error("");
 205
 206          // very naively calculate spots with lgamma:
 207          NTL::RR g1, g2, g3;
 208          int s1, s2, s3;
 209          g1 = boost::math::lgamma(a, &s1);
 210          g2 = boost::math::lgamma(b, &s2);
 211          g3 = boost::math::lgamma(a+b, &s3);
 212          g1 += g2 - g3;
 213          g1 = exp(g1);
 214          g1 *= s1 * s2 * s3;
 215          return g1;
 216       }
 217    };
 218
 219 To create the data, we'll need to input the domains for a and b
 220 for which the function will be tested: the function `get_user_parameter_info`
 221 is designed for just that purpose.  The start of main will look something like:
 222
 223    // Set the precision on RR:
 224    NTL::RR::SetPrecision(1000); // bits.
 225    NTL::RR::SetOutputPrecision(40); // decimal digits.
 226
 227    parameter_info<NTL::RR> arg1, arg2;
 228    test_data<NTL::RR> data;
 229
 230    std::cout << "Welcome.\n"
 231       "This program will generate spot tests for the beta function:\n"
 232       "  beta(a, b)\n\n";
 233
 234    bool cont;
 235    std::string line;
 236
 237    do{
 238       // prompt the user for the domain of a and b to test:
 239       get_user_parameter_info(arg1, "a");
 240       get_user_parameter_info(arg2, "b");
 241
 242       // create the data:
 243       data.insert(beta_data_generator(), arg1, arg2);
 244
 245       // see if the user want's any more domains tested:
 246       std::cout << "Any more data [y/n]?";
 247       std::getline(std::cin, line);
 248       boost::algorithm::trim(line);
 249       cont = (line == "y");
 250    }while(cont);
 251
 252 [caution At this point one potential stumbling block should be mentioned:
 253 test_data<>::insert will create a matrix of test data when there are two
 254 or more parameters, so if we have two parameters and we're asked for
 255 a thousand points on each, that's a ['million test points in total].
 256 Don't say you weren't warned!]
 257
 258 There's just one final step now, and that's to write the test data to file:
 259
 260    std::cout << "Enter name of test data file [default=beta_data.ipp]";
 261    std::getline(std::cin, line);
 262    boost::algorithm::trim(line);
 263    if(line == "")
 264       line = "beta_data.ipp";
 265    std::ofstream ofs(line.c_str());
 266    write_code(ofs, data, "beta_data");
 267
 268 The format of the test data looks something like:
 269
 270    #define SC_(x) static_cast<T>(BOOST_JOIN(x, L))
 271       static const boost::array<boost::array<T, 3>, 1830>
 272       beta_med_data = {
 273          SC_(0.4883005917072296142578125),
 274          SC_(0.4883005917072296142578125),
 275          SC_(3.245912809500479157065104747353807392371),
 276          SC_(3.5808107852935791015625),
 277          SC_(0.4883005917072296142578125),
 278          SC_(1.007653173802923954909901438393379243537),
 279          /* ... lots of rows skipped */
 280    };
 281
 282 The first two values in each row are the input parameters that were passed
 283 to our functor and the last value is the return value from the functor.
 284 Had our functor returned a __tuple rather than a value, then we would have had
 285 one entry for each element in the tuple in addition to the input parameters.
 286
 287 The first #define serves two purposes:
 288
 289 * It reduces the file sizes considerably: all those `static_cast`'s add up to a lot
 290 of bytes otherwise (they are needed to suppress compiler warnings when `T` is
 291 narrower than a `long double`).
 292 * It provides a useful customisation point: for example if we were testing
 293 a user-defined type that has more precision than a `long double` we could change
 294 it to:
 295
 296 [^#define SC_(x) lexical_cast<T>(BOOST_STRINGIZE(x))]
 297
 298 in order to ensure that no truncation of the values occurs prior to conversion
 299 to `T`.  Note that this isn't used by default as it's rather hard on the compiler
 300 when the table is large.
 301
 302 [h5 Example 3: Profiling a Continued Fraction for Convergence and Accuracy]
 303
 304 Alternatively, lets say we want to profile a continued fraction for
 305 convergence and error.  As an example, we'll use the continued fraction
 306 for the upper incomplete gamma function, the following function object
 307 returns the next a[sub N ] and b[sub N ] of the continued fraction
 308 each time it's invoked:
 309
 310    template <class T>
 311    struct upper_incomplete_gamma_fract
 312    {
 313    private:
 314       T z, a;
 315       int k;
 316    public:
 317       typedef std::pair<T,T> result_type;
 318
 319       upper_incomplete_gamma_fract(T a1, T z1)
 320          : z(z1-a1+1), a(a1), k(0)
 321       {
 322       }
 323
 324       result_type operator()()
 325       {
 326          ++k;
 327          z += 2;
 328          return result_type(k * (a - k), z);
 329       }
 330    };
 331
 332 We want to measure both the relative error, and the rate of convergence
 333 of this fraction, so we'll write a functor that returns both as a __tuple:
 334 class test_data will unpack the tuple for us, and create one column of data
 335 for each element in the tuple (in addition to the input parameters):
 336
 337    #include <boost/math/tools/test_data.hpp>
 338    #include <boost/math/tools/test.hpp>
 339    #include <boost/math/special_functions/gamma.hpp>
 340    #include <boost/math/tools/ntl.hpp>
 341    #include <boost/math/tools/tuple.hpp>
 342
 343    template <class T>
 344    struct profile_gamma_fraction
 345    {
 346       typedef ``__tuple``<T, T> result_type;
 347
 348       result_type operator()(T val)
 349       {
 350          using namespace boost::math::tools;
 351          // estimate the true value, using arbitary precision
 352          // arithmetic and NTL::RR:
 353          NTL::RR rval(val);
 354          upper_incomplete_gamma_fract<NTL::RR> f1(rval, rval);
 355          NTL::RR true_val = continued_fraction_a(f1, 1000);
 356          //
 357          // Now get the aproximation at double precision, along with the number of
 358          // iterations required:
 359          boost::uintmax_t iters = std::numeric_limits<boost::uintmax_t>::max();
 360          upper_incomplete_gamma_fract<T> f2(val, val);
 361          T found_val = continued_fraction_a(f2, std::numeric_limits<T>::digits, iters);
 362          //
 363          // Work out the relative error, as measured in units of epsilon:
 364          T err = real_cast<T>(relative_error(true_val, NTL::RR(found_val)) / std::numeric_limits<T>::epsilon());
 365          //
 366          // now just return the results as a tuple:
 367          return boost::math::make_tuple(err, iters);
 368       }
 369    };
 370
 371 Feeding that functor into test_data allows rapid output of csv data,
 372 for whatever type `T` we may be interested in:
 373
 374    int main()
 375    {
 376       using namespace boost::math::tools;
 377       // create an object to hold the data:
 378       test_data<double> data;
 379       // insert 500 points at uniform intervals between just after 0 and 100:
 380       data.insert(profile_gamma_fraction<double>(), make_periodic_param(0.01, 100.0, 100));
 381       // print out in csv format:
 382       write_csv(std::cout, data, ", ");
 383       return 0;
 384    }
 385
 386 This time there's no need to plot a graph, the first few rows are:
 387
 388    a and z,  Error/epsilon,  Iterations required
 389
 390    0.01,     9723.14,        4726
 391    1.0099,   9.54818,        87
 392    2.0098,   3.84777,        40
 393    3.0097,   0.728358,       25
 394    4.0096,   2.39712,        21
 395    5.0095,   0.233263,       16
 396
 397 So it's pretty clear that this fraction shouldn't be used for small values of /a/ and /z/.
 398
 399 [h4 reference]
 400
 401 Most of this tool has been described already in the examples above, we'll
 402 just add the following notes on the non-member functions:
 403
 404    template <class T>
 405    parameter_info<T> make_random_param(T start_range, T end_range, int n_points);
 406
 407 Tells class test_data to test /n_points/ random values in the range
 408 [start_range,end_range].
 409
 410    template <class T>
 411    parameter_info<T> make_periodic_param(T start_range, T end_range, int n_points);
 412
 413 Tells class test_data to test /n_points/ evenly spaced values in the range
 414 [start_range,end_range].
 415
 416    template <class T>
 417    parameter_info<T> make_power_param(T basis, int start_exponent, int end_exponent);
 418
 419 Tells class test_data to test points of the form ['basis + R * 2[super expon]] for each
 420 /expon/ in the range [start_exponent, end_exponent], and /R/ a random number in \[0.5, 1\].
 421
 422    template <class T>
 423    bool get_user_parameter_info(parameter_info<T>& info, const char* param_name);
 424
 425 Prompts the user for the parameter range and form to use.
 426
 427 Finally, if we don't want the parameter to be included in the output, we can tell
 428 test_data by setting it a "dummy parameter":
 429
 430    parameter_info<double> p = make_random_param(2.0, 5.0, 10);
 431    p.type |= dummy_param;
 432
 433 This is useful when the functor used transforms the parameter in some way
 434 before passing it to the function under test, usually the functor will then
 435 return both the transformed input and the result in a tuple, so there's no
 436 need for the original pseudo-parameter to be included in program output.
 437
 438 [endsect] [/section:test_data Graphing, Profiling, and Generating Test Data for Special Functions]
 439
 440 [/
 441   Copyright 2006 John Maddock and Paul A. Bristow.
 442   Distributed under the Boost Software License, Version 1.0.
 443   (See accompanying file LICENSE_1_0.txt or copy at
 444   http://www.boost.org/LICENSE_1_0.txt).
 445 ]