boost/random/discrete_distribution.hpp

   1 /* boost random/discrete_distribution.hpp header file
   2  *
   3  * Copyright Steven Watanabe 2009-2011
   4  * Distributed under the Boost Software License, Version 1.0. (See
   5  * accompanying file LICENSE_1_0.txt or copy at
   6  * http://www.boost.org/LICENSE_1_0.txt)
   7  *
   8  * See http://www.boost.org for most recent version including documentation.
   9  *
  10  * $Id$
  11  */
  12
  13 #ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
  14 #define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
  15
  16 #include <vector>
  17 #include <limits>
  18 #include <numeric>
  19 #include <utility>
  20 #include <iterator>
  21 #include <boost/assert.hpp>
  22 #include <boost/random/uniform_01.hpp>
  23 #include <boost/random/uniform_int_distribution.hpp>
  24 #include <boost/random/detail/config.hpp>
  25 #include <boost/random/detail/operators.hpp>
  26 #include <boost/random/detail/vector_io.hpp>
  27
  28 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
  29 #include <initializer_list>
  30 #endif
  31
  32 #include <boost/range/begin.hpp>
  33 #include <boost/range/end.hpp>
  34
  35 #include <boost/random/detail/disable_warnings.hpp>
  36
  37 namespace boost {
  38 namespace random {
  39 namespace detail {
  40
  41 template<class IntType, class WeightType>
  42 struct integer_alias_table {
  43     WeightType get_weight(IntType bin) const {
  44         WeightType result = _average;
  45         if(bin < _excess) ++result;
  46         return result;
  47     }
  48     template<class Iter>
  49     WeightType init_average(Iter begin, Iter end) {
  50         WeightType weight_average = 0;
  51         IntType excess = 0;
  52         IntType n = 0;
  53         // weight_average * n + excess == current partial sum
  54         // This is a bit messy, but it's guaranteed not to overflow
  55         for(Iter iter = begin; iter != end; ++iter) {
  56             ++n;
  57             if(*iter < weight_average) {
  58                 WeightType diff = weight_average - *iter;
  59                 weight_average -= diff / n;
  60                 if(diff % n > excess) {
  61                     --weight_average;
  62                     excess += n - diff % n;
  63                 } else {
  64                     excess -= diff % n;
  65                 }
  66             } else {
  67                 WeightType diff = *iter - weight_average;
  68                 weight_average += diff / n;
  69                 if(diff % n < n - excess) {
  70                     excess += diff % n;
  71                 } else {
  72                     ++weight_average;
  73                     excess -= n - diff % n;
  74                 }
  75             }
  76         }
  77         _alias_table.resize(static_cast<std::size_t>(n));
  78         _average = weight_average;
  79         _excess = excess;
  80         return weight_average;
  81     }
  82     void init_empty()
  83     {
  84         _alias_table.clear();
  85         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
  86                                               static_cast<IntType>(0)));
  87         _average = static_cast<WeightType>(1);
  88         _excess = static_cast<IntType>(0);
  89     }
  90     bool operator==(const integer_alias_table& other) const
  91     {
  92         return _alias_table == other._alias_table &&
  93             _average == other._average && _excess == other._excess;
  94     }
  95     static WeightType normalize(WeightType val, WeightType average)
  96     {
  97         return val;
  98     }
  99     static void normalize(std::vector<WeightType>&) {}
 100     template<class URNG>
 101     WeightType test(URNG &urng) const
 102     {
 103         return uniform_int_distribution<WeightType>(0, _average)(urng);
 104     }
 105     bool accept(IntType result, WeightType val) const
 106     {
 107         return result < _excess || val < _average;
 108     }
 109     static WeightType try_get_sum(const std::vector<WeightType>& weights)
 110     {
 111         WeightType result = static_cast<WeightType>(0);
 112         for(typename std::vector<WeightType>::const_iterator
 113                 iter = weights.begin(), end = weights.end();
 114             iter != end; ++iter)
 115         {
 116             if((std::numeric_limits<WeightType>::max)() - result > *iter) {
 117                 return static_cast<WeightType>(0);
 118             }
 119             result += *iter;
 120         }
 121         return result;
 122     }
 123     template<class URNG>
 124     static WeightType generate_in_range(URNG &urng, WeightType max)
 125     {
 126         return uniform_int_distribution<WeightType>(
 127             static_cast<WeightType>(0), max-1)(urng);
 128     }
 129     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
 130     alias_table_t _alias_table;
 131     WeightType _average;
 132     IntType _excess;
 133 };
 134
 135 template<class IntType, class WeightType>
 136 struct real_alias_table {
 137     WeightType get_weight(IntType) const
 138     {
 139         return WeightType(1.0);
 140     }
 141     template<class Iter>
 142     WeightType init_average(Iter first, Iter last)
 143     {
 144         std::size_t size = std::distance(first, last);
 145         WeightType weight_sum =
 146             std::accumulate(first, last, static_cast<WeightType>(0));
 147         _alias_table.resize(size);
 148         return weight_sum / size;
 149     }
 150     void init_empty()
 151     {
 152         _alias_table.clear();
 153         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
 154                                               static_cast<IntType>(0)));
 155     }
 156     bool operator==(const real_alias_table& other) const
 157     {
 158         return _alias_table == other._alias_table;
 159     }
 160     static WeightType normalize(WeightType val, WeightType average)
 161     {
 162         return val / average;
 163     }
 164     static void normalize(std::vector<WeightType>& weights)
 165     {
 166         WeightType sum =
 167             std::accumulate(weights.begin(), weights.end(),
 168                             static_cast<WeightType>(0));
 169         for(typename std::vector<WeightType>::iterator
 170                 iter = weights.begin(),
 171                 end = weights.end();
 172             iter != end; ++iter)
 173         {
 174             *iter /= sum;
 175         }
 176     }
 177     template<class URNG>
 178     WeightType test(URNG &urng) const
 179     {
 180         return uniform_01<WeightType>()(urng);
 181     }
 182     bool accept(IntType, WeightType) const
 183     {
 184         return true;
 185     }
 186     static WeightType try_get_sum(const std::vector<WeightType>& weights)
 187     {
 188         return static_cast<WeightType>(1);
 189     }
 190     template<class URNG>
 191     static WeightType generate_in_range(URNG &urng, WeightType)
 192     {
 193         return uniform_01<WeightType>()(urng);
 194     }
 195     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
 196     alias_table_t _alias_table;
 197 };
 198
 199 template<bool IsIntegral>
 200 struct select_alias_table;
 201
 202 template<>
 203 struct select_alias_table<true> {
 204     template<class IntType, class WeightType>
 205     struct apply {
 206         typedef integer_alias_table<IntType, WeightType> type;
 207     };
 208 };
 209
 210 template<>
 211 struct select_alias_table<false> {
 212     template<class IntType, class WeightType>
 213     struct apply {
 214         typedef real_alias_table<IntType, WeightType> type;
 215     };
 216 };
 217
 218 }
 219
 220 /**
 221  * The class @c discrete_distribution models a \random_distribution.
 222  * It produces integers in the range [0, n) with the probability
 223  * of producing each value is specified by the parameters of the
 224  * distribution.
 225  */
 226 template<class IntType = int, class WeightType = double>
 227 class discrete_distribution {
 228 public:
 229     typedef WeightType input_type;
 230     typedef IntType result_type;
 231
 232     class param_type {
 233     public:
 234
 235         typedef discrete_distribution distribution_type;
 236
 237         /**
 238          * Constructs a @c param_type object, representing a distribution
 239          * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$.
 240          */
 241         param_type() : _probabilities(1, static_cast<WeightType>(1)) {}
 242         /**
 243          * If @c first == @c last, equivalent to the default constructor.
 244          * Otherwise, the values of the range represent weights for the
 245          * possible values of the distribution.
 246          */
 247         template<class Iter>
 248         param_type(Iter first, Iter last) : _probabilities(first, last)
 249         {
 250             normalize();
 251         }
 252 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
 253         /**
 254          * If wl.size() == 0, equivalent to the default constructor.
 255          * Otherwise, the values of the @c initializer_list represent
 256          * weights for the possible values of the distribution.
 257          */
 258         param_type(const std::initializer_list<WeightType>& wl)
 259           : _probabilities(wl)
 260         {
 261             normalize();
 262         }
 263 #endif
 264         /**
 265          * If the range is empty, equivalent to the default constructor.
 266          * Otherwise, the elements of the range represent
 267          * weights for the possible values of the distribution.
 268          */
 269         template<class Range>
 270         explicit param_type(const Range& range)
 271           : _probabilities(boost::begin(range), boost::end(range))
 272         {
 273             normalize();
 274         }
 275
 276         /**
 277          * If nw is zero, equivalent to the default constructor.
 278          * Otherwise, the range of the distribution is [0, nw),
 279          * and the weights are found by  calling fw with values
 280          * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
 281          * \f$\mbox{xmax} - \delta/2\f$, where
 282          * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
 283          */
 284         template<class Func>
 285         param_type(std::size_t nw, double xmin, double xmax, Func fw)
 286         {
 287             std::size_t n = (nw == 0) ? 1 : nw;
 288             double delta = (xmax - xmin) / n;
 289             BOOST_ASSERT(delta > 0);
 290             for(std::size_t k = 0; k < n; ++k) {
 291                 _probabilities.push_back(fw(xmin + k*delta + delta/2));
 292             }
 293             normalize();
 294         }
 295
 296         /**
 297          * Returns a vector containing the probabilities of each possible
 298          * value of the distribution.
 299          */
 300         std::vector<WeightType> probabilities() const
 301         {
 302             return _probabilities;
 303         }
 304
 305         /** Writes the parameters to a @c std::ostream. */
 306         BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm)
 307         {
 308             detail::print_vector(os, parm._probabilities);
 309             return os;
 310         }
 311
 312         /** Reads the parameters from a @c std::istream. */
 313         BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm)
 314         {
 315             std::vector<WeightType> temp;
 316             detail::read_vector(is, temp);
 317             if(is) {
 318                 parm._probabilities.swap(temp);
 319             }
 320             return is;
 321         }
 322
 323         /** Returns true if the two sets of parameters are the same. */
 324         BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs)
 325         {
 326             return lhs._probabilities == rhs._probabilities;
 327         }
 328         /** Returns true if the two sets of parameters are different. */
 329         BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type)
 330     private:
 331         /// @cond show_private
 332         friend class discrete_distribution;
 333         explicit param_type(const discrete_distribution& dist)
 334           : _probabilities(dist.probabilities())
 335         {}
 336         void normalize()
 337         {
 338             impl_type::normalize(_probabilities);
 339         }
 340         std::vector<WeightType> _probabilities;
 341         /// @endcond
 342     };
 343
 344     /**
 345      * Creates a new @c discrete_distribution object that has
 346      * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$.
 347      */
 348     discrete_distribution()
 349     {
 350         _impl.init_empty();
 351     }
 352     /**
 353      * Constructs a discrete_distribution from an iterator range.
 354      * If @c first == @c last, equivalent to the default constructor.
 355      * Otherwise, the values of the range represent weights for the
 356      * possible values of the distribution.
 357      */
 358     template<class Iter>
 359     discrete_distribution(Iter first, Iter last)
 360     {
 361         init(first, last);
 362     }
 363 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
 364     /**
 365      * Constructs a @c discrete_distribution from a @c std::initializer_list.
 366      * If the @c initializer_list is empty, equivalent to the default
 367      * constructor.  Otherwise, the values of the @c initializer_list
 368      * represent weights for the possible values of the distribution.
 369      * For example, given the distribution
 370      *
 371      * @code
 372      * discrete_distribution<> dist{1, 4, 5};
 373      * @endcode
 374      *
 375      * The probability of a 0 is 1/10, the probability of a 1 is 2/5,
 376      * the probability of a 2 is 1/2, and no other values are possible.
 377      */
 378     discrete_distribution(std::initializer_list<WeightType> wl)
 379     {
 380         init(wl.begin(), wl.end());
 381     }
 382 #endif
 383     /**
 384      * Constructs a discrete_distribution from a Boost.Range range.
 385      * If the range is empty, equivalent to the default constructor.
 386      * Otherwise, the values of the range represent weights for the
 387      * possible values of the distribution.
 388      */
 389     template<class Range>
 390     explicit discrete_distribution(const Range& range)
 391     {
 392         init(boost::begin(range), boost::end(range));
 393     }
 394     /**
 395      * Constructs a discrete_distribution that approximates a function.
 396      * If nw is zero, equivalent to the default constructor.
 397      * Otherwise, the range of the distribution is [0, nw),
 398      * and the weights are found by  calling fw with values
 399      * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
 400      * \f$\mbox{xmax} - \delta/2\f$, where
 401      * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
 402      */
 403     template<class Func>
 404     discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw)
 405     {
 406         std::size_t n = (nw == 0) ? 1 : nw;
 407         double delta = (xmax - xmin) / n;
 408         BOOST_ASSERT(delta > 0);
 409         std::vector<WeightType> weights;
 410         for(std::size_t k = 0; k < n; ++k) {
 411             weights.push_back(fw(xmin + k*delta + delta/2));
 412         }
 413         init(weights.begin(), weights.end());
 414     }
 415     /**
 416      * Constructs a discrete_distribution from its parameters.
 417      */
 418     explicit discrete_distribution(const param_type& parm)
 419     {
 420         param(parm);
 421     }
 422
 423     /**
 424      * Returns a value distributed according to the parameters of the
 425      * discrete_distribution.
 426      */
 427     template<class URNG>
 428     IntType operator()(URNG& urng) const
 429     {
 430         BOOST_ASSERT(!_impl._alias_table.empty());
 431         IntType result;
 432         WeightType test;
 433         do {
 434             result = uniform_int_distribution<IntType>((min)(), (max)())(urng);
 435             test = _impl.test(urng);
 436         } while(!_impl.accept(result, test));
 437         if(test < _impl._alias_table[result].first) {
 438             return result;
 439         } else {
 440             return(_impl._alias_table[result].second);
 441         }
 442     }
 443
 444     /**
 445      * Returns a value distributed according to the parameters
 446      * specified by param.
 447      */
 448     template<class URNG>
 449     IntType operator()(URNG& urng, const param_type& parm) const
 450     {
 451         if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) {
 452             WeightType val = impl_type::generate_in_range(urng, limit);
 453             WeightType sum = 0;
 454             std::size_t result = 0;
 455             for(typename std::vector<WeightType>::const_iterator
 456                     iter = parm._probabilities.begin(),
 457                     end = parm._probabilities.end();
 458                 iter != end; ++iter, ++result)
 459             {
 460                 sum += *iter;
 461                 if(sum > val) {
 462                     return result;
 463                 }
 464             }
 465             // This shouldn't be reachable, but round-off error
 466             // can prevent any match from being found when val is
 467             // very close to 1.
 468             return static_cast<IntType>(parm._probabilities.size() - 1);
 469         } else {
 470             // WeightType is integral and sum(parm._probabilities)
 471             // would overflow.  Just use the easy solution.
 472             return discrete_distribution(parm)(urng);
 473         }
 474     }
 475
 476     /** Returns the smallest value that the distribution can produce. */
 477     result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; }
 478     /** Returns the largest value that the distribution can produce. */
 479     result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const
 480     { return static_cast<result_type>(_impl._alias_table.size() - 1); }
 481
 482     /**
 483      * Returns a vector containing the probabilities of each
 484      * value of the distribution.  For example, given
 485      *
 486      * @code
 487      * discrete_distribution<> dist = { 1, 4, 5 };
 488      * std::vector<double> p = dist.param();
 489      * @endcode
 490      *
 491      * the vector, p will contain {0.1, 0.4, 0.5}.
 492      *
 493      * If @c WeightType is integral, then the weights
 494      * will be returned unchanged.
 495      */
 496     std::vector<WeightType> probabilities() const
 497     {
 498         std::vector<WeightType> result(_impl._alias_table.size());
 499         std::size_t i = 0;
 500         for(typename impl_type::alias_table_t::const_iterator
 501                 iter = _impl._alias_table.begin(),
 502                 end = _impl._alias_table.end();
 503                 iter != end; ++iter, ++i)
 504         {
 505             WeightType val = iter->first;
 506             result[i] += val;
 507             result[iter->second] += _impl.get_weight(i) - val;
 508         }
 509         impl_type::normalize(result);
 510         return(result);
 511     }
 512
 513     /** Returns the parameters of the distribution. */
 514     param_type param() const
 515     {
 516         return param_type(*this);
 517     }
 518     /** Sets the parameters of the distribution. */
 519     void param(const param_type& parm)
 520     {
 521         init(parm._probabilities.begin(), parm._probabilities.end());
 522     }
 523
 524     /**
 525      * Effects: Subsequent uses of the distribution do not depend
 526      * on values produced by any engine prior to invoking reset.
 527      */
 528     void reset() {}
 529
 530     /** Writes a distribution to a @c std::ostream. */
 531     BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd)
 532     {
 533         os << dd.param();
 534         return os;
 535     }
 536
 537     /** Reads a distribution from a @c std::istream */
 538     BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd)
 539     {
 540         param_type parm;
 541         if(is >> parm) {
 542             dd.param(parm);
 543         }
 544         return is;
 545     }
 546
 547     /**
 548      * Returns true if the two distributions will return the
 549      * same sequence of values, when passed equal generators.
 550      */
 551     BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs)
 552     {
 553         return lhs._impl == rhs._impl;
 554     }
 555     /**
 556      * Returns true if the two distributions may return different
 557      * sequences of values, when passed equal generators.
 558      */
 559     BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution)
 560
 561 private:
 562
 563     /// @cond show_private
 564
 565     template<class Iter>
 566     void init(Iter first, Iter last, std::input_iterator_tag)
 567     {
 568         std::vector<WeightType> temp(first, last);
 569         init(temp.begin(), temp.end());
 570     }
 571     template<class Iter>
 572     void init(Iter first, Iter last, std::forward_iterator_tag)
 573     {
 574         std::vector<std::pair<WeightType, IntType> > below_average;
 575         std::vector<std::pair<WeightType, IntType> > above_average;
 576         WeightType weight_average = _impl.init_average(first, last);
 577         WeightType normalized_average = _impl.get_weight(0);
 578         std::size_t i = 0;
 579         for(; first != last; ++first, ++i) {
 580             WeightType val = impl_type::normalize(*first, weight_average);
 581             std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i));
 582             if(val < normalized_average) {
 583                 below_average.push_back(elem);
 584             } else {
 585                 above_average.push_back(elem);
 586             }
 587         }
 588
 589         typename impl_type::alias_table_t::iterator
 590             b_iter = below_average.begin(),
 591             b_end = below_average.end(),
 592             a_iter = above_average.begin(),
 593             a_end = above_average.end()
 594             ;
 595         while(b_iter != b_end && a_iter != a_end) {
 596             _impl._alias_table[b_iter->second] =
 597                 std::make_pair(b_iter->first, a_iter->second);
 598             a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first);
 599             if(a_iter->first < normalized_average) {
 600                 *b_iter = *a_iter++;
 601             } else {
 602                 ++b_iter;
 603             }
 604         }
 605         for(; b_iter != b_end; ++b_iter) {
 606             _impl._alias_table[b_iter->second].first =
 607                 _impl.get_weight(b_iter->second);
 608         }
 609         for(; a_iter != a_end; ++a_iter) {
 610             _impl._alias_table[a_iter->second].first =
 611                 _impl.get_weight(a_iter->second);
 612         }
 613     }
 614     template<class Iter>
 615     void init(Iter first, Iter last)
 616     {
 617         if(first == last) {
 618             _impl.init_empty();
 619         } else {
 620             typename std::iterator_traits<Iter>::iterator_category category;
 621             init(first, last, category);
 622         }
 623     }
 624     typedef typename detail::select_alias_table<
 625         (::boost::is_integral<WeightType>::value)
 626     >::template apply<IntType, WeightType>::type impl_type;
 627     impl_type _impl;
 628     /// @endcond
 629 };
 630
 631 }
 632 }
 633
 634 #include <boost/random/detail/enable_warnings.hpp>
 635
 636 #endif