libs/math/doc/distributions/geometric.qbk

   1 [section:geometric_dist Geometric Distribution]
   2
   3 ``#include <boost/math/distributions/geometric.hpp>``
   4
   5    namespace boost{ namespace math{
   6
   7    template <class RealType = double,
   8              class ``__Policy``   = ``__policy_class`` >
   9    class geometric_distribution;
  10
  11    typedef geometric_distribution<> geometric;
  12
  13    template <class RealType, class ``__Policy``>
  14    class geometric_distribution
  15    {
  16    public:
  17       typedef RealType value_type;
  18       typedef Policy   policy_type;
  19       // Constructor from success_fraction:
  20       geometric_distribution(RealType p);
  21
  22       // Parameter accessors:
  23       RealType success_fraction() const;
  24       RealType successes() const;
  25
  26       // Bounds on success fraction:
  27       static RealType find_lower_bound_on_p(
  28          RealType trials,
  29          RealType successes,
  30          RealType probability); // alpha
  31       static RealType find_upper_bound_on_p(
  32          RealType trials,
  33          RealType successes,
  34          RealType probability); // alpha
  35
  36       // Estimate min/max number of trials:
  37       static RealType find_minimum_number_of_trials(
  38          RealType k,     // Number of failures.
  39          RealType p,     // Success fraction.
  40          RealType probability); // Probability threshold alpha.
  41       static RealType find_maximum_number_of_trials(
  42          RealType k,     // Number of failures.
  43          RealType p,     // Success fraction.
  44          RealType probability); // Probability threshold alpha.
  45    };
  46
  47    }} // namespaces
  48
  49 The class type `geometric_distribution` represents a
  50 [@http://en.wikipedia.org/wiki/geometric_distribution geometric distribution]:
  51 it is used when there are exactly two mutually exclusive outcomes of a
  52 [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trial]:
  53 these outcomes are labelled "success" and "failure".
  54
  55 For [@http://en.wikipedia.org/wiki/Bernoulli_trial Bernoulli trials]
  56 each with success fraction /p/, the geometric distribution gives
  57 the probability of observing /k/ trials (failures, events, occurrences, or arrivals)
  58 before the first success.
  59
  60 [note For this implementation, the set of trials *includes zero*
  61 (unlike another definition where the set of trials starts at one, sometimes named /shifted/).]
  62 The geometric distribution assumes that success_fraction /p/ is fixed for all /k/ trials.
  63
  64 The probability that there are /k/ failures before the first success
  65
  66 [expression Pr(Y=/k/) = (1-/p/)[super /k/] /p/]
  67
  68 For example, when throwing a 6-face dice the success probability /p/ = 1/6 = 0.1666[recur].
  69 Throwing repeatedly until a /three/ appears,
  70 the probability distribution of the number of times /not-a-three/ is thrown is geometric.
  71
  72 Geometric distribution has the Probability Density Function PDF:
  73
  74 [expression (1-/p/)[super /k/] /p/]
  75
  76 The following graph illustrates how the PDF and CDF vary for three examples
  77 of the success fraction /p/,
  78 (when considering the geometric distribution as a continuous function),
  79
  80 [graph geometric_pdf_2]
  81
  82 [graph geometric_cdf_2]
  83
  84 and as discrete.
  85
  86 [graph geometric_pdf_discrete]
  87
  88 [graph geometric_cdf_discrete]
  89
  90
  91 [h4 Related Distributions]
  92
  93 The geometric distribution is a special case of
  94 the __negative_binomial_distrib with successes parameter /r/ = 1,
  95 so only one first and only success is required : thus by definition
  96 __spaces `geometric(p) == negative_binomial(1, p)`
  97
  98   negative_binomial_distribution(RealType r, RealType success_fraction);
  99   negative_binomial nb(1, success_fraction);
 100   geometric g(success_fraction);
 101   ASSERT(pdf(nb, 1) == pdf(g, 1));
 102
 103 This implementation uses real numbers for the computation throughout
 104 (because it uses the *real-valued* power and exponential functions).
 105 So to obtain a conventional strictly-discrete geometric distribution
 106 you must ensure that an integer value is provided for the number of trials
 107 (random variable) /k/,
 108 and take integer values (floor or ceil functions) from functions that return
 109 a number of successes.
 110
 111 [discrete_quantile_warning geometric]
 112
 113 [h4 Member Functions]
 114
 115 [h5 Constructor]
 116
 117    geometric_distribution(RealType p);
 118
 119 Constructor: /p/ or success_fraction is the probability of success of a single trial.
 120
 121 Requires: `0 <= p <= 1`.
 122
 123 [h5 Accessors]
 124
 125    RealType success_fraction() const; // successes / trials (0 <= p <= 1)
 126
 127 Returns the success_fraction parameter /p/ from which this distribution was constructed.
 128
 129    RealType successes() const; // required successes always one,
 130    // included for compatibility with negative binomial distribution
 131    // with successes r == 1.
 132
 133 Returns unity.
 134
 135 The following functions are equivalent to those provided for the negative binomial,
 136 with successes = 1, but are provided here for completeness.
 137
 138 The best method of calculation for the following functions is disputed:
 139 see __binomial_distrib and __negative_binomial_distrib for more discussion.
 140
 141 [h5 Lower Bound on success_fraction Parameter ['p]]
 142
 143       static RealType find_lower_bound_on_p(
 144         RealType failures,
 145         RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
 146
 147 Returns a *lower bound* on the success fraction:
 148
 149 [variablelist
 150 [[failures][The total number of failures before the 1st success.]]
 151 [[alpha][The largest acceptable probability that the true value of
 152          the success fraction is [*less than] the value returned.]]
 153 ]
 154
 155 For example, if you observe /k/ failures from /n/ trials
 156 the best estimate for the success fraction is simply 1/['n], but if you
 157 want to be 95% sure that the true value is [*greater than] some value,
 158 ['p[sub min]], then:
 159
 160    p``[sub min]`` = geometric_distribution<RealType>::
 161       find_lower_bound_on_p(failures, 0.05);
 162
 163 [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative_binomial confidence interval example.]
 164
 165 This function uses the Clopper-Pearson method of computing the lower bound on the
 166 success fraction, whilst many texts refer to this method as giving an "exact"
 167 result in practice it produces an interval that guarantees ['at least] the
 168 coverage required, and may produce pessimistic estimates for some combinations
 169 of /failures/ and /successes/.  See:
 170
 171 [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
 172 Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
 173 Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
 174
 175 [h5 Upper Bound on success_fraction Parameter p]
 176
 177    static RealType find_upper_bound_on_p(
 178       RealType trials,
 179       RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence.
 180
 181 Returns an *upper bound* on the success fraction:
 182
 183 [variablelist
 184 [[trials][The total number of trials conducted.]]
 185 [[alpha][The largest acceptable probability that the true value of
 186          the success fraction is [*greater than] the value returned.]]
 187 ]
 188
 189 For example, if you observe /k/ successes from /n/ trials the
 190 best estimate for the success fraction is simply ['k/n], but if you
 191 want to be 95% sure that the true value is [*less than] some value,
 192 ['p[sub max]], then:
 193
 194    p``[sub max]`` = geometric_distribution<RealType>::find_upper_bound_on_p(
 195                        k, 0.05);
 196
 197 [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_conf See negative binomial confidence interval example.]
 198
 199 This function uses the Clopper-Pearson method of computing the lower bound on the
 200 success fraction, whilst many texts refer to this method as giving an "exact"
 201 result in practice it produces an interval that guarantees ['at least] the
 202 coverage required, and may produce pessimistic estimates for some combinations
 203 of /failures/ and /successes/.  See:
 204
 205 [@http://www.ucs.louisiana.edu/~kxk4695/Discrete_new.pdf
 206 Yong Cai and K. Krishnamoorthy, A Simple Improved Inferential Method for Some Discrete Distributions.
 207 Computational statistics and data analysis, 2005, vol. 48, no3, 605-621].
 208
 209 [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures]
 210
 211    static RealType find_minimum_number_of_trials(
 212       RealType k,     // number of failures.
 213       RealType p,     // success fraction.
 214       RealType alpha); // probability threshold (0.05 equivalent to 95%).
 215
 216 This functions estimates the number of trials required to achieve a certain
 217 probability that [*more than ['k] failures will be observed].
 218
 219 [variablelist
 220 [[k][The target number of failures to be observed.]]
 221 [[p][The probability of ['success] for each trial.]]
 222 [[alpha][The maximum acceptable ['risk] that only ['k] failures or fewer will be observed.]]
 223 ]
 224
 225 For example:
 226
 227    geometric_distribution<RealType>::find_minimum_number_of_trials(10, 0.5, 0.05);
 228
 229 Returns the smallest number of trials we must conduct to be 95% (1-0.05) sure
 230 of seeing 10 failures that occur with frequency one half.
 231
 232 [link math_toolkit.stat_tut.weg.neg_binom_eg.neg_binom_size_eg Worked Example.]
 233
 234 This function uses numeric inversion of the geometric distribution
 235 to obtain the result: another interpretation of the result is that it finds
 236 the number of trials (failures) that will lead to an /alpha/ probability
 237 of observing /k/ failures or fewer.
 238
 239 [h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less]
 240
 241    static RealType find_maximum_number_of_trials(
 242       RealType k,     // number of failures.
 243       RealType p,     // success fraction.
 244       RealType alpha); // probability threshold (0.05 equivalent to 95%).
 245
 246 This functions estimates the maximum number of trials we can conduct and achieve
 247 a certain probability that [*k failures or fewer will be observed].
 248
 249 [variablelist
 250 [[k][The maximum number of failures to be observed.]]
 251 [[p][The probability of ['success] for each trial.]]
 252 [[alpha][The maximum acceptable ['risk] that more than ['k] failures will be observed.]]
 253 ]
 254
 255 For example:
 256
 257    geometric_distribution<RealType>::find_maximum_number_of_trials(0, 1.0-1.0/1000000, 0.05);
 258
 259 Returns the largest number of trials we can conduct and still be 95% sure
 260 of seeing no failures that occur with frequency one in one million.
 261
 262 This function uses numeric inversion of the geometric distribution
 263 to obtain the result: another interpretation of the result, is that it finds
 264 the number of trials that will lead to an /alpha/ probability
 265 of observing more than k failures.
 266
 267 [h4 Non-member Accessors]
 268
 269 All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
 270 that are generic to all distributions are supported: __usual_accessors.
 271
 272 However it's worth taking a moment to define what these actually mean in
 273 the context of this distribution:
 274
 275 [table Meaning of the non-member accessors.
 276 [[Function][Meaning]]
 277 [[__pdf]
 278    [The probability of obtaining [*exactly k failures] from /k/ trials
 279    with success fraction p.  For example:
 280
 281 ``pdf(geometric(p), k)``]]
 282 [[__cdf]
 283    [The probability of obtaining [*k failures or fewer] from /k/ trials
 284    with success fraction p and success on the last trial.  For example:
 285
 286 ``cdf(geometric(p), k)``]]
 287 [[__ccdf]
 288    [The probability of obtaining [*more than k failures] from /k/ trials
 289    with success fraction p and success on the last trial.  For example:
 290
 291 ``cdf(complement(geometric(p), k))``]]
 292 [[__quantile]
 293    [The [*greatest] number of failures /k/ expected to be observed from /k/ trials
 294    with success fraction /p/, at probability /P/.  Note that the value returned
 295    is a real-number, and not an integer.  Depending on the use case you may
 296    want to take either the floor or ceiling of the real result.  For example:
 297 ``quantile(geometric(p), P)``]]
 298 [[__quantile_c]
 299    [The [*smallest] number of failures /k/ expected to be observed from /k/ trials
 300    with success fraction /p/, at probability /P/.  Note that the value returned
 301    is a real-number, and not an integer.  Depending on the use case you may
 302    want to take either the floor or ceiling of the real result. For example:
 303    ``quantile(complement(geometric(p), P))``]]
 304 ]
 305
 306 [h4 Accuracy]
 307
 308 This distribution is implemented using the pow and exp functions, so most results
 309 are accurate within a few epsilon for the RealType.
 310 For extreme values of `double` /p/, for example 0.9999999999,
 311 accuracy can fall significantly, for example to 10 decimal digits (from 16).
 312
 313 [h4 Implementation]
 314
 315 In the following table, /p/ is the probability that any one trial will
 316 be successful (the success fraction), /k/ is the number of failures,
 317 /p/ is the probability and /q = 1-p/,
 318 /x/ is the given probability to estimate
 319 the expected number of failures using the quantile.
 320
 321 [table
 322 [[Function][Implementation Notes]]
 323 [[pdf][pdf =  p * pow(q, k)]]
 324 [[cdf][cdf = 1 - q[super k=1]]]
 325 [[cdf complement][exp(log1p(-p) * (k+1))]]
 326 [[quantile][k = log1p(-x) / log1p(-p) -1]]
 327 [[quantile from the complement][k = log(x) / log1p(-p) -1]]
 328 [[mean][(1-p)/p]]
 329 [[variance][(1-p)/p[sup2]]]
 330 [[mode][0]]
 331 [[skewness][(2-p)/[sqrt]q]]
 332 [[kurtosis][9+p[sup2]/q]]
 333 [[kurtosis excess][6 +p[sup2]/q]]
 334 [[parameter estimation member functions][See __negative_binomial_distrib]]
 335 [[`find_lower_bound_on_p`][See __negative_binomial_distrib]]
 336 [[`find_upper_bound_on_p`][See __negative_binomial_distrib]]
 337 [[`find_minimum_number_of_trials`][See __negative_binomial_distrib]]
 338 [[`find_maximum_number_of_trials`][See __negative_binomial_distrib]]
 339 ]
 340
 341 [endsect] [/section:geometric_dist geometric]
 342
 343 [/ geometric.qbk
 344   Copyright 2010 John Maddock and Paul A. Bristow.
 345   Distributed under the Boost Software License, Version 1.0.
 346   (See accompanying file LICENSE_1_0.txt or copy at
 347   http://www.boost.org/LICENSE_1_0.txt).
 348 ]
 349