1 // (C) Copyright Nick Thompson 2018.
2 // Use, modification and distribution are subject to the
3 // Boost Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
7 #define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
11 #include <boost/assert.hpp>
13 namespace boost::math::statistics {
15 template<class ForwardIterator>
16 auto mean(ForwardIterator first, ForwardIterator last)
18 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
19 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
20 if constexpr (std::is_integral<Real>::value)
24 for(auto it = first; it != last; ++it) {
25 mu = mu + (*it - mu)/i;
30 else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
32 size_t elements = std::distance(first, last);
38 auto end = last - (elements % 4);
39 for(auto it = first; it != end; it += 4) {
41 Real tmp0 = (*it - mu0);
42 Real tmp1 = (*(it+1) - mu1);
43 Real tmp2 = (*(it+2) - mu2);
44 Real tmp3 = (*(it+3) - mu3);
45 // please generate a vectorized fma here
52 Real num1 = Real(elements - (elements %4))/Real(4);
53 Real num2 = num1 + Real(elements % 4);
55 for (auto it = end; it != last; ++it)
61 return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
77 template<class Container>
78 inline auto mean(Container const & v)
80 return mean(v.cbegin(), v.cend());
83 template<class ForwardIterator>
84 auto variance(ForwardIterator first, ForwardIterator last)
86 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
87 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
88 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
89 if constexpr (std::is_integral<Real>::value)
94 for (auto it = std::next(first); it != last; ++it)
97 Q = Q + ((k-1)*tmp*tmp)/k;
108 for (auto it = std::next(first); it != last; ++it)
110 Real tmp = (*it - M)/k;
111 Q += k*(k-1)*tmp*tmp;
119 template<class Container>
120 inline auto variance(Container const & v)
122 return variance(v.cbegin(), v.cend());
125 template<class ForwardIterator>
126 auto sample_variance(ForwardIterator first, ForwardIterator last)
128 size_t n = std::distance(first, last);
129 BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
130 return n*variance(first, last)/(n-1);
133 template<class Container>
134 inline auto sample_variance(Container const & v)
136 return sample_variance(v.cbegin(), v.cend());
139 template<class ForwardIterator>
140 auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last)
142 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
143 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
144 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
145 if constexpr (std::is_integral<Real>::value)
150 for (auto it = std::next(first); it != last; ++it)
152 double tmp = *it - M;
153 Q = Q + ((k-1)*tmp*tmp)/k;
157 return std::pair<double, double>{M, Q/(k-2)};
164 for (auto it = std::next(first); it != last; ++it)
166 Real tmp = (*it - M)/k;
167 Q += k*(k-1)*tmp*tmp;
171 return std::pair<Real, Real>{M, Q/(k-2)};
175 template<class Container>
176 auto mean_and_sample_variance(Container const & v)
178 return mean_and_sample_variance(v.begin(), v.end());
181 // Follows equation 1.5 of:
182 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
183 template<class ForwardIterator>
184 auto skewness(ForwardIterator first, ForwardIterator last)
186 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
187 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
188 if constexpr (std::is_integral<Real>::value)
194 for (auto it = std::next(first); it != last; ++it)
196 double delta21 = *it - M1;
197 double tmp = delta21/n;
198 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
199 M2 = M2 + tmp*(n-1)*delta21;
204 double var = M2/(n-1);
207 // The limit is technically undefined, but the interpretation here is clear:
208 // A constant dataset has no skewness.
211 double skew = M3/(M2*sqrt(var));
220 for (auto it = std::next(first); it != last; ++it)
222 Real delta21 = *it - M1;
223 Real tmp = delta21/n;
224 M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
225 M2 += tmp*(n-1)*delta21;
233 // The limit is technically undefined, but the interpretation here is clear:
234 // A constant dataset has no skewness.
237 Real skew = M3/(M2*sqrt(var));
242 template<class Container>
243 inline auto skewness(Container const & v)
245 return skewness(v.cbegin(), v.cend());
248 // Follows equation 1.5/1.6 of:
249 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
250 template<class ForwardIterator>
251 auto first_four_moments(ForwardIterator first, ForwardIterator last)
253 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
254 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
255 if constexpr (std::is_integral<Real>::value)
262 for (auto it = std::next(first); it != last; ++it)
264 double delta21 = *it - M1;
265 double tmp = delta21/n;
266 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
267 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
268 M2 = M2 + tmp*(n-1)*delta21;
273 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
282 for (auto it = std::next(first); it != last; ++it)
284 Real delta21 = *it - M1;
285 Real tmp = delta21/n;
286 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
287 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
288 M2 = M2 + tmp*(n-1)*delta21;
293 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
297 template<class Container>
298 inline auto first_four_moments(Container const & v)
300 return first_four_moments(v.cbegin(), v.cend());
304 // Follows equation 1.6 of:
305 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
306 template<class ForwardIterator>
307 auto kurtosis(ForwardIterator first, ForwardIterator last)
309 auto [M1, M2, M3, M4] = first_four_moments(first, last);
317 template<class Container>
318 inline auto kurtosis(Container const & v)
320 return kurtosis(v.cbegin(), v.cend());
323 template<class ForwardIterator>
324 auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
326 return kurtosis(first, last) - 3;
329 template<class Container>
330 inline auto excess_kurtosis(Container const & v)
332 return excess_kurtosis(v.cbegin(), v.cend());
336 template<class RandomAccessIterator>
337 auto median(RandomAccessIterator first, RandomAccessIterator last)
339 size_t num_elems = std::distance(first, last);
340 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
343 auto middle = first + (num_elems - 1)/2;
344 std::nth_element(first, middle, last);
349 auto middle = first + num_elems/2 - 1;
350 std::nth_element(first, middle, last);
351 std::nth_element(middle, middle+1, last);
352 return (*middle + *(middle+1))/2;
357 template<class RandomAccessContainer>
358 inline auto median(RandomAccessContainer & v)
360 return median(v.begin(), v.end());
363 template<class RandomAccessIterator>
364 auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
366 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
367 BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
369 std::sort(first, last);
370 if constexpr (std::is_integral<Real>::value)
375 for (auto it = first; it != last; ++it)
382 // If the l1 norm is zero, all elements are zero, so every element is the same.
388 return ((2*num)/denom - i)/(i-1);
395 for (auto it = first; it != last; ++it)
402 // If the l1 norm is zero, all elements are zero, so every element is the same.
408 return ((2*num)/denom - i)/(i-1);
412 template<class RandomAccessContainer>
413 inline auto gini_coefficient(RandomAccessContainer & v)
415 return gini_coefficient(v.begin(), v.end());
418 template<class RandomAccessIterator>
419 inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
421 size_t n = std::distance(first, last);
422 return n*gini_coefficient(first, last)/(n-1);
425 template<class RandomAccessContainer>
426 inline auto sample_gini_coefficient(RandomAccessContainer & v)
428 return sample_gini_coefficient(v.begin(), v.end());
431 template<class RandomAccessIterator>
432 auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
435 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
439 center = boost::math::statistics::median(first, last);
441 size_t num_elems = std::distance(first, last);
442 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
443 auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);};
446 auto middle = first + (num_elems - 1)/2;
447 std::nth_element(first, middle, last, comparator);
452 auto middle = first + num_elems/2 - 1;
453 std::nth_element(first, middle, last, comparator);
454 std::nth_element(middle, middle+1, last, comparator);
455 return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
459 template<class RandomAccessContainer>
460 inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
462 return median_absolute_deviation(v.begin(), v.end(), center);