1 // (C) Copyright Nick Thompson 2018.
2 // Use, modification and distribution are subject to the
3 // Boost Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
7 #define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
12 #include <boost/assert.hpp>
13 #include <boost/config/header_deprecated.hpp>
15 BOOST_HEADER_DEPRECATED("<boost/math/statistics/univariate_statistics.hpp>");
17 namespace boost::math::tools {
19 template<class ForwardIterator>
20 auto mean(ForwardIterator first, ForwardIterator last)
22 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
23 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
24 if constexpr (std::is_integral<Real>::value)
28 for(auto it = first; it != last; ++it) {
29 mu = mu + (*it - mu)/i;
34 else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
36 size_t elements = std::distance(first, last);
42 auto end = last - (elements % 4);
43 for(auto it = first; it != end; it += 4) {
45 Real tmp0 = (*it - mu0);
46 Real tmp1 = (*(it+1) - mu1);
47 Real tmp2 = (*(it+2) - mu2);
48 Real tmp3 = (*(it+3) - mu3);
49 // please generate a vectorized fma here
56 Real num1 = Real(elements - (elements %4))/Real(4);
57 Real num2 = num1 + Real(elements % 4);
59 for (auto it = end; it != last; ++it)
65 return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
81 template<class Container>
82 inline auto mean(Container const & v)
84 return mean(v.cbegin(), v.cend());
87 template<class ForwardIterator>
88 auto variance(ForwardIterator first, ForwardIterator last)
90 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
91 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
92 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
93 if constexpr (std::is_integral<Real>::value)
98 for (auto it = std::next(first); it != last; ++it)
100 double tmp = *it - M;
101 Q = Q + ((k-1)*tmp*tmp)/k;
112 for (auto it = std::next(first); it != last; ++it)
114 Real tmp = (*it - M)/k;
115 Q += k*(k-1)*tmp*tmp;
123 template<class Container>
124 inline auto variance(Container const & v)
126 return variance(v.cbegin(), v.cend());
129 template<class ForwardIterator>
130 auto sample_variance(ForwardIterator first, ForwardIterator last)
132 size_t n = std::distance(first, last);
133 BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
134 return n*variance(first, last)/(n-1);
137 template<class Container>
138 inline auto sample_variance(Container const & v)
140 return sample_variance(v.cbegin(), v.cend());
144 // Follows equation 1.5 of:
145 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
146 template<class ForwardIterator>
147 auto skewness(ForwardIterator first, ForwardIterator last)
149 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
150 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
151 if constexpr (std::is_integral<Real>::value)
157 for (auto it = std::next(first); it != last; ++it)
159 double delta21 = *it - M1;
160 double tmp = delta21/n;
161 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
162 M2 = M2 + tmp*(n-1)*delta21;
167 double var = M2/(n-1);
170 // The limit is technically undefined, but the interpretation here is clear:
171 // A constant dataset has no skewness.
174 double skew = M3/(M2*sqrt(var));
183 for (auto it = std::next(first); it != last; ++it)
185 Real delta21 = *it - M1;
186 Real tmp = delta21/n;
187 M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
188 M2 += tmp*(n-1)*delta21;
196 // The limit is technically undefined, but the interpretation here is clear:
197 // A constant dataset has no skewness.
200 Real skew = M3/(M2*sqrt(var));
205 template<class Container>
206 inline auto skewness(Container const & v)
208 return skewness(v.cbegin(), v.cend());
211 // Follows equation 1.5/1.6 of:
212 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
213 template<class ForwardIterator>
214 auto first_four_moments(ForwardIterator first, ForwardIterator last)
216 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
217 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
218 if constexpr (std::is_integral<Real>::value)
225 for (auto it = std::next(first); it != last; ++it)
227 double delta21 = *it - M1;
228 double tmp = delta21/n;
229 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
230 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
231 M2 = M2 + tmp*(n-1)*delta21;
236 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
245 for (auto it = std::next(first); it != last; ++it)
247 Real delta21 = *it - M1;
248 Real tmp = delta21/n;
249 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
250 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
251 M2 = M2 + tmp*(n-1)*delta21;
256 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
260 template<class Container>
261 inline auto first_four_moments(Container const & v)
263 return first_four_moments(v.cbegin(), v.cend());
267 // Follows equation 1.6 of:
268 // https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
269 template<class ForwardIterator>
270 auto kurtosis(ForwardIterator first, ForwardIterator last)
272 auto [M1, M2, M3, M4] = first_four_moments(first, last);
280 template<class Container>
281 inline auto kurtosis(Container const & v)
283 return kurtosis(v.cbegin(), v.cend());
286 template<class ForwardIterator>
287 auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
289 return kurtosis(first, last) - 3;
292 template<class Container>
293 inline auto excess_kurtosis(Container const & v)
295 return excess_kurtosis(v.cbegin(), v.cend());
299 template<class RandomAccessIterator>
300 auto median(RandomAccessIterator first, RandomAccessIterator last)
302 size_t num_elems = std::distance(first, last);
303 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
306 auto middle = first + (num_elems - 1)/2;
307 std::nth_element(first, middle, last);
312 auto middle = first + num_elems/2 - 1;
313 std::nth_element(first, middle, last);
314 std::nth_element(middle, middle+1, last);
315 return (*middle + *(middle+1))/2;
320 template<class RandomAccessContainer>
321 inline auto median(RandomAccessContainer & v)
323 return median(v.begin(), v.end());
326 template<class RandomAccessIterator>
327 auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
329 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
330 BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
332 std::sort(first, last);
333 if constexpr (std::is_integral<Real>::value)
338 for (auto it = first; it != last; ++it)
345 // If the l1 norm is zero, all elements are zero, so every element is the same.
351 return ((2*num)/denom - i)/(i-1);
358 for (auto it = first; it != last; ++it)
365 // If the l1 norm is zero, all elements are zero, so every element is the same.
371 return ((2*num)/denom - i)/(i-1);
375 template<class RandomAccessContainer>
376 inline auto gini_coefficient(RandomAccessContainer & v)
378 return gini_coefficient(v.begin(), v.end());
381 template<class RandomAccessIterator>
382 inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
384 size_t n = std::distance(first, last);
385 return n*gini_coefficient(first, last)/(n-1);
388 template<class RandomAccessContainer>
389 inline auto sample_gini_coefficient(RandomAccessContainer & v)
391 return sample_gini_coefficient(v.begin(), v.end());
394 template<class RandomAccessIterator>
395 auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
398 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
402 center = boost::math::tools::median(first, last);
404 size_t num_elems = std::distance(first, last);
405 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
406 auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);};
409 auto middle = first + (num_elems - 1)/2;
410 std::nth_element(first, middle, last, comparator);
415 auto middle = first + num_elems/2 - 1;
416 std::nth_element(first, middle, last, comparator);
417 std::nth_element(middle, middle+1, last, comparator);
418 return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
422 template<class RandomAccessContainer>
423 inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
425 return median_absolute_deviation(v.begin(), v.end(), center);