2 * (C) Copyright Nick Thompson 2018.
3 * Use, modification and distribution are subject to the
4 * Boost Software License, Version 1.0. (See accompanying file
5 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
10 #include <forward_list>
13 #include <boost/core/lightweight_test.hpp>
14 #include <boost/numeric/ublas/vector.hpp>
15 #include <boost/math/constants/constants.hpp>
16 #include <boost/math/statistics/univariate_statistics.hpp>
17 #include <boost/multiprecision/cpp_bin_float.hpp>
18 #include <boost/multiprecision/cpp_complex.hpp>
20 using boost::multiprecision::cpp_bin_float_50;
21 using boost::multiprecision::cpp_complex_50;
25 * 1) Does it work with multiprecision?
26 * 2) Does it work with .cbegin()/.cend() if the data is not altered?
27 * 3) Does it work with ublas and std::array? (Checking Eigen and Armadillo will make the CI system really unhappy.)
28 * 4) Does it work with std::forward_list if a forward iterator is all that is required?
29 * 5) Does it work with complex data if complex data is sensible?
32 // To stress test, set global_seed = 0, global_size = huge.
33 static const constexpr size_t global_seed = 0;
34 static const constexpr size_t global_size = 128;
37 std::vector<T> generate_random_vector(size_t size, size_t seed)
41 std::random_device rd;
44 std::vector<T> v(size);
46 std::mt19937 gen(seed);
48 if constexpr (std::is_floating_point<T>::value)
50 std::normal_distribution<T> dis(0, 1);
51 for (size_t i = 0; i < v.size(); ++i)
57 else if constexpr (std::is_integral<T>::value)
59 // Rescaling by larger than 2 is UB!
60 std::uniform_int_distribution<T> dis(std::numeric_limits<T>::lowest()/2, (std::numeric_limits<T>::max)()/2);
61 for (size_t i = 0; i < v.size(); ++i)
67 else if constexpr (boost::is_complex<T>::value)
69 std::normal_distribution<typename T::value_type> dis(0, 1);
70 for (size_t i = 0; i < v.size(); ++i)
72 v[i] = {dis(gen), dis(gen)};
76 else if constexpr (boost::multiprecision::number_category<T>::value == boost::multiprecision::number_kind_complex)
78 std::normal_distribution<long double> dis(0, 1);
79 for (size_t i = 0; i < v.size(); ++i)
81 v[i] = {dis(gen), dis(gen)};
85 else if constexpr (boost::multiprecision::number_category<T>::value == boost::multiprecision::number_kind_floating_point)
87 std::normal_distribution<long double> dis(0, 1);
88 for (size_t i = 0; i < v.size(); ++i)
96 BOOST_ASSERT_MSG(false, "Could not identify type for random vector generation.");
103 void test_integer_mean()
105 double tol = 100*std::numeric_limits<double>::epsilon();
106 std::vector<Z> v{1,2,3,4,5};
107 double mu = boost::math::statistics::mean(v);
108 BOOST_TEST(abs(mu - 3) < tol);
110 // Work with std::array?
111 std::array<Z, 5> w{1,2,3,4,5};
112 mu = boost::math::statistics::mean(w);
113 BOOST_TEST(abs(mu - 3) < tol);
115 v = generate_random_vector<Z>(global_size, global_seed);
118 double m1 = scale*boost::math::statistics::mean(v);
123 double m2 = boost::math::statistics::mean(v);
124 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
127 template<class RandomAccessContainer>
128 auto naive_mean(RandomAccessContainer const & v)
130 typename RandomAccessContainer::value_type sum = 0;
141 Real tol = std::numeric_limits<Real>::epsilon();
142 std::vector<Real> v{1,2,3,4,5};
143 Real mu = boost::math::statistics::mean(v.begin(), v.end());
144 BOOST_TEST(abs(mu - 3) < tol);
146 // Does range call work?
147 mu = boost::math::statistics::mean(v);
148 BOOST_TEST(abs(mu - 3) < tol);
150 // Can we successfully average only part of the vector?
151 mu = boost::math::statistics::mean(v.begin(), v.begin() + 3);
152 BOOST_TEST(abs(mu - 2) < tol);
154 // Does it work when we const qualify?
155 mu = boost::math::statistics::mean(v.cbegin(), v.cend());
156 BOOST_TEST(abs(mu - 3) < tol);
158 // Does it work for std::array?
159 std::array<Real, 7> u{1,2,3,4,5,6,7};
160 mu = boost::math::statistics::mean(u.begin(), u.end());
161 BOOST_TEST(abs(mu - 4) < tol);
163 // Does it work for a forward iterator?
164 std::forward_list<Real> l{1,2,3,4,5,6,7};
165 mu = boost::math::statistics::mean(l.begin(), l.end());
166 BOOST_TEST(abs(mu - 4) < tol);
168 // Does it work with ublas vectors?
169 boost::numeric::ublas::vector<Real> w(7);
170 for (size_t i = 0; i < w.size(); ++i)
174 mu = boost::math::statistics::mean(w.cbegin(), w.cend());
175 BOOST_TEST(abs(mu - 4) < tol);
177 v = generate_random_vector<Real>(global_size, global_seed);
179 Real m1 = scale*boost::math::statistics::mean(v);
184 Real m2 = boost::math::statistics::mean(v);
185 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
188 for (size_t i = 1; i < 30; ++i)
190 v = generate_random_vector<Real>(i, 12803);
191 auto naive_ = naive_mean(v);
192 auto higham_ = boost::math::statistics::mean(v);
193 if (abs(higham_ - naive_) >= 100*tol*abs(naive_))
195 std::cout << std::hexfloat;
196 std::cout << "Terms = " << v.size() << "\n";
197 std::cout << "higham = " << higham_ << "\n";
198 std::cout << "naive_ = " << naive_ << "\n";
200 BOOST_TEST(abs(higham_ - naive_) < 100*tol*abs(naive_));
205 template<class Complex>
206 void test_complex_mean()
208 typedef typename Complex::value_type Real;
209 Real tol = std::numeric_limits<Real>::epsilon();
210 std::vector<Complex> v{{0,1},{0,2},{0,3},{0,4},{0,5}};
211 auto mu = boost::math::statistics::mean(v.begin(), v.end());
212 BOOST_TEST(abs(mu.imag() - 3) < tol);
213 BOOST_TEST(abs(mu.real()) < tol);
216 mu = boost::math::statistics::mean(v);
217 BOOST_TEST(abs(mu.imag() - 3) < tol);
218 BOOST_TEST(abs(mu.real()) < tol);
224 Real tol = std::numeric_limits<Real>::epsilon();
225 std::vector<Real> v{1,1,1,1,1,1};
226 Real sigma_sq = boost::math::statistics::variance(v.begin(), v.end());
227 BOOST_TEST(abs(sigma_sq) < tol);
229 sigma_sq = boost::math::statistics::variance(v);
230 BOOST_TEST(abs(sigma_sq) < tol);
232 Real s_sq = boost::math::statistics::sample_variance(v);
233 BOOST_TEST(abs(s_sq) < tol);
235 std::vector<Real> u{1};
236 sigma_sq = boost::math::statistics::variance(u.cbegin(), u.cend());
237 BOOST_TEST(abs(sigma_sq) < tol);
239 std::array<Real, 8> w{0,1,0,1,0,1,0,1};
240 sigma_sq = boost::math::statistics::variance(w.begin(), w.end());
241 BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol);
243 sigma_sq = boost::math::statistics::variance(w);
244 BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol);
246 std::forward_list<Real> l{0,1,0,1,0,1,0,1};
247 sigma_sq = boost::math::statistics::variance(l.begin(), l.end());
248 BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol);
250 v = generate_random_vector<Real>(global_size, global_seed);
252 Real m1 = scale*scale*boost::math::statistics::variance(v);
257 Real m2 = boost::math::statistics::variance(v);
258 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
260 // Wikipedia example for a variance of N sided die:
261 // https://en.wikipedia.org/wiki/Variance
262 for (size_t j = 16; j < 2048; j *= 2)
266 for (size_t i = 0; i < v.size(); ++i)
271 sigma_sq = boost::math::statistics::variance(v);
273 BOOST_TEST(abs(sigma_sq - (n*n-1)/Real(12)) <= tol*sigma_sq);
279 void test_integer_variance()
281 double tol = std::numeric_limits<double>::epsilon();
282 std::vector<Z> v{1,1,1,1,1,1};
283 double sigma_sq = boost::math::statistics::variance(v);
284 BOOST_TEST(abs(sigma_sq) < tol);
286 std::forward_list<Z> l{0,1,0,1,0,1,0,1};
287 sigma_sq = boost::math::statistics::variance(l.begin(), l.end());
288 BOOST_TEST(abs(sigma_sq - 1.0/4.0) < tol);
290 v = generate_random_vector<Z>(global_size, global_seed);
292 double m1 = scale*scale*boost::math::statistics::variance(v);
297 double m2 = boost::math::statistics::variance(v);
298 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
302 void test_integer_skewness()
304 double tol = std::numeric_limits<double>::epsilon();
305 std::vector<Z> v{1,1,1};
306 double skew = boost::math::statistics::skewness(v);
307 BOOST_TEST(abs(skew) < tol);
309 // Dataset is symmetric about the mean:
311 skew = boost::math::statistics::skewness(v);
312 BOOST_TEST(abs(skew) < tol);
315 // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2
316 skew = boost::math::statistics::skewness(v);
317 BOOST_TEST(abs(skew - 3.0/2.0) < tol);
319 std::forward_list<Z> v2{0,0,0,0,5};
320 skew = boost::math::statistics::skewness(v);
321 BOOST_TEST(abs(skew - 3.0/2.0) < tol);
324 v = generate_random_vector<Z>(global_size, global_seed);
326 double m1 = boost::math::statistics::skewness(v);
331 double m2 = boost::math::statistics::skewness(v);
332 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
339 Real tol = std::numeric_limits<Real>::epsilon();
340 std::vector<Real> v{1,1,1};
341 Real skew = boost::math::statistics::skewness(v);
342 BOOST_TEST(abs(skew) < tol);
344 // Dataset is symmetric about the mean:
346 skew = boost::math::statistics::skewness(v);
347 BOOST_TEST(abs(skew) < tol);
350 // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2
351 skew = boost::math::statistics::skewness(v);
352 BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol);
354 std::array<Real, 5> w1{0,0,0,0,5};
355 skew = boost::math::statistics::skewness(w1);
356 BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol);
358 std::forward_list<Real> w2{0,0,0,0,5};
359 skew = boost::math::statistics::skewness(w2);
360 BOOST_TEST(abs(skew - Real(3)/Real(2)) < tol);
362 v = generate_random_vector<Real>(global_size, global_seed);
364 Real m1 = boost::math::statistics::skewness(v);
369 Real m2 = boost::math::statistics::skewness(v);
370 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
376 Real tol = std::numeric_limits<Real>::epsilon();
377 std::vector<Real> v{1,1,1};
378 Real kurt = boost::math::statistics::kurtosis(v);
379 BOOST_TEST(abs(kurt) < tol);
382 // mu =1, sigma^2 = 2, kurtosis = 17/10
383 kurt = boost::math::statistics::kurtosis(v);
384 BOOST_TEST(abs(kurt - Real(17)/Real(10)) < tol);
387 // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4
388 kurt = boost::math::statistics::kurtosis(v);
389 BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol);
391 std::array<Real, 5> v1{0,0,0,0,5};
392 kurt = boost::math::statistics::kurtosis(v1);
393 BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol);
395 std::forward_list<Real> v2{0,0,0,0,5};
396 kurt = boost::math::statistics::kurtosis(v2);
397 BOOST_TEST(abs(kurt - Real(13)/Real(4)) < tol);
399 std::vector<Real> v3(10000);
400 std::mt19937 gen(42);
401 std::normal_distribution<long double> dis(0, 1);
402 for (size_t i = 0; i < v3.size(); ++i) {
405 kurt = boost::math::statistics::kurtosis(v3);
406 BOOST_TEST(abs(kurt - 3) < 0.1);
408 std::uniform_real_distribution<long double> udis(-1, 3);
409 for (size_t i = 0; i < v3.size(); ++i) {
412 auto excess_kurtosis = boost::math::statistics::excess_kurtosis(v3);
413 BOOST_TEST(abs(excess_kurtosis + 6.0/5.0) < 0.2);
415 v = generate_random_vector<Real>(global_size, global_seed);
417 Real m1 = boost::math::statistics::kurtosis(v);
422 Real m2 = boost::math::statistics::kurtosis(v);
423 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
425 // This test only passes when there are a large number of samples.
426 // Otherwise, the distribution doesn't generate enough outliers to give,
427 // or generates too many, giving pretty wildly different values of kurtosis on different runs.
428 // However, by kicking up the samples to 1,000,000, I got very close to 6 for the excess kurtosis on every run.
429 // The CI system, however, would die on a million long doubles.
430 //v3.resize(1000000);
431 //std::exponential_distribution<long double> edis(0.1);
432 //for (size_t i = 0; i < v3.size(); ++i) {
433 // v3[i] = edis(gen);
435 //excess_kurtosis = boost::math::statistics::kurtosis(v3) - 3;
436 //BOOST_TEST(abs(excess_kurtosis - 6.0) < 0.2);
440 void test_integer_kurtosis()
442 double tol = std::numeric_limits<double>::epsilon();
443 std::vector<Z> v{1,1,1};
444 double kurt = boost::math::statistics::kurtosis(v);
445 BOOST_TEST(abs(kurt) < tol);
448 // mu =1, sigma^2 = 2, kurtosis = 17/10
449 kurt = boost::math::statistics::kurtosis(v);
450 BOOST_TEST(abs(kurt - 17.0/10.0) < tol);
453 // mu = 1, sigma^2 = 4, sigma = 2, skew = 3/2, kurtosis = 13/4
454 kurt = boost::math::statistics::kurtosis(v);
455 BOOST_TEST(abs(kurt - 13.0/4.0) < tol);
457 v = generate_random_vector<Z>(global_size, global_seed);
459 double m1 = boost::math::statistics::kurtosis(v);
464 double m2 = boost::math::statistics::kurtosis(v);
465 BOOST_TEST(abs(m1 - m2) < tol*abs(m1));
469 void test_first_four_moments()
471 Real tol = 10*std::numeric_limits<Real>::epsilon();
472 std::vector<Real> v{1,1,1};
473 auto [M1_1, M2_1, M3_1, M4_1] = boost::math::statistics::first_four_moments(v);
474 BOOST_TEST(abs(M1_1 - 1) < tol);
475 BOOST_TEST(abs(M2_1) < tol);
476 BOOST_TEST(abs(M3_1) < tol);
477 BOOST_TEST(abs(M4_1) < tol);
480 auto [M1_2, M2_2, M3_2, M4_2] = boost::math::statistics::first_four_moments(v);
481 BOOST_TEST(abs(M1_2 - 3) < tol);
482 BOOST_TEST(abs(M2_2 - 2) < tol);
483 BOOST_TEST(abs(M3_2) < tol);
484 BOOST_TEST(abs(M4_2 - Real(34)/Real(5)) < tol);
491 std::vector<Real> v{1,2,3,4,5,6,7};
493 Real m = boost::math::statistics::median(v.begin(), v.end());
496 std::shuffle(v.begin(), v.end(), g);
497 // Does range call work?
498 m = boost::math::statistics::median(v);
502 m = boost::math::statistics::median(v.begin(), v.end());
504 std::shuffle(v.begin(), v.end(), g);
505 m = boost::math::statistics::median(v.begin(), v.end());
509 m = boost::math::statistics::median(v.begin(), v.end());
513 m = boost::math::statistics::median(v.begin(), v.end());
517 m = boost::math::statistics::median(v.begin(), v.end());
521 m = boost::math::statistics::median(v.begin(), v.end());
525 m = boost::math::statistics::median(v.begin(), v.end());
527 std::shuffle(v.begin(), v.end(), g);
528 m = boost::math::statistics::median(v.begin(), v.end());
531 // Does it work with std::array?
532 std::array<Real, 3> w{1,2,3};
533 m = boost::math::statistics::median(w);
536 // Does it work with ublas?
537 boost::numeric::ublas::vector<Real> w1(3);
541 m = boost::math::statistics::median(w);
546 void test_median_absolute_deviation()
548 std::vector<Real> v{-1, 2, -3, 4, -5, 6, -7};
550 Real m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
554 std::shuffle(v.begin(), v.end(), g);
555 m = boost::math::statistics::median_absolute_deviation(v, 0);
558 v = {1, -2, -3, 3, -4, -5};
559 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
561 std::shuffle(v.begin(), v.end(), g);
562 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
566 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
570 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
572 // The median is zero, so coincides with the default:
573 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end());
576 m = boost::math::statistics::median_absolute_deviation(v);
581 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
585 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
589 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
591 std::shuffle(v.begin(), v.end(), g);
592 m = boost::math::statistics::median_absolute_deviation(v.begin(), v.end(), 0);
595 std::array<Real, 3> w{1, 2, -3};
596 m = boost::math::statistics::median_absolute_deviation(w, 0);
599 // boost.ublas vector?
600 boost::numeric::ublas::vector<Real> u(6);
607 m = boost::math::statistics::median_absolute_deviation(u, 0);
613 void test_sample_gini_coefficient()
615 Real tol = std::numeric_limits<Real>::epsilon();
616 std::vector<Real> v{1,0,0};
617 Real gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end());
618 BOOST_TEST(abs(gini - 1) < tol);
620 gini = boost::math::statistics::sample_gini_coefficient(v);
621 BOOST_TEST(abs(gini - 1) < tol);
626 gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end());
627 BOOST_TEST(abs(gini) < tol);
632 gini = boost::math::statistics::sample_gini_coefficient(v.begin(), v.end());
633 BOOST_TEST(abs(gini) < tol);
635 std::array<Real, 3> w{0,0,0};
636 gini = boost::math::statistics::sample_gini_coefficient(w);
637 BOOST_TEST(abs(gini) < tol);
642 void test_gini_coefficient()
644 Real tol = std::numeric_limits<Real>::epsilon();
645 std::vector<Real> v{1,0,0};
646 Real gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
647 Real expected = Real(2)/Real(3);
648 BOOST_TEST(abs(gini - expected) < tol);
650 gini = boost::math::statistics::gini_coefficient(v);
651 BOOST_TEST(abs(gini - expected) < tol);
656 gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
657 BOOST_TEST(abs(gini) < tol);
662 gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
663 BOOST_TEST(abs(gini) < tol);
665 std::array<Real, 3> w{0,0,0};
666 gini = boost::math::statistics::gini_coefficient(w);
667 BOOST_TEST(abs(gini) < tol);
669 boost::numeric::ublas::vector<Real> w1(3);
673 gini = boost::math::statistics::gini_coefficient(w1);
674 BOOST_TEST(abs(gini) < tol);
676 std::mt19937 gen(18);
677 // Gini coefficient for a uniform distribution is (b-a)/(3*(b+a));
678 std::uniform_real_distribution<long double> dis(0, 3);
679 expected = (dis.b() - dis.a())/(3*(dis.b()+ dis.a()));
681 for(size_t i = 0; i < v.size(); ++i)
685 gini = boost::math::statistics::gini_coefficient(v);
686 BOOST_TEST(abs(gini - expected) < 0.01);
691 void test_integer_gini_coefficient()
693 double tol = std::numeric_limits<double>::epsilon();
694 std::vector<Z> v{1,0,0};
695 double gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
696 double expected = 2.0/3.0;
697 BOOST_TEST(abs(gini - expected) < tol);
699 gini = boost::math::statistics::gini_coefficient(v);
700 BOOST_TEST(abs(gini - expected) < tol);
705 gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
706 BOOST_TEST(abs(gini) < tol);
711 gini = boost::math::statistics::gini_coefficient(v.begin(), v.end());
712 BOOST_TEST(abs(gini) < tol);
714 std::array<Z, 3> w{0,0,0};
715 gini = boost::math::statistics::gini_coefficient(w);
716 BOOST_TEST(abs(gini) < tol);
718 boost::numeric::ublas::vector<Z> w1(3);
722 gini = boost::math::statistics::gini_coefficient(w1);
723 BOOST_TEST(abs(gini) < tol);
730 test_mean<long double>();
731 test_mean<cpp_bin_float_50>();
733 test_integer_mean<unsigned>();
734 test_integer_mean<int>();
736 test_complex_mean<std::complex<float>>();
737 test_complex_mean<cpp_complex_50>();
739 test_variance<float>();
740 test_variance<double>();
741 test_variance<long double>();
742 test_variance<cpp_bin_float_50>();
744 test_integer_variance<int>();
745 test_integer_variance<unsigned>();
747 test_skewness<float>();
748 test_skewness<double>();
749 test_skewness<long double>();
750 test_skewness<cpp_bin_float_50>();
752 test_integer_skewness<int>();
753 test_integer_skewness<unsigned>();
755 test_first_four_moments<float>();
756 test_first_four_moments<double>();
757 test_first_four_moments<long double>();
758 test_first_four_moments<cpp_bin_float_50>();
760 test_kurtosis<float>();
761 test_kurtosis<double>();
762 test_kurtosis<long double>();
764 //test_kurtosis<cpp_bin_float_50>();
766 test_integer_kurtosis<int>();
767 test_integer_kurtosis<unsigned>();
769 test_median<float>();
770 test_median<double>();
771 test_median<long double>();
772 test_median<cpp_bin_float_50>();
775 test_median_absolute_deviation<float>();
776 test_median_absolute_deviation<double>();
777 test_median_absolute_deviation<long double>();
778 test_median_absolute_deviation<cpp_bin_float_50>();
780 test_gini_coefficient<float>();
781 test_gini_coefficient<double>();
782 test_gini_coefficient<long double>();
783 test_gini_coefficient<cpp_bin_float_50>();
785 test_integer_gini_coefficient<unsigned>();
786 test_integer_gini_coefficient<int>();
788 test_sample_gini_coefficient<float>();
789 test_sample_gini_coefficient<double>();
790 test_sample_gini_coefficient<long double>();
791 test_sample_gini_coefficient<cpp_bin_float_50>();
793 return boost::report_errors();