1 // Copyright 2019 Hans Dembinski
3 // Distributed under the Boost Software License, Version 1.0.
4 // (See accompanying file LICENSE_1_0.txt
5 // or copy at http://www.boost.org/LICENSE_1_0.txt)
7 #ifndef BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
8 #define BOOST_HISTOGRAM_DETAIL_FILL_N_HPP
11 #include <boost/assert.hpp>
12 #include <boost/histogram/axis/option.hpp>
13 #include <boost/histogram/axis/traits.hpp>
14 #include <boost/histogram/detail/axes.hpp>
15 #include <boost/histogram/detail/detect.hpp>
16 #include <boost/histogram/detail/fill.hpp>
17 #include <boost/histogram/detail/linearize.hpp>
18 #include <boost/histogram/detail/non_member_container_access.hpp>
19 #include <boost/histogram/detail/optional_index.hpp>
20 #include <boost/histogram/detail/span.hpp>
21 #include <boost/histogram/detail/static_if.hpp>
22 #include <boost/histogram/fwd.hpp>
23 #include <boost/mp11/algorithm.hpp>
24 #include <boost/mp11/bind.hpp>
25 #include <boost/mp11/utility.hpp>
26 #include <boost/throw_exception.hpp>
27 #include <boost/variant2/variant.hpp>
29 #include <type_traits>
36 namespace dtl = boost::histogram::detail;
// Metafunction: true if T is convertible to the value type of at least one
// axis in Axes (lazy quoted-metafunction query over value_types<Axes>).
38 template <class Axes, class T>
39 using is_convertible_to_any_value_type =
40 mp11::mp_any_of_q<value_types<Axes>, mp11::mp_bind_front<std::is_convertible, T>>;
// Helper that evaluates a parameter pack expansion purely for its side
// effects and discards the arguments (emulates operator folding in C++14).
template <class... Args>
void fold(Args&&...) noexcept {}
// Normalize an argument into a (pointer, length) pair: a scalar maps to
// (&x, 0) -- length 0 later means "broadcast this single value" -- while an
// iterable maps to (data pointer, element count).
// NOTE(review): the `template <class T>` header (original line 45) and the
// closing brace are missing from this extract.
46 auto to_ptr_size(const T& x) {
47 return static_if<std::is_scalar<T>>(
48 [](const auto& x) { return std::make_pair(&x, static_cast<std::size_t>(0)); },
49 [](const auto& x) { return std::make_pair(dtl::data(x), dtl::size(x)); }, x);
// Apply f to v: if V is a variant2::variant, dispatch through
// variant2::visit, otherwise invoke f(v) directly. Both f and v are
// perfectly forwarded in either branch.
52 template <class F, class V>
53 decltype(auto) maybe_visit(F&& f, V&& v) {
54 return static_if<is_variant<std::decay_t<V>>>(
55 [](auto&& f, auto&& v) {
56 return variant2::visit(std::forward<F>(f), std::forward<V>(v));
// NOTE(review): the lambda-closing "}," line (original line 57) is missing
// from this extract.
58 [](auto&& f, auto&& v) { return std::forward<F>(f)(std::forward<V>(v)); },
59 std::forward<F>(f), std::forward<V>(v));
// Per-axis visitor used by fill_n_indices: translates the argument bound to
// one axis (either a single value broadcast `size_` times, or an iterable
// providing `size_` values starting at offset `start_`) into linear index
// contributions written through the index buffer at `begin_`.
// NOTE(review): this extract is missing several original lines (the member
// declarations around lines 68-73, the `template <class T>` headers, and
// some closing braces); comments below describe only the visible code.
62 template <class Index, class Axis, class IsGrowing>
63 struct index_visitor {
64 using index_type = Index;
65 using pointer = index_type*;
66 using value_type = axis::traits::value_type<Axis>;
// compile-time option flags of the axis
67 using Opt = axis::traits::static_options<Axis>;
70 const std::size_t stride_, start_, size_; // start and size of value collection
// presumably accumulates the zero-point shift of a growing axis for the
// caller -- TODO confirm, the writing statement is missing in this extract
72 axis::index_type* shift_;
74 index_visitor(Axis& a, std::size_t& str, const std::size_t& sta, const std::size_t& si,
75 const pointer it, axis::index_type* shift)
76 : axis_(a), stride_(str), start_(sta), size_(si), begin_(it), shift_(shift) {}
// growing-axis variant: linearize_growth may enlarge the axis and reports a
// shift of the axis zero-point
79 void call_2(std::true_type, pointer it, const T& x) const {
80 // must use this code for all axes if one of them is growing
81 axis::index_type shift;
82 linearize_growth(*it, shift, stride_, axis_,
83 try_cast<value_type, std::invalid_argument>(x));
84 if (shift > 0) { // shift previous indices, because axis zero-point has changed
85 while (it != begin_) *--it += static_cast<std::size_t>(shift) * stride_;
// non-growing variant: plain linearization of the value into the index
91 void call_2(std::false_type, pointer it, const T& x) const {
93 linearize(*it, stride_, axis_, try_cast<value_type, std::invalid_argument>(x));
97 void call_1(std::false_type, const T& iterable) const {
98 // T is iterable; fill N values
99 const auto* tp = dtl::data(iterable) + start_;
100 for (auto it = begin_; it != begin_ + size_; ++it) call_2(IsGrowing{}, it, *tp++);
104 void call_1(std::true_type, const T& value) const {
105 // T is compatible value; fill single value N times
// compute the index contribution once, then apply the delta to the whole
// buffered range instead of recomputing per entry
106 index_type idx{*begin_};
107 call_2(IsGrowing{}, &idx, value);
110 static_cast<std::intptr_t>(idx) - static_cast<std::intptr_t>(*begin_);
111 for (auto&& i : make_span(begin_, size_)) i += delta;
// mark all buffered indices invalid (the surrounding conditional lines are
// missing from this extract)
113 std::fill(begin_, begin_ + size_, invalid_index);
// entry point: dispatch on whether T is a single convertible value or an
// iterable of values
117 void operator()(const T& iterable_or_value) const {
118 call_1(mp11::mp_bool<(std::is_convertible<T, value_type>::value ||
119 !is_iterable<T>::value)>{},
// Fill `indices[0..size)` with linear storage indices for the value
// subrange [start, start+size) referenced through viter (one argument per
// axis). Growing axes may be enlarged as a side effect; when any axis
// extent changed, the storage is re-grown to match.
124 template <class Index, class S, class Axes, class T>
125 void fill_n_indices(Index* indices, const std::size_t start, const std::size_t size,
126 const std::size_t offset, S& storage, Axes& axes, const T* viter) {
// record pre-fill extents; shifts collect zero-point changes of growing axes
127 axis::index_type extents[buffer_size<Axes>::value];
128 axis::index_type shifts[buffer_size<Axes>::value];
129 for_each_axis(axes, [eit = extents, sit = shifts](const auto& a) mutable {
// NOTE(review): the statement initializing *sit++ (original line 130) is
// missing from this extract.
131 *eit++ = axis::traits::extent(a);
132 }); // LCOV_EXCL_LINE: gcc-8 is missing this line for no reason
134 // offset must be zero for growing axes
135 using IsGrowing = has_growing_axis<Axes>;
136 std::fill(indices, indices + size, IsGrowing::value ? 0 : offset);
// visit each axis with its bound argument; the stride is multiplied by each
// processed axis extent (row-major-style linearization)
137 for_each_axis(axes, [&, stride = static_cast<std::size_t>(1),
138 pshift = shifts](auto& axis) mutable {
139 using Axis = std::decay_t<decltype(axis)>;
141 index_visitor<Index, Axis, IsGrowing>{axis, stride, start, size, indices, pshift},
143 stride *= static_cast<std::size_t>(axis::traits::extent(axis));
// detect whether any axis was grown while computing indices
147 bool update_needed = false;
148 for_each_axis(axes, [&update_needed, eit = extents](const auto& a) mutable {
149 update_needed |= *eit++ != axis::traits::extent(a);
// re-grow the storage from the old extents, applying the recorded shifts
152 storage_grower<Axes> g(axes);
153 g.from_extents(extents);
154 g.apply(storage, shifts);
// Fill the storage cell at idx from the dereferenced value pointers
// p.first..., then advance each pointer whose span length p.second is
// non-zero (a length of 0 means "broadcast a single value").
// NOTE(review): the `if (is_valid(idx))` guard and its closing brace
// (original lines 160/163) are missing from this extract.
158 template <class S, class Index, class... Ts>
159 void fill_n_storage(S& s, const Index idx, Ts&&... p) noexcept {
161 BOOST_ASSERT(idx < s.size());
162 fill_storage_3(s[idx], *p.first...);
// fold(...) forces evaluation of the conditional increment for every pack
// element
164 fold((p.second ? ++p.first : 0)...);
// Overload with a leading weight argument: fills the cell with the current
// weight value (and the remaining sample values), then advances the weight
// pointer and every sample pointer with non-zero length (0 == broadcast).
167 template <class S, class Index, class T, class... Ts>
168 void fill_n_storage(S& s, const Index idx, weight_type<T>&& w, Ts&&... ps) noexcept {
170 BOOST_ASSERT(idx < s.size());
171 fill_storage_3(s[idx], weight_type<decltype(*w.value.first)>{*w.value.first},
// NOTE(review): the trailing argument line(s) and closing brace (original
// lines 172-173) are missing from this extract.
174 if (w.value.second) ++w.value.first;
175 fold((ps.second ? ++ps.first : 0)...);
178 // general Nd treatment
// Chunked N-dimensional fill: converts up to 2^14 values at a time into
// linear indices, then bumps the corresponding storage cells. The prose
// below was a /* */ comment in the original; its delimiters (original
// lines 185/211) are missing from this extract.
179 template <class Index, class S, class A, class T, class... Ts>
180 void fill_n_nd(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
181 const T* values, Ts&&... ts) {
182 constexpr std::size_t buffer_size = 1ul << 14;
183 Index indices[buffer_size];
186 Parallelization options.
188 A) Run the whole fill2 method in parallel, each thread fills its own buffer of
189 indices, synchronization (atomics) are needed to synchronize the incrementing of
190 the storage cells. This leads to a lot of congestion for small histograms.
192 B) Run only fill_n_indices in parallel, subsections of the indices buffer
193 can be filled by different threads. The final loop that fills the storage runs
194 in the main thread, this requires no synchronization for the storage, cells do
195 not need to support atomic operations.
197 C) Like B), then sort the indices in the main thread and fill the
198 storage in parallel, where each thread uses a disjunct set of indices. This
199 should create less congestion and requires no synchronization for the storage.
201 Note on C): Let's say we have an axis with 5 bins (with *flow to simplify).
202 Then after filling 10 values, converting to indices and sorting, the index
203 buffer may look like this: 0 0 0 1 2 2 2 4 4 5. Let's use two threads to fill
204 the storage. Still in the main thread, we compute an iterator to the middle of
205 the index buffer and move it to the right until the pointee changes. Now we have
206 two ranges which contain disjunct sets of indices. We pass these ranges to the
207 threads which then fill the storage. Since the threads by construction do not
208 compete to increment the same cell, no further synchronization is required.
210 In all cases, growing axes cannot be parallelized.
// process the values in chunks that fit the fixed-size index buffer
213 for (std::size_t start = 0; start < vsize; start += buffer_size) {
214 const std::size_t n = std::min(buffer_size, vsize - start);
215 // fill buffer of indices...
216 fill_n_indices(indices, start, n, offset, storage, axes, values);
217 // ...and fill corresponding storage cells
218 for (auto&& idx : make_span(indices, n))
219 fill_n_storage(storage, idx, std::forward<Ts>(ts)...);
// Static axes (std::tuple): pick the index type at compile time. If every
// axis is inclusive a plain std::size_t index suffices; otherwise
// optional_index is needed to represent values not covered by any bin.
223 template <class S, class... As, class T, class... Us>
224 void fill_n_1(const std::size_t offset, S& storage, std::tuple<As...>& axes,
225 const std::size_t vsize, const T* values, Us&&... us) {
// NOTE(review): the `using index_type =` opener (original line 226) is
// missing from this extract; the next line is its right-hand side.
227 mp11::mp_if<has_non_inclusive_axis<std::tuple<As...>>, optional_index, std::size_t>;
228 fill_n_nd<index_type>(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
// Dynamic axes: inclusiveness is only known at run time, so the choice
// between std::size_t and optional_index is made dynamically; the 1D case
// is first unpacked into a static one-axis tuple.
231 template <class S, class A, class T, class... Us>
232 void fill_n_1(const std::size_t offset, S& storage, A& axes, const std::size_t vsize,
233 const T* values, Us&&... us) {
234 bool all_inclusive = true;
// NOTE(review): the `for_each_axis(axes,` opener (original line 235) is
// missing from this extract.
236 [&](const auto& ax) { all_inclusive &= axis::traits::inclusive(ax); });
237 if (axes_rank(axes) == 1) {
// visit the single runtime axis and recurse with a static one-axis tuple
240 std::tuple<decltype(ax)> axes{ax};
241 fill_n_1(offset, storage, axes, vsize, values, std::forward<Us>(us)...);
// all axes inclusive: every value maps to a cell, plain indices suffice
246 fill_n_nd<std::size_t>(offset, storage, axes, vsize, values,
247 std::forward<Us>(us)...);
// otherwise an index may be absent and must be optional
249 fill_n_nd<optional_index>(offset, storage, axes, vsize, values,
250 std::forward<Us>(us)...);
// Determine the common length of the per-axis value sequences. Scalar
// (broadcast) arguments do not constrain the length; iterable arguments
// with different lengths raise std::invalid_argument. Returns 1 when no
// argument is iterable.
254 template <class A, class T, std::size_t N>
255 std::size_t get_total_size(const A& axes, const dtl::span<const T, N>& values) {
256 // supported cases (T = value type; CT = container of T; V<T, CT, ...> = variant):
257 // - span<CT, N>: for any histogram, N == rank
258 // - span<V<T, CT>, N>: for any histogram, N == rank
259 BOOST_ASSERT(axes_rank(axes) == values.size());
260 constexpr auto unset = static_cast<std::size_t>(-1);
261 std::size_t size = unset;
262 for_each_axis(axes, [&size, vit = values.begin()](const auto& ax) mutable {
263 using AV = axis::traits::value_type<std::decay_t<decltype(ax)>>;
// NOTE(review): the maybe_visit call opener (original line 264) is missing
// from this extract.
265 [&size](const auto& v) {
266 // v is either convertible to value or a sequence of values
267 using V = std::remove_const_t<std::remove_reference_t<decltype(v)>>;
268 static_if_c<(std::is_convertible<decltype(v), AV>::value ||
269 !is_iterable<V>::value)>(
271 [&size](const auto& v) {
272 const auto n = dtl::size(v);
273 // must repeat this here for msvc :(
274 constexpr auto unset = static_cast<std::size_t>(-1);
// the first iterable fixes the common size; later iterables must match it
// (the comparison lines, original 275-277, are missing from this extract)
278 BOOST_THROW_EXCEPTION(
279 std::invalid_argument("spans must have compatible lengths"));
285 // if all arguments are not iterables, return size of 1
286 return size == unset ? 1 : size;
// Recursion terminator for the span-length checks: no extra arguments
// remain, so there is nothing left to validate.
inline void fill_n_check_extra_args(std::size_t) noexcept {}
291 template <class T, class... Ts>
292 void fill_n_check_extra_args(std::size_t size, T&& x, Ts&&... ts) {
293 // sequences must have same lengths, but sequences of length 0 are broadcast
294 if (x.second != 0 && x.second != size)
295 BOOST_THROW_EXCEPTION(std::invalid_argument("spans must have compatible lengths"));
296 fill_n_check_extra_args(size, std::forward<Ts>(ts)...);
299 template <class T, class... Ts>
300 void fill_n_check_extra_args(std::size_t size, weight_type<T>&& w, Ts&&... ts) {
301 fill_n_check_extra_args(size, w.value, std::forward<Ts>(ts)...);
// Main dispatch for fill_n (enabled overload): validates ranks and span
// lengths, then forwards to fill_n_1. Two cases are distinguished at
// compile time: T itself convertible to an axis value type (1D special
// case, the whole span is the value sequence) versus one argument per axis.
304 template <class S, class A, class T, std::size_t N, class... Us>
305 void fill_n(std::true_type, const std::size_t offset, S& storage, A& axes,
306 const dtl::span<const T, N> values, Us&&... us) {
307 // supported cases (T = value type; CT = container of T; V<T, CT, ...> = variant):
308 // - span<T, N>: only valid for 1D histogram, N > 1 allowed
309 // - span<CT, N>: for any histogram, N == rank
310 // - span<V<T, CT>, N>: for any histogram, N == rank
311 static_if<is_convertible_to_any_value_type<A, T>>(
312 [&](const auto& values, auto&&... us) {
313 // T matches one of the axis value types, must be 1D special case
314 if (axes_rank(axes) != 1)
315 BOOST_THROW_EXCEPTION(
316 std::invalid_argument("number of arguments must match histogram rank"));
317 fill_n_check_extra_args(values.size(), std::forward<Us>(us)...);
// &values: the span itself becomes the single per-axis argument
318 fill_n_1(offset, storage, axes, values.size(), &values, std::forward<Us>(us)...);
320 [&](const auto& values, auto&&... us) {
// general case: one (possibly variant) argument per axis
322 if (axes_rank(axes) != values.size())
323 BOOST_THROW_EXCEPTION(
324 std::invalid_argument("number of arguments must match histogram rank"));
325 const auto vsize = get_total_size(axes, values);
326 fill_n_check_extra_args(vsize, std::forward<Us>(us)...);
327 fill_n_1(offset, storage, axes, vsize, values.data(), std::forward<Us>(us)...);
329 values, std::forward<Us>(us)...);
// Overload selected for invalid argument combinations; intentionally empty
// so the compiler stops here instead of showing library internals.
template <class... Args>
void fill_n(std::false_type, Args...) {}
336 } // namespace detail
337 } // namespace histogram
340 #endif // BOOST_HISTOGRAM_DETAIL_FILL_N_HPP