/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
17 #ifndef __TBB_parallel_for_H
18 #define __TBB_parallel_for_H
20 #define __TBB_parallel_for_H_include_area
21 #include "internal/_warning_suppress_enable_notice.h"
25 #include "partitioner.h"
26 #include "blocked_range.h"
27 #include "tbb_exception.h"
28 #include "internal/_tbb_trace_impl.h"
32 namespace interface9 {
36 //! allocate right task with new parent
37 void* allocate_sibling(task* start_for_task, size_t bytes);
39 //! Task type used in parallel_for
40 /** @ingroup algorithms */
41 template<typename Range, typename Body, typename Partitioner>
42 class start_for: public task {
45 typename Partitioner::task_partition_type my_partition;
46 task* execute() __TBB_override;
48 //! Update affinity info, if any.
49 void note_affinity( affinity_id id ) __TBB_override {
50 my_partition.note_affinity( id );
54 //! Constructor for root task.
55 start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
58 my_partition(partitioner)
60 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
62 //! Splitting constructor used to generate children.
63 /** parent_ becomes left child. Newly constructed object is right child. */
64 start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
65 my_range(parent_.my_range, split_obj),
66 my_body(parent_.my_body),
67 my_partition(parent_.my_partition, split_obj)
69 my_partition.set_affinity(*this);
70 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
72 //! Construct right child from the given range as response to the demand.
73 /** parent_ remains left child. Newly constructed object is right child. */
74 start_for( start_for& parent_, const Range& r, depth_t d ) :
76 my_body(parent_.my_body),
77 my_partition(parent_.my_partition, split())
79 my_partition.set_affinity(*this);
80 my_partition.align_depth( d );
81 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
83 static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
84 if( !range.empty() ) {
85 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
86 start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
88 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
89 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
90 task_group_context context(PARALLEL_FOR);
91 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
92 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
94 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
95 task::spawn_root_and_wait(a);
96 fgt_end_algorithm( (void*)&context );
100 #if __TBB_TASK_GROUP_CONTEXT
101 static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
102 if( !range.empty() ) {
103 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
105 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
106 task::spawn_root_and_wait(a);
107 fgt_end_algorithm( (void*)&context );
111 #endif /* __TBB_TASK_GROUP_CONTEXT */
112 //! Run body for range, serves as callback for partitioner
113 void run_body( Range &r ) {
114 fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
116 fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
119 //! spawn right task, serves as callback for partitioner
120 void offer_work(typename Partitioner::split_type& split_obj) {
121 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
123 //! spawn right task, serves as callback for partitioner
124 void offer_work(const Range& r, depth_t d = 0) {
125 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
129 //! allocate right task with new parent
130 // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
131 inline void* allocate_sibling(task* start_for_task, size_t bytes) {
132 task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
133 start_for_task->set_parent(parent_ptr);
134 parent_ptr->set_ref_count(2);
135 return &parent_ptr->allocate_child().allocate(bytes);
138 //! execute task for parallel_for
139 template<typename Range, typename Body, typename Partitioner>
140 task* start_for<Range,Body,Partitioner>::execute() {
141 my_partition.check_being_stolen( *this );
142 my_partition.execute(*this, my_range);
145 } // namespace internal
147 } // namespace interfaceX
151 using interface9::internal::start_for;
153 //! Calls the function with values from range [begin, end) with a step provided
154 template<typename Function, typename Index>
155 class parallel_for_body : internal::no_assign {
156 const Function &my_func;
157 const Index my_begin;
160 parallel_for_body( const Function& _func, Index& _begin, Index& _step )
161 : my_func(_func), my_begin(_begin), my_step(_step) {}
163 void operator()( const tbb::blocked_range<Index>& r ) const {
164 // A set of local variables to help the compiler with vectorization of the following loop.
168 Index k = my_begin + b*ms;
172 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
173 #pragma vector always assert
176 for ( Index i = b; i < e; ++i, k += ms ) {
181 } // namespace internal
184 // Requirements on Range concept are documented in blocked_range.h
/** \page parallel_for_body_req Requirements on parallel_for body
    Class \c Body implementing the concept of parallel_for body must define:
    - \code Body::Body( const Body& ); \endcode                 Copy constructor
    - \code Body::~Body(); \endcode                             Destructor
    - \code void Body::operator()( Range& r ) const; \endcode   Function call operator applying the body to range \c r.
**/
193 /** \name parallel_for
194 See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
197 //! Parallel iteration over range with default partitioner.
198 /** @ingroup algorithms **/
199 template<typename Range, typename Body>
200 void parallel_for( const Range& range, const Body& body ) {
201 internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
204 //! Parallel iteration over range with simple partitioner.
205 /** @ingroup algorithms **/
206 template<typename Range, typename Body>
207 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
208 internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
211 //! Parallel iteration over range with auto_partitioner.
212 /** @ingroup algorithms **/
213 template<typename Range, typename Body>
214 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
215 internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
218 //! Parallel iteration over range with static_partitioner.
219 /** @ingroup algorithms **/
220 template<typename Range, typename Body>
221 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
222 internal::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
225 //! Parallel iteration over range with affinity_partitioner.
226 /** @ingroup algorithms **/
227 template<typename Range, typename Body>
228 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
229 internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
232 #if __TBB_TASK_GROUP_CONTEXT
233 //! Parallel iteration over range with default partitioner and user-supplied context.
234 /** @ingroup algorithms **/
235 template<typename Range, typename Body>
236 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
237 internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
240 //! Parallel iteration over range with simple partitioner and user-supplied context.
241 /** @ingroup algorithms **/
242 template<typename Range, typename Body>
243 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
244 internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
247 //! Parallel iteration over range with auto_partitioner and user-supplied context.
248 /** @ingroup algorithms **/
249 template<typename Range, typename Body>
250 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
251 internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
254 //! Parallel iteration over range with static_partitioner and user-supplied context.
255 /** @ingroup algorithms **/
256 template<typename Range, typename Body>
257 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
258 internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
261 //! Parallel iteration over range with affinity_partitioner and user-supplied context.
262 /** @ingroup algorithms **/
263 template<typename Range, typename Body>
264 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
265 internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
267 #endif /* __TBB_TASK_GROUP_CONTEXT */
270 namespace strict_ppl {
273 //! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner
274 template <typename Index, typename Function, typename Partitioner>
275 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
277 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
278 else if (last > first) {
279 // Above "else" avoids "potential divide by zero" warning on some platforms
280 Index end = (last - first - Index(1)) / step + Index(1);
281 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
282 internal::parallel_for_body<Function, Index> body(f, first, step);
283 tbb::parallel_for(range, body, partitioner);
287 //! Parallel iteration over a range of integers with a step provided and default partitioner
288 template <typename Index, typename Function>
289 void parallel_for(Index first, Index last, Index step, const Function& f) {
290 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
292 //! Parallel iteration over a range of integers with a step provided and simple partitioner
293 template <typename Index, typename Function>
294 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
295 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
297 //! Parallel iteration over a range of integers with a step provided and auto partitioner
298 template <typename Index, typename Function>
299 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
300 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
302 //! Parallel iteration over a range of integers with a step provided and static partitioner
303 template <typename Index, typename Function>
304 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
305 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
307 //! Parallel iteration over a range of integers with a step provided and affinity partitioner
308 template <typename Index, typename Function>
309 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
310 parallel_for_impl(first, last, step, f, partitioner);
313 //! Parallel iteration over a range of integers with a default step value and default partitioner
314 template <typename Index, typename Function>
315 void parallel_for(Index first, Index last, const Function& f) {
316 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
318 //! Parallel iteration over a range of integers with a default step value and simple partitioner
319 template <typename Index, typename Function>
320 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
321 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
323 //! Parallel iteration over a range of integers with a default step value and auto partitioner
324 template <typename Index, typename Function>
325 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
326 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
328 //! Parallel iteration over a range of integers with a default step value and static partitioner
329 template <typename Index, typename Function>
330 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
331 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
333 //! Parallel iteration over a range of integers with a default step value and affinity partitioner
334 template <typename Index, typename Function>
335 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
336 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
339 #if __TBB_TASK_GROUP_CONTEXT
340 //! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner
341 template <typename Index, typename Function, typename Partitioner>
342 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
344 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
345 else if (last > first) {
346 // Above "else" avoids "potential divide by zero" warning on some platforms
347 Index end = (last - first - Index(1)) / step + Index(1);
348 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
349 internal::parallel_for_body<Function, Index> body(f, first, step);
350 tbb::parallel_for(range, body, partitioner, context);
354 //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner
355 template <typename Index, typename Function>
356 void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
357 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
359 //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
360 template <typename Index, typename Function>
361 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
362 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
364 //! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
365 template <typename Index, typename Function>
366 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
367 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
369 //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner
370 template <typename Index, typename Function>
371 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
372 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
374 //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner
375 template <typename Index, typename Function>
376 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
377 parallel_for_impl(first, last, step, f, partitioner, context);
381 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner
382 template <typename Index, typename Function>
383 void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
384 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
386 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
387 template <typename Index, typename Function>
388 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
389 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
391 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
392 template <typename Index, typename Function>
393 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
394 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
396 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner
397 template <typename Index, typename Function>
398 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
399 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
401 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner
402 template <typename Index, typename Function>
403 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
404 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
407 #endif /* __TBB_TASK_GROUP_CONTEXT */
410 } // namespace strict_ppl
412 using strict_ppl::parallel_for;
// When the serial subset preview is enabled, pull in the serial parallel_for header;
// __TBB_NORMAL_EXECUTION is defined only for the duration of that include.
#if TBB_PREVIEW_SERIAL_SUBSET
#define __TBB_NORMAL_EXECUTION
#include "../serial/tbb/parallel_for.h"
#undef __TBB_NORMAL_EXECUTION
#endif /* TBB_PREVIEW_SERIAL_SUBSET */
422 #include "internal/_warning_suppress_disable_notice.h"
423 #undef __TBB_parallel_for_H_include_area
425 #endif /* __TBB_parallel_for_H */