0b2a845ac1fb6136e63b467fb2df4b09339d550d
[profile/ivi/opencv.git] / modules / core / src / parallel.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 #if defined WIN32 || defined WINCE
46     #include <windows.h>
47     #undef small
48     #undef min
49     #undef max
50     #undef abs
51 #endif
52
53 #if defined __linux__ || defined __APPLE__
54     #include <unistd.h>
55     #include <stdio.h>
56     #include <sys/types.h>
57     #if defined ANDROID
58         #include <sys/sysconf.h>
59     #else
60         #include <sys/sysctl.h>
61     #endif
62 #endif
63
64 #ifdef _OPENMP
65     #define HAVE_OPENMP
66 #endif
67
68 #ifdef __APPLE__
69     #define HAVE_GCD
70 #endif
71
72 #if defined _MSC_VER && _MSC_VER >= 1600
73     #define HAVE_CONCURRENCY
74 #endif
75
76 /* IMPORTANT: always use the same order of defines
77    1. HAVE_TBB         - 3rdparty library, should be explicitly enabled
78    2. HAVE_CSTRIPES    - 3rdparty library, should be explicitly enabled
79    3. HAVE_OPENMP      - integrated to compiler, should be explicitly enabled
80    4. HAVE_GCD         - system wide, used automatically        (APPLE only)
81    5. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
82 */
83
84 #if defined HAVE_TBB
85     #include "tbb/tbb_stddef.h"
86     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
87         #include "tbb/tbb.h"
88         #include "tbb/task.h"
89         #if TBB_INTERFACE_VERSION >= 6100
90             #include "tbb/task_arena.h"
91         #endif
92         #undef min
93         #undef max
94     #else
95         #undef HAVE_TBB
96     #endif // end TBB version
97 #endif
98
99 #ifndef HAVE_TBB
100     #if defined HAVE_CSTRIPES
101         #include "C=.h"
102         #undef shared
103     #elif defined HAVE_OPENMP
104         #include <omp.h>
105     #elif defined HAVE_GCD
106         #include <dispatch/dispatch.h>
107         #include <pthread.h>
108     #elif defined HAVE_CONCURRENCY
109         #include <ppl.h>
110     #endif
111 #endif
112
113 #if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY
114    #define HAVE_PARALLEL_FRAMEWORK
115 #endif
116
117 namespace cv
118 {
119     ParallelLoopBody::~ParallelLoopBody() {}
120 }
121
122 namespace
123 {
124 #ifdef HAVE_PARALLEL_FRAMEWORK
125     class ParallelLoopBodyWrapper
126     {
127     public:
128         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
129         {
130             body = &_body;
131             wholeRange = _r;
132             double len = wholeRange.end - wholeRange.start;
133             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
134         }
135         void operator()(const cv::Range& sr) const
136         {
137             cv::Range r;
138             r.start = (int)(wholeRange.start +
139                             ((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
140             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
141                             ((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
142             (*body)(r);
143         }
144         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
145
146     protected:
147         const cv::ParallelLoopBody* body;
148         cv::Range wholeRange;
149         int nstripes;
150     };
151
152 #if defined HAVE_TBB
153     class ProxyLoopBody : public ParallelLoopBodyWrapper
154     {
155     public:
156         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
157         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
158         {}
159
160         void operator ()(const tbb::blocked_range<int>& range) const
161         {
162             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
163         }
164     };
165 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
166     typedef ParallelLoopBodyWrapper ProxyLoopBody;
167 #elif defined HAVE_GCD
168     typedef ParallelLoopBodyWrapper ProxyLoopBody;
169     static void block_function(void* context, size_t index)
170     {
171         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
172         (*ptr_body)(cv::Range(index, index + 1));
173     }
174 #elif defined HAVE_CONCURRENCY
175     class ProxyLoopBody : public ParallelLoopBodyWrapper
176     {
177     public:
178         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
179         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
180         {}
181
182         void operator ()(int i) const
183         {
184             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
185         }
186     };
187 #else
188     typedef ParallelLoopBodyWrapper ProxyLoopBody;
189 #endif
190
// Thread count last passed to cv::setNumThreads() (initially -1).
// cv::parallel_for_ runs the body serially while this value is 0.
static int numThreads = -1;

#if defined HAVE_TBB
// Scheduler object is created in deferred mode; cv::setNumThreads() initializes it.
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#elif defined HAVE_CSTRIPES
// nothing for C=
#elif defined HAVE_OPENMP
// Value of omp_get_max_threads() captured during static initialization;
// cv::setNumThreads() falls back to it for non-positive requests.
static int numThreadsMax = omp_get_max_threads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined HAVE_CONCURRENCY
// Minimal holder for a Concurrency::Scheduler pointer: Release() is called on
// the held scheduler whenever it is replaced and when the holder is destroyed.
class SchedPtr
{
public:
    SchedPtr() : sched_(0) {}

    ~SchedPtr()
    {
        if (sched_)
            sched_->Release();
        sched_ = 0;
    }

    // Releases the currently held scheduler (if any) and stores the new one.
    void operator=(Concurrency::Scheduler* sched)
    {
        if (sched_)
            sched_->Release();
        sched_ = sched;
    }

    operator Concurrency::Scheduler*() { return sched_; }
    Concurrency::Scheduler* operator->() { return sched_; }

private:
    Concurrency::Scheduler* sched_;
};
static SchedPtr pplScheduler;
#endif
220
221 #endif // HAVE_PARALLEL_FRAMEWORK
222
223 } //namespace
224
225 /* ================================   parallel_for_  ================================ */
226
227 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
228 {
229 #ifdef HAVE_PARALLEL_FRAMEWORK
230
231     if(numThreads != 0)
232     {
233         ProxyLoopBody pbody(body, range, nstripes);
234         cv::Range stripeRange = pbody.stripeRange();
235
236 #if defined HAVE_TBB
237
238         tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
239
240 #elif defined HAVE_CSTRIPES
241
242         parallel(MAX(0, numThreads))
243         {
244             int offset = stripeRange.start;
245             int len = stripeRange.end - offset;
246             Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
247             pbody(r);
248             barrier();
249         }
250
251 #elif defined HAVE_OPENMP
252
253         #pragma omp parallel for schedule(dynamic)
254         for (int i = stripeRange.start; i < stripeRange.end; ++i)
255             pbody(Range(i, i + 1));
256
257 #elif defined HAVE_GCD
258
259         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
260         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
261
262 #elif defined HAVE_CONCURRENCY
263
264         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
265         {
266             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
267         }
268         else
269         {
270             pplScheduler->Attach();
271             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
272             Concurrency::CurrentScheduler::Detach();
273         }
274
275 #else
276
277 #error You have hacked and compiling with unsupported parallel framework
278
279 #endif
280
281     }
282     else
283
284 #endif // HAVE_PARALLEL_FRAMEWORK
285     {
286         (void)nstripes;
287         body(range);
288     }
289 }
290
291 int cv::getNumThreads(void)
292 {
293 #ifdef HAVE_PARALLEL_FRAMEWORK
294
295     if(numThreads == 0)
296         return 1;
297
298 #endif
299
300 #if defined HAVE_TBB
301
302     return tbbScheduler.is_active()
303            ? numThreads
304            : tbb::task_scheduler_init::default_num_threads();
305
306 #elif defined HAVE_CSTRIPES
307
308     return numThreads > 0
309             ? numThreads
310             : cv::getNumberOfCPUs();
311
312 #elif defined HAVE_OPENMP
313
314     return omp_get_max_threads();
315
316 #elif defined HAVE_GCD
317
318     return 512; // the GCD thread pool limit
319
320 #elif defined HAVE_CONCURRENCY
321
322     return 1 + (pplScheduler == 0
323                 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
324                 : pplScheduler->GetNumberOfVirtualProcessors());
325
326 #else
327
328     return 1;
329
330 #endif
331 }
332
333 void cv::setNumThreads( int threads )
334 {
335     (void)threads;
336 #ifdef HAVE_PARALLEL_FRAMEWORK
337     numThreads = threads;
338 #endif
339
340 #ifdef HAVE_TBB
341
342     if(tbbScheduler.is_active()) tbbScheduler.terminate();
343     if(threads > 0) tbbScheduler.initialize(threads);
344
345 #elif defined HAVE_CSTRIPES
346
347     return; // nothing needed
348
349 #elif defined HAVE_OPENMP
350
351     if(omp_in_parallel())
352         return; // can't change number of openmp threads inside a parallel region
353
354     omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
355
356 #elif defined HAVE_GCD
357
358     // unsupported
359     // there is only private dispatch_queue_set_width() and only for desktop
360
361 #elif defined HAVE_CONCURRENCY
362
363     if (threads <= 0)
364     {
365         pplScheduler = 0;
366     }
367     else if (threads == 1)
368     {
369         // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
370         numThreads = 0;
371     }
372     else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
373     {
374         pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
375                        Concurrency::MinConcurrency, threads-1,
376                        Concurrency::MaxConcurrency, threads-1));
377     }
378
379 #endif
380 }
381
382
383 int cv::getThreadNum(void)
384 {
385 #if defined HAVE_TBB
386     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
387         return tbb::task_arena::current_slot();
388     #else
389         return 0;
390     #endif
391 #elif defined HAVE_CSTRIPES
392     return pix();
393 #elif defined HAVE_OPENMP
394     return omp_get_thread_num();
395 #elif defined HAVE_GCD
396     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
397 #elif defined HAVE_CONCURRENCY
398     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
399 #else
400     return 0;
401 #endif
402 }
403
404 #ifdef ANDROID
// Counts the CPUs listed in /sys/devices/system/cpu/possible.
//
// The first line of the file is expected to be a comma-separated list of
// single CPU indices and inclusive ranges, e.g. "0-1,3,5-7,10,13-15".
// Entries that cannot be parsed (or ranges whose end precedes their start)
// are skipped, so the result can never become negative.
//
// Returns the number of listed CPUs, or 1 if the file cannot be opened or
// read, or contains no valid entry.
static inline int getNumberOfCPUsImpl()
{
   FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
   if(!cpuPossible)
       return 1;

   char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
   char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
   fclose(cpuPossible);
   if(!pbuf)
      return 1;

   int cpusAvailable = 0;

   while(*pbuf)
   {
      // Isolate the next comma-separated entry and note whether it is a range.
      const char* entry = pbuf;
      bool isRange = false;
      while(*pbuf && *pbuf != ',')
      {
          if(*pbuf == '-') isRange = true;
          ++pbuf;
      }
      if(*pbuf) *pbuf++ = 0; // terminate the entry and step over the comma

      int first = 0, last = 0;
      if(isRange)
      {
          // Accept only well-formed, non-inverted ranges "a-b".
          if(sscanf(entry, "%d-%d", &first, &last) == 2 && first >= 0 && last >= first)
              cpusAvailable += last - first + 1;
      }
      else if(sscanf(entry, "%d", &first) == 1 && first >= 0)
      {
          ++cpusAvailable;
      }
   }
   return cpusAvailable > 0 ? cpusAvailable : 1;
}
442 #endif
443
444 int cv::getNumberOfCPUs(void)
445 {
446 #if defined WIN32 || defined _WIN32
447     SYSTEM_INFO sysinfo;
448     GetSystemInfo( &sysinfo );
449
450     return (int)sysinfo.dwNumberOfProcessors;
451 #elif defined ANDROID
452     static int ncpus = getNumberOfCPUsImpl();
453     return ncpus;
454 #elif defined __linux__
455     return (int)sysconf( _SC_NPROCESSORS_ONLN );
456 #elif defined __APPLE__
457     int numCPU=0;
458     int mib[4];
459     size_t len = sizeof(numCPU);
460
461     /* set the mib for hw.ncpu */
462     mib[0] = CTL_HW;
463     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
464
465     /* get the number of CPUs from the system */
466     sysctl(mib, 2, &numCPU, &len, NULL, 0);
467
468     if( numCPU < 1 )
469     {
470         mib[1] = HW_NCPU;
471         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
472
473         if( numCPU < 1 )
474             numCPU = 1;
475     }
476
477     return (int)numCPU;
478 #else
479     return 1;
480 #endif
481 }
482
483 CV_IMPL void cvSetNumThreads(int nt)
484 {
485     cv::setNumThreads(nt);
486 }
487
488 CV_IMPL int cvGetNumThreads()
489 {
490     return cv::getNumThreads();
491 }
492
493 CV_IMPL int cvGetThreadNum()
494 {
495     return cv::getThreadNum();
496 }