enabled gst
[profile/ivi/opencv.git] / modules / core / src / parallel.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 #if defined WIN32 || defined WINCE
46     #include <windows.h>
47     #undef small
48     #undef min
49     #undef max
50     #undef abs
51 #endif
52
53 #if defined __linux__ || defined __APPLE__
54     #include <unistd.h>
55     #include <stdio.h>
56     #include <sys/types.h>
57     #if defined ANDROID
58         #include <sys/sysconf.h>
59     #else
60         #include <sys/sysctl.h>
61     #endif
62 #endif
63
64 #ifdef _OPENMP
65     #define HAVE_OPENMP
66 #endif
67
68 #ifdef __APPLE__
69     #define HAVE_GCD
70 #endif
71
72 #if defined _MSC_VER && _MSC_VER >= 1600
73     #define HAVE_CONCURRENCY
74 #endif
75
/* IMPORTANT: always use the same order of defines
   1. HAVE_TBB         - 3rd-party library, should be explicitly enabled
   2. HAVE_CSTRIPES    - 3rd-party library, should be explicitly enabled
   3. HAVE_OPENMP      - integrated into the compiler, should be explicitly enabled
   4. HAVE_GCD         - system-wide, used automatically        (APPLE only)
   5. HAVE_CONCURRENCY - part of the runtime, used automatically (Windows only - MSVS 10, MSVS 11)
*/
83
84 #if defined HAVE_TBB
85     #include "tbb/tbb_stddef.h"
86     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
87         #include "tbb/tbb.h"
88         #include "tbb/task.h"
89         #if TBB_INTERFACE_VERSION >= 6100
90             #include "tbb/task_arena.h"
91         #endif
92         #undef min
93         #undef max
94     #else
95         #undef HAVE_TBB
96     #endif // end TBB version
97 #endif
98
99 #ifndef HAVE_TBB
100     #if defined HAVE_CSTRIPES
101         #include "C=.h"
102         #undef shared
103     #elif defined HAVE_OPENMP
104         #include <omp.h>
105     #elif defined HAVE_GCD
106         #include <dispatch/dispatch.h>
107         #include <pthread.h>
108     #elif defined HAVE_CONCURRENCY
109         #include <ppl.h>
110     #endif
111 #endif
112
113 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
114 #  define CV_PARALLEL_FRAMEWORK "tbb"
115 #elif defined HAVE_CSTRIPES
116 #  define CV_PARALLEL_FRAMEWORK "cstripes"
117 #elif defined HAVE_OPENMP
118 #  define CV_PARALLEL_FRAMEWORK "openmp"
119 #elif defined HAVE_GCD
120 #  define CV_PARALLEL_FRAMEWORK "gcd"
121 #elif defined HAVE_CONCURRENCY
122 #  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
123 #endif
124
125 namespace cv
126 {
127     ParallelLoopBody::~ParallelLoopBody() {}
128 }
129
130 namespace
131 {
132 #ifdef CV_PARALLEL_FRAMEWORK
133     class ParallelLoopBodyWrapper
134     {
135     public:
136         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
137         {
138             body = &_body;
139             wholeRange = _r;
140             double len = wholeRange.end - wholeRange.start;
141             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
142         }
143         void operator()(const cv::Range& sr) const
144         {
145             cv::Range r;
146             r.start = (int)(wholeRange.start +
147                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
148             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
149                             ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
150             (*body)(r);
151         }
152         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
153
154     protected:
155         const cv::ParallelLoopBody* body;
156         cv::Range wholeRange;
157         int nstripes;
158     };
159
160 #if defined HAVE_TBB
161     class ProxyLoopBody : public ParallelLoopBodyWrapper
162     {
163     public:
164         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
165         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
166         {}
167
168         void operator ()(const tbb::blocked_range<int>& range) const
169         {
170             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
171         }
172     };
173 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
174     typedef ParallelLoopBodyWrapper ProxyLoopBody;
175 #elif defined HAVE_GCD
176     typedef ParallelLoopBodyWrapper ProxyLoopBody;
177     static void block_function(void* context, size_t index)
178     {
179         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
180         (*ptr_body)(cv::Range((int)index, (int)index + 1));
181     }
182 #elif defined HAVE_CONCURRENCY
183     class ProxyLoopBody : public ParallelLoopBodyWrapper
184     {
185     public:
186         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
187         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
188         {}
189
190         void operator ()(int i) const
191         {
192             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
193         }
194     };
195 #else
196     typedef ParallelLoopBodyWrapper ProxyLoopBody;
197 #endif
198
// Thread count last passed to cv::setNumThreads(); stays -1 until that
// function is called. A value of 0 makes cv::parallel_for_ run serially.
static int numThreads = -1;

#if defined HAVE_TBB
// Constructed in deferred mode; cv::setNumThreads() initializes it.
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#elif defined HAVE_CSTRIPES
// nothing for C=
#elif defined HAVE_OPENMP
// OpenMP thread limit captured during static initialization; used by
// cv::setNumThreads() when a non-positive count is requested.
static int numThreadsMax = omp_get_max_threads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined HAVE_CONCURRENCY
// Holder for a Concurrency::Scheduler* that calls Release() on the held
// scheduler whenever it is replaced or the holder is destroyed.
class SchedPtr
{
public:
    SchedPtr() : sched_(0) {}

    ~SchedPtr()
    {
        if (sched_)
            sched_->Release();
        sched_ = 0;
    }

    // Releases the currently held scheduler (if any) and takes `sched`.
    void operator=(Concurrency::Scheduler* sched)
    {
        if (sched_)
            sched_->Release();
        sched_ = sched;
    }

    operator Concurrency::Scheduler*() { return sched_; }
    Concurrency::Scheduler* operator->() { return sched_; }

private:
    Concurrency::Scheduler* sched_;
};
static SchedPtr pplScheduler;
#endif
228
229 #endif // CV_PARALLEL_FRAMEWORK
230
231 } //namespace
232
233 /* ================================   parallel_for_  ================================ */
234
235 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
236 {
237 #ifdef CV_PARALLEL_FRAMEWORK
238
239     if(numThreads != 0)
240     {
241         ProxyLoopBody pbody(body, range, nstripes);
242         cv::Range stripeRange = pbody.stripeRange();
243         if( stripeRange.end - stripeRange.start == 1 )
244         {
245             body(range);
246             return;
247         }
248
249 #if defined HAVE_TBB
250
251         tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
252
253 #elif defined HAVE_CSTRIPES
254
255         parallel(MAX(0, numThreads))
256         {
257             int offset = stripeRange.start;
258             int len = stripeRange.end - offset;
259             Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
260             pbody(r);
261             barrier();
262         }
263
264 #elif defined HAVE_OPENMP
265
266         #pragma omp parallel for schedule(dynamic)
267         for (int i = stripeRange.start; i < stripeRange.end; ++i)
268             pbody(Range(i, i + 1));
269
270 #elif defined HAVE_GCD
271
272         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
273         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
274
275 #elif defined HAVE_CONCURRENCY
276
277         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
278         {
279             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
280         }
281         else
282         {
283             pplScheduler->Attach();
284             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
285             Concurrency::CurrentScheduler::Detach();
286         }
287
288 #else
289
290 #error You have hacked and compiling with unsupported parallel framework
291
292 #endif
293
294     }
295     else
296
297 #endif // CV_PARALLEL_FRAMEWORK
298     {
299         (void)nstripes;
300         body(range);
301     }
302 }
303
304 int cv::getNumThreads(void)
305 {
306 #ifdef CV_PARALLEL_FRAMEWORK
307
308     if(numThreads == 0)
309         return 1;
310
311 #endif
312
313 #if defined HAVE_TBB
314
315     return tbbScheduler.is_active()
316            ? numThreads
317            : tbb::task_scheduler_init::default_num_threads();
318
319 #elif defined HAVE_CSTRIPES
320
321     return numThreads > 0
322             ? numThreads
323             : cv::getNumberOfCPUs();
324
325 #elif defined HAVE_OPENMP
326
327     return omp_get_max_threads();
328
329 #elif defined HAVE_GCD
330
331     return 512; // the GCD thread pool limit
332
333 #elif defined HAVE_CONCURRENCY
334
335     return 1 + (pplScheduler == 0
336                 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
337                 : pplScheduler->GetNumberOfVirtualProcessors());
338
339 #else
340
341     return 1;
342
343 #endif
344 }
345
346 void cv::setNumThreads( int threads )
347 {
348     (void)threads;
349 #ifdef CV_PARALLEL_FRAMEWORK
350     numThreads = threads;
351 #endif
352
353 #ifdef HAVE_TBB
354
355     if(tbbScheduler.is_active()) tbbScheduler.terminate();
356     if(threads > 0) tbbScheduler.initialize(threads);
357
358 #elif defined HAVE_CSTRIPES
359
360     return; // nothing needed
361
362 #elif defined HAVE_OPENMP
363
364     if(omp_in_parallel())
365         return; // can't change number of openmp threads inside a parallel region
366
367     omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
368
369 #elif defined HAVE_GCD
370
371     // unsupported
372     // there is only private dispatch_queue_set_width() and only for desktop
373
374 #elif defined HAVE_CONCURRENCY
375
376     if (threads <= 0)
377     {
378         pplScheduler = 0;
379     }
380     else if (threads == 1)
381     {
382         // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
383         numThreads = 0;
384     }
385     else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
386     {
387         pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
388                        Concurrency::MinConcurrency, threads-1,
389                        Concurrency::MaxConcurrency, threads-1));
390     }
391
392 #endif
393 }
394
395
396 int cv::getThreadNum(void)
397 {
398 #if defined HAVE_TBB
399     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
400         return tbb::task_arena::current_slot();
401     #else
402         return 0;
403     #endif
404 #elif defined HAVE_CSTRIPES
405     return pix();
406 #elif defined HAVE_OPENMP
407     return omp_get_thread_num();
408 #elif defined HAVE_GCD
409     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
410 #elif defined HAVE_CONCURRENCY
411     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
412 #else
413     return 0;
414 #endif
415 }
416
417 #ifdef ANDROID
// Counts the CPUs listed in /sys/devices/system/cpu/possible.
//
// The first line of that file is parsed as a comma-separated list of single
// CPU indices and inclusive ranges, e.g. "0-1,3,5-7,10,13-15".
// Returns 1 if the file cannot be opened or read, or if no valid entry is
// found; otherwise the number of CPUs described by the list.
//
// Entries that do not parse (for instance a token holding only the trailing
// newline) or ranges whose end lies before their start are skipped instead of
// being counted.
static inline int getNumberOfCPUsImpl()
{
   FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
   if(!cpuPossible)
       return 1;

   char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
   char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
   fclose(cpuPossible);
   if(!pbuf)
      return 1;

   int cpusAvailable = 0;

   while(*pbuf)
   {
      // Isolate the next comma-separated token and note whether it is a range.
      const char* pos = pbuf;
      bool range = false;
      while(*pbuf && *pbuf != ',')
      {
          if(*pbuf == '-') range = true;
          ++pbuf;
      }
      if(*pbuf) *pbuf++ = 0; // terminate the token and step past the comma

      int rstart = 0, rend = 0;
      if(!range)
      {
          // Single index: count it only if it really is a number.
          if(sscanf(pos, "%d", &rstart) == 1)
              ++cpusAvailable;
      }
      else if(sscanf(pos, "%d-%d", &rstart, &rend) == 2 && rend >= rstart)
      {
          cpusAvailable += rend - rstart + 1;
      }
   }
   return cpusAvailable > 0 ? cpusAvailable : 1;
}
455 #endif
456
457 int cv::getNumberOfCPUs(void)
458 {
459 #if defined WIN32 || defined _WIN32
460     SYSTEM_INFO sysinfo;
461 #if defined(_M_ARM) || defined(_M_X64) || defined(HAVE_WINRT)
462     GetNativeSystemInfo( &sysinfo );
463 #else
464     GetSystemInfo( &sysinfo );
465 #endif
466
467     return (int)sysinfo.dwNumberOfProcessors;
468 #elif defined ANDROID
469     static int ncpus = getNumberOfCPUsImpl();
470     return ncpus;
471 #elif defined __linux__
472     return (int)sysconf( _SC_NPROCESSORS_ONLN );
473 #elif defined __APPLE__
474     int numCPU=0;
475     int mib[4];
476     size_t len = sizeof(numCPU);
477
478     /* set the mib for hw.ncpu */
479     mib[0] = CTL_HW;
480     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
481
482     /* get the number of CPUs from the system */
483     sysctl(mib, 2, &numCPU, &len, NULL, 0);
484
485     if( numCPU < 1 )
486     {
487         mib[1] = HW_NCPU;
488         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
489
490         if( numCPU < 1 )
491             numCPU = 1;
492     }
493
494     return (int)numCPU;
495 #else
496     return 1;
497 #endif
498 }
499
500 const char* cv::currentParallelFramework() {
501 #ifdef CV_PARALLEL_FRAMEWORK
502     return CV_PARALLEL_FRAMEWORK;
503 #else
504     return NULL;
505 #endif
506 }
507
508 CV_IMPL void cvSetNumThreads(int nt)
509 {
510     cv::setNumThreads(nt);
511 }
512
513 CV_IMPL int cvGetNumThreads()
514 {
515     return cv::getNumThreads();
516 }
517
518 CV_IMPL int cvGetThreadNum()
519 {
520     return cv::getThreadNum();
521 }