Merge pull request #1263 from abidrahmank:pyCLAHE_24
[profile/ivi/opencv.git] / modules / core / src / parallel.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 #if defined WIN32 || defined WINCE
46     #include <windows.h>
47     #undef small
48     #undef min
49     #undef max
50     #undef abs
51 #endif
52
53 #if defined __linux__ || defined __APPLE__
54     #include <unistd.h>
55     #include <stdio.h>
56     #include <sys/types.h>
57     #if defined ANDROID
58         #include <sys/sysconf.h>
59     #else
60         #include <sys/sysctl.h>
61     #endif
62 #endif
63
64 #ifdef _OPENMP
65     #define HAVE_OPENMP
66 #endif
67
68 #ifdef __APPLE__
69     #define HAVE_GCD
70 #endif
71
72 #if defined _MSC_VER && _MSC_VER >= 1600
73     #define HAVE_CONCURRENCY
74 #endif
75
76 /* IMPORTANT: always use the same order of defines
77    1. HAVE_TBB         - 3rdparty library, should be explicitly enabled
78    2. HAVE_CSTRIPES    - 3rdparty library, should be explicitly enabled
79    3. HAVE_OPENMP      - integrated to compiler, should be explicitly enabled
80    4. HAVE_GCD         - system wide, used automatically        (APPLE only)
81    5. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
82 */
83
84 #if defined HAVE_TBB
85     #include "tbb/tbb_stddef.h"
86     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
87         #include "tbb/tbb.h"
88         #include "tbb/task.h"
89         #if TBB_INTERFACE_VERSION >= 6100
90             #include "tbb/task_arena.h"
91         #endif
92         #undef min
93         #undef max
94     #else
95         #undef HAVE_TBB
96     #endif // end TBB version
97 #endif
98
99 #ifndef HAVE_TBB
100     #if defined HAVE_CSTRIPES
101         #include "C=.h"
102         #undef shared
103     #elif defined HAVE_OPENMP
104         #include <omp.h>
105     #elif defined HAVE_GCD
106         #include <dispatch/dispatch.h>
107         #include <pthread.h>
108     #elif defined HAVE_CONCURRENCY
109         #include <ppl.h>
110     #endif
111 #endif
112
113 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
114 #  define CV_PARALLEL_FRAMEWORK "tbb"
115 #elif defined HAVE_CSTRIPES
116 #  define CV_PARALLEL_FRAMEWORK "cstripes"
117 #elif defined HAVE_OPENMP
118 #  define CV_PARALLEL_FRAMEWORK "openmp"
119 #elif defined HAVE_GCD
120 #  define CV_PARALLEL_FRAMEWORK "gcd"
121 #elif defined HAVE_CONCURRENCY
122 #  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
123 #endif
124
125 namespace cv
126 {
127     ParallelLoopBody::~ParallelLoopBody() {}
128 }
129
130 namespace
131 {
132 #ifdef CV_PARALLEL_FRAMEWORK
133     class ParallelLoopBodyWrapper
134     {
135     public:
136         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
137         {
138             body = &_body;
139             wholeRange = _r;
140             double len = wholeRange.end - wholeRange.start;
141             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
142         }
143         void operator()(const cv::Range& sr) const
144         {
145             cv::Range r;
146             r.start = (int)(wholeRange.start +
147                             ((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
148             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
149                             ((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
150             (*body)(r);
151         }
152         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
153
154     protected:
155         const cv::ParallelLoopBody* body;
156         cv::Range wholeRange;
157         int nstripes;
158     };
159
160 #if defined HAVE_TBB
161     class ProxyLoopBody : public ParallelLoopBodyWrapper
162     {
163     public:
164         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
165         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
166         {}
167
168         void operator ()(const tbb::blocked_range<int>& range) const
169         {
170             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
171         }
172     };
173 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
174     typedef ParallelLoopBodyWrapper ProxyLoopBody;
175 #elif defined HAVE_GCD
176     typedef ParallelLoopBodyWrapper ProxyLoopBody;
177     static void block_function(void* context, size_t index)
178     {
179         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
180         (*ptr_body)(cv::Range(index, index + 1));
181     }
182 #elif defined HAVE_CONCURRENCY
183     class ProxyLoopBody : public ParallelLoopBodyWrapper
184     {
185     public:
186         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
187         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
188         {}
189
190         void operator ()(int i) const
191         {
192             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
193         }
194     };
195 #else
196     typedef ParallelLoopBodyWrapper ProxyLoopBody;
197 #endif
198
// Value last passed to cv::setNumThreads(); starts at -1.
// A value of 0 makes cv::parallel_for_ call the body directly.
static int numThreads = -1;

#if defined HAVE_TBB
// Created in deferred mode; cv::setNumThreads() initializes it on request.
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#elif defined HAVE_CSTRIPES
// nothing for C=
#elif defined HAVE_OPENMP
// omp_get_max_threads() sampled during static initialization; used by
// cv::setNumThreads() when a non-positive thread count is requested.
static int numThreadsMax = omp_get_max_threads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined HAVE_CONCURRENCY
// Holder for a Concurrency::Scheduler pointer: Release() is called on the
// held scheduler whenever it is replaced and when the holder is destroyed.
class SchedPtr
{
public:
    SchedPtr() : sched_(0) {}

    ~SchedPtr()
    {
        if (sched_) sched_->Release();
        sched_ = 0;
    }

    // Releases the currently held scheduler (if any) and stores `sched`.
    void operator=(Concurrency::Scheduler* sched)
    {
        if (sched_) sched_->Release();
        sched_ = sched;
    }

    Concurrency::Scheduler* operator->() { return sched_; }
    operator Concurrency::Scheduler*() { return sched_; }

private:
    Concurrency::Scheduler* sched_;
};
static SchedPtr pplScheduler;
#endif
228
229 #endif // CV_PARALLEL_FRAMEWORK
230
231 } //namespace
232
233 /* ================================   parallel_for_  ================================ */
234
235 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
236 {
237 #ifdef CV_PARALLEL_FRAMEWORK
238
239     if(numThreads != 0)
240     {
241         ProxyLoopBody pbody(body, range, nstripes);
242         cv::Range stripeRange = pbody.stripeRange();
243
244 #if defined HAVE_TBB
245
246         tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
247
248 #elif defined HAVE_CSTRIPES
249
250         parallel(MAX(0, numThreads))
251         {
252             int offset = stripeRange.start;
253             int len = stripeRange.end - offset;
254             Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
255             pbody(r);
256             barrier();
257         }
258
259 #elif defined HAVE_OPENMP
260
261         #pragma omp parallel for schedule(dynamic)
262         for (int i = stripeRange.start; i < stripeRange.end; ++i)
263             pbody(Range(i, i + 1));
264
265 #elif defined HAVE_GCD
266
267         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
268         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
269
270 #elif defined HAVE_CONCURRENCY
271
272         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
273         {
274             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
275         }
276         else
277         {
278             pplScheduler->Attach();
279             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
280             Concurrency::CurrentScheduler::Detach();
281         }
282
283 #else
284
285 #error You have hacked and compiling with unsupported parallel framework
286
287 #endif
288
289     }
290     else
291
292 #endif // CV_PARALLEL_FRAMEWORK
293     {
294         (void)nstripes;
295         body(range);
296     }
297 }
298
299 int cv::getNumThreads(void)
300 {
301 #ifdef CV_PARALLEL_FRAMEWORK
302
303     if(numThreads == 0)
304         return 1;
305
306 #endif
307
308 #if defined HAVE_TBB
309
310     return tbbScheduler.is_active()
311            ? numThreads
312            : tbb::task_scheduler_init::default_num_threads();
313
314 #elif defined HAVE_CSTRIPES
315
316     return numThreads > 0
317             ? numThreads
318             : cv::getNumberOfCPUs();
319
320 #elif defined HAVE_OPENMP
321
322     return omp_get_max_threads();
323
324 #elif defined HAVE_GCD
325
326     return 512; // the GCD thread pool limit
327
328 #elif defined HAVE_CONCURRENCY
329
330     return 1 + (pplScheduler == 0
331                 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
332                 : pplScheduler->GetNumberOfVirtualProcessors());
333
334 #else
335
336     return 1;
337
338 #endif
339 }
340
341 void cv::setNumThreads( int threads )
342 {
343     (void)threads;
344 #ifdef CV_PARALLEL_FRAMEWORK
345     numThreads = threads;
346 #endif
347
348 #ifdef HAVE_TBB
349
350     if(tbbScheduler.is_active()) tbbScheduler.terminate();
351     if(threads > 0) tbbScheduler.initialize(threads);
352
353 #elif defined HAVE_CSTRIPES
354
355     return; // nothing needed
356
357 #elif defined HAVE_OPENMP
358
359     if(omp_in_parallel())
360         return; // can't change number of openmp threads inside a parallel region
361
362     omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
363
364 #elif defined HAVE_GCD
365
366     // unsupported
367     // there is only private dispatch_queue_set_width() and only for desktop
368
369 #elif defined HAVE_CONCURRENCY
370
371     if (threads <= 0)
372     {
373         pplScheduler = 0;
374     }
375     else if (threads == 1)
376     {
377         // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
378         numThreads = 0;
379     }
380     else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
381     {
382         pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
383                        Concurrency::MinConcurrency, threads-1,
384                        Concurrency::MaxConcurrency, threads-1));
385     }
386
387 #endif
388 }
389
390
391 int cv::getThreadNum(void)
392 {
393 #if defined HAVE_TBB
394     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
395         return tbb::task_arena::current_slot();
396     #else
397         return 0;
398     #endif
399 #elif defined HAVE_CSTRIPES
400     return pix();
401 #elif defined HAVE_OPENMP
402     return omp_get_thread_num();
403 #elif defined HAVE_GCD
404     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
405 #elif defined HAVE_CONCURRENCY
406     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
407 #else
408     return 0;
409 #endif
410 }
411
412 #ifdef ANDROID
// Counts the CPUs listed in /sys/devices/system/cpu/possible.
//
// The first line of the file is parsed as a comma-separated list whose entries
// are either a single number ("3") or an inclusive range ("5-7"), e.g.
// "0-1,3,5-7,10,13-15". Entries that cannot be parsed, and ranges whose end is
// smaller than their start, are skipped instead of being added to the total.
//
// Returns the number of CPUs found, or 1 if the file cannot be opened or read
// or if no valid entry was found.
static inline int getNumberOfCPUsImpl()
{
    FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
    if(!cpuPossible)
        return 1;

    char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
    char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
    fclose(cpuPossible);
    if(!pbuf)
        return 1;

    int cpusAvailable = 0;

    while(*pbuf)
    {
        // isolate the next comma-separated entry and note whether it is a range
        const char* pos = pbuf;
        bool range = false;
        while(*pbuf && *pbuf != ',')
        {
            if(*pbuf == '-') range = true;
            ++pbuf;
        }
        if(*pbuf) *pbuf++ = 0; // terminate the entry and step over the comma

        int rstart = 0, rend = 0;
        if(range)
        {
            // both bounds must be parsed, otherwise the count would be bogus
            if(sscanf(pos, "%d-%d", &rstart, &rend) == 2 && rend >= rstart)
                cpusAvailable += rend - rstart + 1;
        }
        else if(sscanf(pos, "%d", &rstart) == 1)
        {
            ++cpusAvailable;
        }
    }
    return cpusAvailable > 0 ? cpusAvailable : 1;
}
450 #endif
451
452 int cv::getNumberOfCPUs(void)
453 {
454 #if defined WIN32 || defined _WIN32
455     SYSTEM_INFO sysinfo;
456 #if defined(_M_ARM) || defined(_M_X64) || defined(HAVE_WINRT)
457     GetNativeSystemInfo( &sysinfo );
458 #else
459     GetSystemInfo( &sysinfo );
460 #endif
461
462     return (int)sysinfo.dwNumberOfProcessors;
463 #elif defined ANDROID
464     static int ncpus = getNumberOfCPUsImpl();
465     return ncpus;
466 #elif defined __linux__
467     return (int)sysconf( _SC_NPROCESSORS_ONLN );
468 #elif defined __APPLE__
469     int numCPU=0;
470     int mib[4];
471     size_t len = sizeof(numCPU);
472
473     /* set the mib for hw.ncpu */
474     mib[0] = CTL_HW;
475     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
476
477     /* get the number of CPUs from the system */
478     sysctl(mib, 2, &numCPU, &len, NULL, 0);
479
480     if( numCPU < 1 )
481     {
482         mib[1] = HW_NCPU;
483         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
484
485         if( numCPU < 1 )
486             numCPU = 1;
487     }
488
489     return (int)numCPU;
490 #else
491     return 1;
492 #endif
493 }
494
495 const char* cv::currentParallelFramework() {
496 #ifdef CV_PARALLEL_FRAMEWORK
497     return CV_PARALLEL_FRAMEWORK;
498 #else
499     return NULL;
500 #endif
501 }
502
503 CV_IMPL void cvSetNumThreads(int nt)
504 {
505     cv::setNumThreads(nt);
506 }
507
508 CV_IMPL int cvGetNumThreads()
509 {
510     return cv::getNumThreads();
511 }
512
513 CV_IMPL int cvGetThreadNum()
514 {
515     return cv::getThreadNum();
516 }