1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
45 #if defined WIN32 || defined WINCE
53 #if defined __linux__ || defined __APPLE__
56 #include <sys/types.h>
58 #include <sys/sysconf.h>
60 #include <sys/sysctl.h>
72 #if defined _MSC_VER && _MSC_VER >= 1600
73 #define HAVE_CONCURRENCY
76 /* IMPORTANT: always use the same order of defines
77 1. HAVE_TBB - 3rdparty library, should be explicitly enabled
78 2. HAVE_CSTRIPES - 3rdparty library, should be explicitly enabled
79 3. HAVE_OPENMP - integrated to compiler, should be explicitly enabled
80 4. HAVE_GCD - system wide, used automatically (APPLE only)
81 5. HAVE_CONCURRENCY - part of runtime, used automatically (Windows only - MSVS 10, MSVS 11)
85 #include "tbb/tbb_stddef.h"
86 #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
89 #if TBB_INTERFACE_VERSION >= 6100
90 #include "tbb/task_arena.h"
96 #endif // end TBB version
100 #if defined HAVE_CSTRIPES
103 #elif defined HAVE_OPENMP
105 #elif defined HAVE_GCD
106 #include <dispatch/dispatch.h>
108 #elif defined HAVE_CONCURRENCY
// Define CV_PARALLEL_FRAMEWORK as the name string of the backend this file is built with.
// The branches are tested in a fixed order (tbb, cstripes, openmp, gcd, ms-concurrency), so
// the first backend macro that is defined wins. TBB is only selected when
// TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR is at least 202.
113 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
114 # define CV_PARALLEL_FRAMEWORK "tbb"
115 #elif defined HAVE_CSTRIPES
116 # define CV_PARALLEL_FRAMEWORK "cstripes"
117 #elif defined HAVE_OPENMP
118 # define CV_PARALLEL_FRAMEWORK "openmp"
119 #elif defined HAVE_GCD
120 # define CV_PARALLEL_FRAMEWORK "gcd"
121 #elif defined HAVE_CONCURRENCY
122 # define CV_PARALLEL_FRAMEWORK "ms-concurrency"
// Out-of-line definition of the ParallelLoopBody destructor; the body is empty.
127 ParallelLoopBody::~ParallelLoopBody() {}
132 #ifdef CV_PARALLEL_FRAMEWORK
// Adapter that maps stripe indices onto sub-ranges of the caller's range.
// NOTE(review): this listing omits lines of the class (braces, member initializers, the
// call that hands the computed range to the body); the comments below describe only what
// is visible here.
133 class ParallelLoopBodyWrapper
// _body: the loop body, _r: the range to split, _nstripes: requested number of stripes.
// Presumably _body and _r are stored in `body` and `wholeRange` (initializers not shown).
136 ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
// Length of the whole range, kept as double for the clamp below.
140 double len = wholeRange.end - wholeRange.start;
// A non-positive request yields one stripe per element; any other request is clamped to
// [1, len] and then rounded to an integer.
141 nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
// Converts a range of stripe indices (sr) into a range of elements (r) inside wholeRange.
143 void operator()(const cv::Range& sr) const
// Stripe boundary k maps to wholeRange.start + (k * length + nstripes/2) / nstripes: adding
// half the divisor rounds the integer division to nearest, and the uint64 cast makes the
// product be computed in 64 bits.
146 r.start = (int)(wholeRange.start +
147 ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
// When sr reaches the last stripe the end is pinned to wholeRange.end exactly instead of
// being recomputed.
148 r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
149 ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
// Range of valid stripe indices: [0, nstripes).
152 cv::Range stripeRange() const { return cv::Range(0, nstripes); }
// Loop body to execute (held by pointer) and the full range it is applied to.
155 const cv::ParallelLoopBody* body;
156 cv::Range wholeRange;
// Proxy used with TBB: adds a call operator taking tbb::blocked_range<int> on top of the
// wrapper. The constructor only forwards its arguments to ParallelLoopBodyWrapper.
161 class ProxyLoopBody : public ParallelLoopBodyWrapper
164 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
165 : ParallelLoopBodyWrapper(_body, _r, _nstripes)
// Re-expresses the TBB range [begin, end) as a cv::Range of stripe indices and forwards it
// to the base-class operator().
168 void operator ()(const tbb::blocked_range<int>& range) const
170 this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
// For these backends no adapter class is defined; the wrapper itself is the proxy type.
173 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
174 typedef ParallelLoopBodyWrapper ProxyLoopBody;
175 #elif defined HAVE_GCD
176 typedef ParallelLoopBodyWrapper ProxyLoopBody;
// Plain-function trampoline passed to dispatch_apply_f by cv::parallel_for_.
// context: pointer to the ProxyLoopBody, index: the stripe to run.
177 static void block_function(void* context, size_t index)
179 ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
// Execute exactly one stripe, [index, index + 1).
180 (*ptr_body)(cv::Range((int)index, (int)index + 1));
// Proxy used with the Concurrency backend: adds a call operator taking one integer index.
// The constructor only forwards its arguments to ParallelLoopBodyWrapper.
182 #elif defined HAVE_CONCURRENCY
183 class ProxyLoopBody : public ParallelLoopBodyWrapper
186 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
187 : ParallelLoopBodyWrapper(_body, _r, _nstripes)
// Runs the single stripe [i, i + 1) through the base-class operator().
190 void operator ()(int i) const
192 this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
// NOTE(review): the directive guarding this typedef is not visible in this listing;
// presumably it is the #else fallback of the chain above - confirm.
196 typedef ParallelLoopBodyWrapper ProxyLoopBody;
// Thread count most recently passed to cv::setNumThreads. Starts at -1; the code in this
// file treats non-positive values as "use the backend default".
199 static int numThreads = -1;
// TBB scheduler object constructed in deferred mode; cv::setNumThreads terminates and
// initializes it.
202 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
203 #elif defined HAVE_CSTRIPES
205 #elif defined HAVE_OPENMP
// omp_get_max_threads() sampled during static initialization; cv::setNumThreads passes it
// to omp_set_num_threads when a non-positive thread count is requested.
206 static int numThreadsMax = omp_get_max_threads();
207 #elif defined HAVE_GCD
209 #elif defined HAVE_CONCURRENCY
// SchedPtr: holder for a Concurrency::Scheduler pointer.
// NOTE(review): the class header and part of operator= are not shown in this listing.
212 Concurrency::Scheduler* sched_;
// Pointer-like access to the held scheduler.
214 Concurrency::Scheduler* operator->() { return sched_; }
215 operator Concurrency::Scheduler*() { return sched_; }
// Assignment first calls Release() on the scheduler currently held, if there is one.
217 void operator=(Concurrency::Scheduler* sched)
219 if (sched_) sched_->Release();
// Starts out holding no scheduler.
223 SchedPtr() : sched_(0) {}
// Destruction assigns 0, which goes through operator= above and so releases the held scheduler.
224 ~SchedPtr() { *this = 0; }
// Scheduler that cv::setNumThreads creates and cv::parallel_for_ / cv::getNumThreads consult.
226 static SchedPtr pplScheduler;
229 #endif // CV_PARALLEL_FRAMEWORK
233 /* ================================ parallel_for_ ================================ */
// Runs `body` over `range`, split into stripes, using the backend selected at compile time.
// range: the index range to process, body: the loop body, nstripes: requested number of
// stripes (interpreted by the ProxyLoopBody constructor).
// NOTE(review): this listing omits lines of the function (braces, the single-stripe branch
// body, the C= call, the path taken when no framework is defined).
235 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
237 #ifdef CV_PARALLEL_FRAMEWORK
// Wrap the body so that backends can address the work by stripe index.
241 ProxyLoopBody pbody(body, range, nstripes);
242 cv::Range stripeRange = pbody.stripeRange();
// Exactly one stripe: handled separately (branch body not shown; presumably runs the body
// directly on the whole range - confirm).
243 if( stripeRange.end - stripeRange.start == 1 )
// TBB: hand the stripe index range to tbb::parallel_for.
251 tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
253 #elif defined HAVE_CSTRIPES
// C=: open a parallel region (negative numThreads is passed as 0) and build the stripe
// sub-range r from the CPX_RANGE_START/CPX_RANGE_END macros.
255 parallel(MAX(0, numThreads))
257 int offset = stripeRange.start;
258 int len = stripeRange.end - offset;
259 Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
264 #elif defined HAVE_OPENMP
// OpenMP: one stripe per loop iteration, dynamically scheduled.
266 #pragma omp parallel for schedule(dynamic)
267 for (int i = stripeRange.start; i < stripeRange.end; ++i)
268 pbody(Range(i, i + 1));
270 #elif defined HAVE_GCD
// GCD: invoke block_function once per stripe on the global default-priority queue,
// passing &pbody as the context pointer.
272 dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
273 dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
275 #elif defined HAVE_CONCURRENCY
// Concurrency: when no scheduler is held, or the held one is already the current
// scheduler, call parallel_for directly.
277 if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
279 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
// Otherwise attach the held scheduler around the call and detach afterwards.
283 pplScheduler->Attach();
284 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
285 Concurrency::CurrentScheduler::Detach();
// CV_PARALLEL_FRAMEWORK is defined but none of the backend branches matched.
290 #error You have hacked and compiling with unsupported parallel framework
297 #endif // CV_PARALLEL_FRAMEWORK
// Returns the number of threads reported for the compiled-in parallel backend.
// NOTE(review): this listing omits lines (braces, the middle operands of two conditional
// expressions, the path taken when no framework is defined).
304 int cv::getNumThreads(void)
306 #ifdef CV_PARALLEL_FRAMEWORK
// TBB: one value when the scheduler is active (operand not shown), otherwise TBB's default
// thread count.
315 return tbbScheduler.is_active()
317 : tbb::task_scheduler_init::default_num_threads();
319 #elif defined HAVE_CSTRIPES
// C=: one value when numThreads is positive (operand not shown), otherwise the CPU count.
321 return numThreads > 0
323 : cv::getNumberOfCPUs();
325 #elif defined HAVE_OPENMP
327 return omp_get_max_threads();
329 #elif defined HAVE_GCD
// GCD: a fixed constant is returned rather than a queried value.
331 return 512; // the GCD thread pool limit
333 #elif defined HAVE_CONCURRENCY
// Concurrency: virtual-processor count of the held scheduler (or of the current scheduler
// when none is held), plus one.
335 return 1 + (pplScheduler == 0
336 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
337 : pplScheduler->GetNumberOfVirtualProcessors());
// Sets the number of threads for the compiled-in parallel backend.
// threads: requested count; non-positive values select a backend-specific default where
// the visible code handles them (TBB leaves the scheduler uninitialized, OpenMP uses numThreadsMax).
// NOTE(review): this listing omits lines (braces, the first branch of the Concurrency
// if-chain, the bodies of its branches).
346 void cv::setNumThreads( int threads )
349 #ifdef CV_PARALLEL_FRAMEWORK
// Remember the request; other functions in this file read numThreads.
350 numThreads = threads;
// TBB: shut down an active scheduler, then start a new one only for a positive count.
355 if(tbbScheduler.is_active()) tbbScheduler.terminate();
356 if(threads > 0) tbbScheduler.initialize(threads);
358 #elif defined HAVE_CSTRIPES
360 return; // nothing needed
362 #elif defined HAVE_OPENMP
364 if(omp_in_parallel())
365 return; // can't change number of openmp threads inside a parallel region
// A non-positive request falls back to the maximum sampled at static initialization.
367 omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
369 #elif defined HAVE_GCD
372 // there is only private dispatch_queue_set_width() and only for desktop
374 #elif defined HAVE_CONCURRENCY
380 else if (threads == 1)
382 // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
// Create a new scheduler only when none is held or the held one has a different size.
// Both MinConcurrency and MaxConcurrency are set to threads-1, mirroring the "1 +" in the
// comparison here.
385 else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
// The leading 2 is followed by two key/value pairs (presumably the pair count - confirm
// against the SchedulerPolicy constructor documentation).
387 pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
388 Concurrency::MinConcurrency, threads-1,
389 Concurrency::MaxConcurrency, threads-1));
// Returns an identifier of the calling thread as provided by the compiled-in backend.
// NOTE(review): this listing omits lines (braces, the TBB fallback, the C= branch body and
// the path taken when no framework is defined).
396 int cv::getThreadNum(void)
// TBB: the arena slot is only available with interface version >= 6100 and the task_arena
// preview enabled.
399 #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
400 return tbb::task_arena::current_slot();
404 #elif defined HAVE_CSTRIPES
406 #elif defined HAVE_OPENMP
407 return omp_get_thread_num();
408 #elif defined HAVE_GCD
// NOTE(review): the pthread handle is narrowed to int, so on platforms where the handle is
// wider than int distinct threads could presumably map to the same value - confirm whether
// callers rely on uniqueness.
409 return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
410 #elif defined HAVE_CONCURRENCY
411 return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessarily 1,2,3,...
// Counts CPUs by parsing /sys/devices/system/cpu/possible, a comma-separated list of single
// indices and "a-b" ranges. Returns the count, or 1 when the count is 0.
// NOTE(review): this listing omits lines (braces, null checks, loop headers, the handling
// of single-index tokens). No fclose() is visible here - confirm the file is closed.
418 static inline int getNumberOfCPUsImpl()
420 FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
424 char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
// Read one line of the file; pbuf then walks over it token by token.
425 char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
430 //parse string of form "0-1,3,5-7,10,13-15"
431 int cpusAvailable = 0;
// pos remembers where the current token starts.
435 const char* pos = pbuf;
// Advance to the end of the token (next ',' or end of string), noting whether it has a '-'.
437 while(*pbuf && *pbuf != ',')
439 if(*pbuf == '-') range = true;
// Overwrite the separator with a terminator and step past it.
442 if(*pbuf) *pbuf++ = 0;
// Range token "a-b" contributes b - a + 1 CPUs.
// NOTE(review): the sscanf return value is not checked, so a malformed token would leave
// rstart/rend at 0 and still add 1.
447 int rstart = 0, rend = 0;
448 sscanf(pos, "%d-%d", &rstart, &rend);
449 cpusAvailable += rend - rstart + 1;
// Never report zero CPUs.
453 return cpusAvailable ? cpusAvailable : 1;
// Returns the number of CPUs as reported by the platform-specific API selected at compile time.
// NOTE(review): this listing omits lines (braces, declarations of sysinfo/mib/numCPU, the
// Android and Apple return statements, any remaining branches).
457 int cv::getNumberOfCPUs(void)
459 #if defined WIN32 || defined _WIN32
// Windows: GetNativeSystemInfo is used on ARM, x64 and WinRT builds, GetSystemInfo otherwise.
461 #if defined(_M_ARM) || defined(_M_X64) || defined(HAVE_WINRT)
462 GetNativeSystemInfo( &sysinfo );
464 GetSystemInfo( &sysinfo );
467 return (int)sysinfo.dwNumberOfProcessors;
468 #elif defined ANDROID
// Android: parse the sysfs list once and keep the result in a function-local static.
469 static int ncpus = getNumberOfCPUsImpl();
471 #elif defined __linux__
// NOTE(review): the sysconf result is returned unchecked; sysconf reports errors as -1 -
// confirm callers tolerate a non-positive value.
472 return (int)sysconf( _SC_NPROCESSORS_ONLN );
473 #elif defined __APPLE__
476 size_t len = sizeof(numCPU);
478 /* set the mib for hw.ncpu */
480 mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;
482 /* get the number of CPUs from the system */
483 sysctl(mib, 2, &numCPU, &len, NULL, 0);
// Second query with the same arguments; the lines between the two calls are not shown
// (presumably a retry with a different mib[1] when the first result is unusable - confirm).
488 sysctl( mib, 2, &numCPU, &len, NULL, 0 );
// Returns the name string of the compiled-in parallel backend (the CV_PARALLEL_FRAMEWORK
// macro) when one is defined. The branch for builds without a framework is not shown in
// this listing.
500 const char* cv::currentParallelFramework() {
501 #ifdef CV_PARALLEL_FRAMEWORK
502 return CV_PARALLEL_FRAMEWORK;
// C API wrapper: forwards nt to cv::setNumThreads.
508 CV_IMPL void cvSetNumThreads(int nt)
510 cv::setNumThreads(nt);
// C API wrapper: returns the result of cv::getNumThreads.
513 CV_IMPL int cvGetNumThreads()
515 return cv::getNumThreads();
// C API wrapper: returns the result of cv::getThreadNum.
518 CV_IMPL int cvGetThreadNum()
520 return cv::getThreadNum();