namespace cv
{
- ParallelLoopBody::~ParallelLoopBody() { }
+ // Adapts a ParallelLoopBody so that a backend can iterate over stripe
+ // indices [0, nstripes) instead of the caller's original range. Each stripe
+ // index range is mapped back onto a sub-range of wholeRange before the
+ // wrapped body is invoked.
+ class ParallelLoopBodyWrapper
+ {
+ public:
+ // _body     - loop body to invoke; stored by pointer, so it must outlive this wrapper.
+ // _r        - the full range the caller wants processed.
+ // _nstripes - requested number of stripes; a negative value means one
+ //             stripe per element of _r. The result is clamped to
+ //             [1, length of _r] for a non-empty range.
+ ParallelLoopBodyWrapper(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
+ {
+ body = &_body;
+ wholeRange = _r;
+ double len = wholeRange.end - wholeRange.start;
+ // An empty or inverted range gets zero stripes, so stripeRange() is
+ // never inverted and operator() never divides by a non-positive count.
+ nstripes = len <= 0 ? 0 : cvRound(_nstripes < 0 ? len : MIN(MAX(_nstripes, 1.), len));
+ }
+ // Maps the stripe index range sr onto wholeRange and runs the body on it.
+ // Stripe boundaries are wholeRange.start + round(index*length/nstripes);
+ // the end of the last stripe is pinned to wholeRange.end.
+ void operator()(const Range& sr) const
+ {
+ // Nothing to process; this also guards the divisions below against
+ // nstripes == 0 (e.g. the serial fallback calling with Range(0, 0)).
+ if( nstripes <= 0 )
+ return;
+ Range r;
+ r.start = (int)(wholeRange.start +
+ ((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
+ r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
+ ((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
+ (*body)(r);
+ }
+ // Range of stripe indices a backend should iterate over.
+ Range stripeRange() const { return Range(0, nstripes); }
-#ifdef HAVE_TBB
- class TbbProxyLoopBody
+ protected:
+ const ParallelLoopBody* body;
+ Range wholeRange;
+ int nstripes;
+ };
+
+ ParallelLoopBody::~ParallelLoopBody() {}
+
+#if defined HAVE_TBB
+ // TBB adapter: exposes the call operator taking tbb::blocked_range<int>
+ // that tbb::parallel_for invokes, and forwards it to the stripe-mapping wrapper.
+ class ProxyLoopBody : public ParallelLoopBodyWrapper
 {
 public:
- TbbProxyLoopBody(const ParallelLoopBody& _body) :
- body(&_body)
- { }
+ ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
+ : ParallelLoopBodyWrapper(_body, _r, _nstripes)
+ {}
 void operator ()(const tbb::blocked_range<int>& range) const
 {
- body->operator()(Range(range.begin(), range.end()));
+ // The operator() declared in this class hides the base-class
+ // operator()(const Range&), so the base call must be qualified;
+ // an unqualified (*this)(Range(...)) finds no viable overload.
+ this->ParallelLoopBodyWrapper::operator()(Range(range.begin(), range.end()));
 }
-
- private:
- const ParallelLoopBody* body;
 };
-#endif // end HAVE_TBB
+#elif defined HAVE_GCD
-#ifdef HAVE_GCD
+ typedef ParallelLoopBodyWrapper ProxyLoopBody;
+ // dispatch_apply_f callback: context is the ProxyLoopBody passed by
+ // parallel_for_, index is the stripe to process.
 static
 void block_function(void* context, size_t index)
 {
- ParallelLoopBody* ptr_body = static_cast<ParallelLoopBody*>(context);
- ptr_body->operator()(Range(index, index + 1));
+ ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
+ // Range is built from int values; convert once, explicitly, instead of
+ // relying on an implicit size_t -> int narrowing at the call.
+ const int stripe = static_cast<int>(index);
+ (*ptr_body)(Range(stripe, stripe + 1));
 }
-#endif // HAVE_GCD
+#elif defined HAVE_CONCURRENCY
+ // Concurrency-runtime adapter: Concurrency::parallel_for calls the body
+ // with a single int index, which is forwarded as the one-stripe range [i, i + 1).
+ class ProxyLoopBody : public ParallelLoopBodyWrapper
+ {
+ public:
+ ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
+ : ParallelLoopBodyWrapper(_body, _r, _nstripes)
+ {}
+
+ void operator ()(int i) const
+ {
+ // operator()(int) hides the base-class operator()(const Range&),
+ // so the base call has to be qualified explicitly.
+ this->ParallelLoopBodyWrapper::operator()(Range(i, i + 1));
+ }
+ };
+#else
+ typedef ParallelLoopBodyWrapper ProxyLoopBody;
+#endif
- void parallel_for_(const Range& range, const ParallelLoopBody& body)
+ // Runs body over range, split into stripes that are handed to whichever
+ // parallel backend was selected at build time (serial call if none).
+ // range    - the full index range to process.
+ // body     - loop body; invoked with sub-ranges of range.
+ // nstripes - requested stripe count, forwarded to ProxyLoopBody.
+ void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes)
 {
-#ifdef HAVE_TBB
+ // Nothing to schedule for an empty or inverted range. Returning here
+ // keeps a zero or negative stripe count away from the backends (the
+ // serial fallback would divide by it, GCD would convert it to size_t).
+ if( range.end <= range.start )
+ return;
+
+ ProxyLoopBody pbody(body, range, nstripes);
+ Range stripeRange = pbody.stripeRange();
+
+#if defined HAVE_TBB
- tbb::parallel_for(tbb::blocked_range<int>(range.start, range.end), TbbProxyLoopBody(body));
+ tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
 #elif defined HAVE_CONCURRENCY
- class ConcurrencyProxyLoopBody
- {
- public:
- ConcurrencyProxyLoopBody(const ParallelLoopBody& body) : _body(body) {}
-
- void operator ()(int i) const
- {
- _body(Range(i, i + 1));
- }
-
- private:
- const ParallelLoopBody& _body;
- ConcurrencyProxyLoopBody& operator=(const ConcurrencyProxyLoopBody&) {return *this;}
- } proxy(body);
-
- Concurrency::parallel_for(range.start, range.end, proxy);
+ Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
 #elif defined HAVE_OPENMP
 #pragma omp parallel for schedule(dynamic)
- for (int i = range.start; i < range.end; ++i)
- body(Range(i, i + 1));
+ for (int i = stripeRange.start; i < stripeRange.end; ++i)
+ pbody(Range(i, i + 1));
 #elif defined HAVE_GCD
 dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
- dispatch_apply_f(range.end - range.start, concurrent_queue, &const_cast<ParallelLoopBody&>(body), block_function);
+ dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
 #elif defined HAVE_CSTRIPES
 parallel()
 {
- int offset = range.start;
- int len = range.end - offset;
+ int offset = stripeRange.start;
+ int len = stripeRange.end - offset;
 Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
- body(r);
+ pbody(r);
 barrier();
 }
 #else
- body(range);
+ pbody(stripeRange);
-#endif // end HAVE_TBB
+#endif
 }
} // namespace cv
Range range(0, dsize.height);
resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
- parallel_for_(range, invoker);
+ parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
Range range(0, dsize.height);
resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
ssize, dsize, ksize, xmin, xmax);
- parallel_for_(range, invoker);
+ parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
template <typename T, typename WT>
Range range(0, dst.rows);
resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
scale_y, ofs, xofs);
- parallel_for_(range, invoker);
+ parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
struct DecimateAlpha
const Mat& _fxy, const void* _wtab,
int borderType, const Scalar& _borderValue);
-class remapInvoker :
+class RemapInvoker :
public ParallelLoopBody
{
public:
- remapInvoker(const Mat& _src, Mat _dst, const Mat& _map1, const Mat& _map2, const Mat *_m1,
+ RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
const Mat *_m2, int _interpolation, int _borderType, const Scalar &_borderValue,
int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
- ParallelLoopBody(), src(_src), dst(_dst), map1(_map1), map2(_map2), m1(_m1), m2(_m2),
+ ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue),
planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
{
{
int x, y, x1, y1;
const int buf_size = 1 << 14;
- int brows0 = std::min(128, dst.rows), map_depth = map1.depth();
- int bcols0 = std::min(buf_size/brows0, dst.cols);
- brows0 = std::min(buf_size/bcols0, dst.rows);
+ int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
+ int bcols0 = std::min(buf_size/brows0, dst->cols);
+ brows0 = std::min(buf_size/bcols0, dst->rows);
#if CV_SSE2
bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
for( y = range.start; y < range.end; y += brows0 )
{
- for( x = 0; x < dst.cols; x += bcols0 )
+ for( x = 0; x < dst->cols; x += bcols0 )
{
int brows = std::min(brows0, range.end - y);
- int bcols = std::min(bcols0, dst.cols - x);
- Mat dpart(dst, Rect(x, y, bcols, brows));
+ int bcols = std::min(bcols0, dst->cols - x);
+ Mat dpart(*dst, Rect(x, y, bcols, brows));
Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
if( nnfunc )
{
- if( map1.type() == CV_16SC2 && !map2.data ) // the data is already in the right format
- bufxy = map1(Rect(x, y, bcols, brows));
+ if( m1->type() == CV_16SC2 && !m2->data ) // the data is already in the right format
+ bufxy = (*m1)(Rect(x, y, bcols, brows));
else if( map_depth != CV_32F )
{
for( y1 = 0; y1 < brows; y1++ )
}
}
else if( !planar_input )
- map1(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
+ (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
else
{
for( y1 = 0; y1 < brows; y1++ )
{
short* XY = (short*)(bufxy.data + bufxy.step*y1);
- const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
- const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
+ const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
+ const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
x1 = 0;
#if CV_SSE2
}
}
}
- nnfunc( src, dpart, bufxy, borderType, borderValue );
+ nnfunc( *src, dpart, bufxy, borderType, borderValue );
continue;
}
short* XY = (short*)(bufxy.data + bufxy.step*y1);
ushort* A = (ushort*)(bufa.data + bufa.step*y1);
- if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
- (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
+ if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
{
- bufxy = m1->operator()(Rect(x, y, bcols, brows));
- bufa = m2->operator()(Rect(x, y, bcols, brows));
+ bufxy = (*m1)(Rect(x, y, bcols, brows));
+ bufa = (*m2)(Rect(x, y, bcols, brows));
}
else if( planar_input )
{
- const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
- const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
+ const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
+ const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
x1 = 0;
#if CV_SSE2
}
else
{
- const float* sXY = (const float*)(map1.data + map1.step*(y+y1)) + x*2;
+ const float* sXY = (const float*)(m1->data + m1->step*(y+y1)) + x*2;
for( x1 = 0; x1 < bcols; x1++ )
{
}
}
}
- ifunc(src, dpart, bufxy, bufa, ctab, borderType, borderValue);
+ ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
}
}
}
private:
- Mat src;
- Mat dst;
- Mat map1, map2;
+ const Mat* src;
+ Mat* dst;
const Mat *m1, *m2;
int interpolation, borderType;
Scalar borderValue;
const Mat *m1 = &map1, *m2 = &map2;
- if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
- (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
+ if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || !map2.data)) ||
+ (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || !map1.data)) )
{
if( map1.type() != CV_16SC2 )
std::swap(m1, m2);
planar_input = map1.channels() == 1;
}
- Range range(0, dst.rows);
- remapInvoker invoker(src, dst, map1, map2, m1, m2, interpolation,
+ RemapInvoker invoker(src, dst, m1, m2, interpolation,
borderType, borderValue, planar_input, nnfunc, ifunc,
ctab);
- parallel_for_(range, invoker);
+ parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
}
Range range(0, dst.rows);
warpAffineInvoker invoker(src, dst, interpolation, borderType,
borderValue, adelta, bdelta, M);
- parallel_for_(range, invoker);
+ parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
Range range(0, dst.rows);
warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
- parallel_for_(range, invoker);
+ parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
class ThresholdRunner : public ParallelLoopBody
{
public:
- ThresholdRunner(Mat _src, Mat _dst, int _nStripes, double _thresh, double _maxval, int _thresholdType)
+ ThresholdRunner(Mat _src, Mat _dst, double _thresh, double _maxval, int _thresholdType)
{
src = _src;
dst = _dst;
- nStripes = _nStripes;
-
thresh = _thresh;
maxval = _maxval;
thresholdType = _thresholdType;
void operator () ( const Range& range ) const
{
- int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows);
- int row1 = range.end >= nStripes ? src.rows :
- std::min(cvRound(range.end * src.rows / nStripes), src.rows);
-
- /*if(0)
- printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
- src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
+ int row0 = range.start;
+ int row1 = range.end;
Mat srcStripe = src.rowRange(row0, row1);
Mat dstStripe = dst.rowRange(row0, row1);
else
CV_Error( CV_StsUnsupportedFormat, "" );
- size_t nStripes = (src.total() + (1<<15)) >> 16;
- nStripes = MAX(MIN(nStripes, (size_t)4), (size_t)1);
- parallel_for_(Range(0, (int)nStripes),
- ThresholdRunner(src, dst, nStripes, thresh, maxval, type));
+ parallel_for_(Range(0, dst.rows),
+ ThresholdRunner(src, dst, thresh, maxval, type),
+ dst.total()/(double)(1<<16));
return thresh;
}