From: Vadim Pisarevsky Date: Tue, 17 Dec 2013 10:29:30 +0000 (+0400) Subject: almost finished opencl-ization of cascade classifier X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~777^2~2^2~4 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9d3e7e027aa24039bff387188373eac21254e2eb;p=profile%2Fivi%2Fopencv.git almost finished opencl-ization of cascade classifier --- diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index f253594..b91d677 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -402,6 +402,61 @@ public: int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this; } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, + const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, + const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, + const _Tp12& a12) + { + int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); + i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); + set(i, a12); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, + const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, + const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, + const _Tp12& a12, const _Tp13& a13) + { + int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); + i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); + i = set(i, a12); set(i, a13); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, + const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, + const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, + const _Tp12& a12, const _Tp13& a13, const _Tp14& a14) + { + int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); + i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); + i = set(i, a12); i = set(i, a13); set(i, a14); return *this; + } + + template + Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, + const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7, + const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11, + const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15) + { + int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5); + i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11); + i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this; + } bool run(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue()); diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 25f9797..25af90e 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1075,6 +1075,11 @@ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), bool normalize = true, int borderType = BORDER_DEFAULT ); + +CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor = Point(-1, -1), + bool normalize = true, + int borderType = BORDER_DEFAULT ); //! a synonym for normalized box filter CV_EXPORTS_W void blur( InputArray src, OutputArray dst, diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 2e2eabf..069d6f7 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -741,6 +741,114 @@ void cv::blur( InputArray src, OutputArray dst, boxFilter( src, dst, -1, ksize, anchor, true, borderType ); } + +/****************************************************************************************\ + Squared Box Filter +\****************************************************************************************/ + +namespace cv +{ + +template struct SqrRowSum : public BaseRowFilter +{ + SqrRowSum( int _ksize, int _anchor ) + { + ksize = _ksize; + anchor = _anchor; + } + + void operator()(const uchar* src, uchar* dst, int width, int cn) + { + const T* S = (const T*)src; + ST* D = (ST*)dst; + int i = 0, k, ksz_cn = ksize*cn; + + width = (width - 1)*cn; + for( k = 0; k < cn; k++, S++, D++ ) + { + ST s = 0; + for( i = 0; i < ksz_cn; i += cn ) + { + ST val = (ST)S[i]; + s += val*val; + } + D[0] = s; + for( i = 0; i < width; i += cn ) + { + ST val0 = (ST)S[i], val1 = (ST)S[i + ksz_cn]; + s += val1*val1 - val0*val0; + D[i+cn] = s; + } + } + } +}; + +static Ptr getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor) +{ + int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType); + CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) ); + + if( anchor < 0 ) + anchor = ksize/2; + + if( sdepth == CV_8U && ddepth == CV_32S ) + return makePtr >(ksize, anchor); + if( sdepth == CV_8U && ddepth == CV_64F ) + return makePtr >(ksize, anchor); + if( sdepth == CV_16U && ddepth == CV_64F ) + return makePtr >(ksize, anchor); + if( sdepth == CV_16S && ddepth == CV_64F ) + return makePtr >(ksize, anchor); + if( sdepth == CV_32F && ddepth == CV_64F ) + return makePtr >(ksize, anchor); + if( sdepth == CV_64F && ddepth == CV_64F ) + return makePtr >(ksize, anchor); + + CV_Error_( CV_StsNotImplemented, + ("Unsupported combination of source format (=%d), and buffer format (=%d)", + srcType, sumType)); + + return Ptr(); +} + +} + +void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, + Size ksize, Point anchor, + bool normalize, int borderType ) +{ + Mat src = _src.getMat(); + int sdepth = src.depth(), cn = src.channels(); + if( ddepth < 0 ) + ddepth = sdepth < CV_32F ? CV_32F : CV_64F; + _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); + Mat dst = _dst.getMat(); + if( borderType != BORDER_CONSTANT && normalize ) + { + if( src.rows == 1 ) + ksize.height = 1; + if( src.cols == 1 ) + ksize.width = 1; + } + + int sumType = CV_64F; + if( sdepth == CV_8U ) + sumType = CV_32S; + sumType = CV_MAKETYPE( sumType, cn ); + int srcType = CV_MAKETYPE(sdepth, cn); + int dstType = CV_MAKETYPE(ddepth, cn); + + Ptr rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x ); + Ptr columnFilter = getColumnSumFilter(sumType, + dstType, ksize.height, anchor.y, + normalize ? 1./(ksize.width*ksize.height) : 1); + + Ptr f = makePtr(Ptr(), rowFilter, columnFilter, + srcType, dstType, sumType, borderType ); + f->apply( src, dst ); +} + + /****************************************************************************************\ Gaussian Blur \****************************************************************************************/ diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 631264c..0d57921 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -135,7 +135,7 @@ public: virtual Ptr clone() const; virtual int getFeatureType() const; - virtual bool setImage(InputArray img, Size origWinSize); + virtual bool setImage(InputArray img, Size origWinSize, Size sumSize); virtual bool setWindow(Point p); virtual double calcOrd(int featureIdx) const; diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 41c0e65..c87a4e9 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -112,6 +112,13 @@ struct Logger namespace cv { + +template void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um) +{ + if(v.empty()) + um.release(); + Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um); +} void groupRectangles(std::vector& rectList, int groupThreshold, double eps, std::vector* weights, std::vector* levelWeights) { @@ -434,7 +441,7 @@ FeatureEvaluator::~FeatureEvaluator() {} bool FeatureEvaluator::read(const FileNode&) {return true;} Ptr FeatureEvaluator::clone() const { return Ptr(); } int FeatureEvaluator::getFeatureType() const {return -1;} -bool FeatureEvaluator::setImage(InputArray, Size) {return true;} +bool FeatureEvaluator::setImage(InputArray, Size, Size) {return true;} bool FeatureEvaluator::setWindow(Point) { return true; } double FeatureEvaluator::calcOrd(int) const { return 0.; } int FeatureEvaluator::calcCat(int) const { return 0; } @@ -468,7 +475,6 @@ HaarEvaluator::HaarEvaluator() { optfeaturesPtr = 0; pwin = 0; - pqwin = 0; } HaarEvaluator::~HaarEvaluator() { @@ -478,10 +484,16 @@ bool HaarEvaluator::read(const FileNode& node) { size_t i, n = node.size(); CV_Assert(n > 0); + if(features.empty()) + features = makePtr >(); + if(optfeatures.empty()) + optfeatures = makePtr >(); features->resize(n); FileNodeIterator it = node.begin(); hasTiltedFeatures = false; std::vector ff = *features; + sumSize0 = Size(); + ufbuf.release(); for(i = 0; i < n; i++, ++it) { @@ -502,59 +514,91 @@ Ptr HaarEvaluator::clone() const ret->optfeaturesPtr = optfeatures->empty() ? 0 : &(*(ret->optfeatures))[0]; ret->hasTiltedFeatures = hasTiltedFeatures; ret->sum0 = sum0; ret->sqsum0 = sqsum0; - ret->sum = sum; ret->sqsum = sqsum; ret->tilted = tilted; + ret->sum = sum; ret->sqsum = sqsum; + ret->usum0 = usum0; ret->usqsum0 = usqsum0; ret->ufbuf = ufbuf; ret->normrect = normrect; memcpy( ret->nofs, nofs, 4*sizeof(nofs[0]) ); - memcpy( ret->nqofs, nqofs, 4*sizeof(nqofs[0]) ); - ret->pwin = pwin; ret->pqwin = pqwin; + ret->pwin = pwin; ret->varianceNormFactor = varianceNormFactor; return ret; } -bool HaarEvaluator::setImage( InputArray _image, Size _origWinSize ) +bool HaarEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize ) { Size imgsz = _image.size(); - int rn = imgsz.height+1, cn = imgsz.width+1, rnt = rn; - origWinSize = _origWinSize; - normrect = Rect(1, 1, origWinSize.width-2, origWinSize.height-2); - + int cols = imgsz.width, rows = imgsz.height; + if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height) return false; - - if( hasTiltedFeatures ) - rnt = rn*2; - if( sum0.rows < rnt || sum0.cols < cn ) + origWinSize = _origWinSize; + normrect = Rect(1, 1, origWinSize.width-2, origWinSize.height-2); + + int rn = _sumSize.height, cn = _sumSize.width, rn_scale = hasTiltedFeatures ? 2 : 1; + int sumStep, tofs = 0; + CV_Assert(rn >= rows+1 && cn >= cols+1); + + if( _image.isUMat() ) { - sum0.create(rnt, cn, CV_32S); - sqsum0.create(rn, cn, CV_64F); + usum0.create(rn*rn_scale, cn, CV_32S); + usqsum0.create(rn, cn, CV_32S); + usum = UMat(usum0, Rect(0, 0, cols+1, rows+1)); + usqsum = UMat(usqsum0, Rect(0, 0, cols, rows)); + + if( hasTiltedFeatures ) + { + UMat utilted(usum0, Rect(0, _sumSize.height, cols+1, rows+1)); + integral(_image, usum, noArray(), utilted, CV_32S); + tofs = (int)((utilted.offset - usum.offset)/sizeof(int)); + } + else + integral(_image, usum, noArray(), noArray(), CV_32S); + sqrBoxFilter(_image, usqsum, CV_32S, + Size(normrect.width, normrect.height), + Point(0, 0), false); + sumStep = (int)(usum.step/usum.elemSize()); } - sum = Mat(rn, cn, CV_32S, sum0.data); - sqsum = Mat(rn, cn, CV_64F, sqsum0.data); - if( hasTiltedFeatures ) + else { - tilted = Mat(rn, cn, CV_32S, sum0.data + rn*sum.step); - integral(_image, sum, sqsum, tilted); + sum0.create(rn*rn_scale, cn, CV_32S); + sqsum0.create(rn, cn, CV_32S); + sum = sum0(Rect(0, 0, cols+1, rows+1)); + sqsum = sqsum0(Rect(0, 0, cols, rows)); + + if( hasTiltedFeatures ) + { + Mat tilted = sum0(Rect(0, _sumSize.height, cols+1, rows+1)); + integral(_image, sum, noArray(), tilted, CV_32S); + tofs = (int)((tilted.data - sum.data)/sizeof(int)); + } + else + integral(_image, sum, noArray(), noArray(), CV_32S); + sqrBoxFilter(_image, sqsum, CV_32S, + Size(normrect.width, normrect.height), + Point(0, 0), false); + sumStep = (int)(sum.step/sum.elemSize()); } - else - integral(_image, sum, sqsum); - int sumStep = (int)(sum.step/sum.elemSize()); - int sqsumStep = (int)(sqsum.step/sqsum.elemSize()); - int tofs = hasTiltedFeatures ? sumStep*rn : 0; CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sumStep ); - CV_SUM_OFS( nqofs[0], nqofs[1], nqofs[2], nqofs[3], 0, normrect, sqsumStep ); size_t fi, nfeatures = features->size(); - - optfeatures->resize(nfeatures); - optfeaturesPtr = &(*optfeatures)[0]; const std::vector& ff = *features; - for( fi = 0; fi < nfeatures; fi++ ) - optfeaturesPtr[fi].setOffsets( ff[fi], sumStep, tofs ); + + if( sumSize0 != _sumSize ) + { + optfeatures->resize(nfeatures); + optfeaturesPtr = &(*optfeatures)[0]; + for( fi = 0; fi < nfeatures; fi++ ) + optfeaturesPtr[fi].setOffsets( ff[fi], sumStep, tofs ); + } + if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) ) + copyVectorToUMat(ff, ufbuf); + sumSize0 = _sumSize; + return true; } + bool HaarEvaluator::setWindow( Point pt ) { if( pt.x < 0 || pt.y < 0 || @@ -563,9 +607,8 @@ bool HaarEvaluator::setWindow( Point pt ) return false; const int* p = &sum.at(pt); - const double* pq = &sqsum.at(pt); int valsum = CALC_SUM_OFS(nofs, p); - double valsqsum = CALC_SUM_OFS(nqofs, pq); + double valsqsum = sqsum.at(pt.y + normrect.y, pt.x + normrect.x); double nf = (double)normrect.area() * valsqsum - (double)valsum * valsum; if( nf > 0. ) @@ -577,6 +620,19 @@ bool HaarEvaluator::setWindow( Point pt ) return true; } + +Rect HaarEvaluator::getNormRect() const +{ + return normrect; +} + +void HaarEvaluator::getUMats(std::vector& bufs) +{ + bufs.clear(); + bufs.push_back(usum); + bufs.push_back(usqsum); + bufs.push_back(ufbuf); +} //---------------------------------------------- LBPEvaluator ------------------------------------- bool LBPEvaluator::Feature :: read(const FileNode& node ) @@ -620,7 +676,7 @@ Ptr LBPEvaluator::clone() const return ret; } -bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize ) +bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size ) { Mat image = _image.getMat(); int rn = image.rows+1, cn = image.cols+1; @@ -702,7 +758,7 @@ Ptr HOGEvaluator::clone() const return ret; } -bool HOGEvaluator::setImage( InputArray _image, Size winSize ) +bool HOGEvaluator::setImage( InputArray _image, Size winSize, Size ) { Mat image = _image.getMat(); int rows = image.rows + 1; @@ -914,11 +970,6 @@ int CascadeClassifierImpl::runAt( Ptr& evaluator, Point pt, do } } -bool CascadeClassifierImpl::setImage( Ptr& evaluator, const Mat& image ) -{ - return empty() ? false : evaluator->setImage(image, data.origWinSize); -} - void CascadeClassifierImpl::setMaskGenerator(const Ptr& _maskGenerator) { maskGenerator=_maskGenerator; @@ -1022,9 +1073,10 @@ struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neigh bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processingRectSize, int yStep, double factor, std::vector& candidates, - std::vector& levels, std::vector& weights, bool outputRejectLevels ) + std::vector& levels, std::vector& weights, + Size sumSize0, bool outputRejectLevels ) { - if( !featureEvaluator->setImage( _image, data.origWinSize ) ) + if( !featureEvaluator->setImage(_image, data.origWinSize, sumSize0) ) return false; #if defined (LOG_CASCADE_STATISTIC) @@ -1071,13 +1123,16 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize, - int yStep, double factor, std::vector& candidates, - std::vector&, std::vector&, bool ) + int yStep, double factor, Size sumSize0 ) { + const int MAX_FACES = 10000; + Ptr haar = featureEvaluator.dynamicCast(); if( haar.empty() ) return false; + haar->setImage(_image, data.origWinSize, sumSize0); + if( cascadeKernel.empty() ) { //cascadeKernel.create(") @@ -1087,25 +1142,21 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce if( ustages.empty() ) { - #define UPLOAD_CASCADE_PART(NAME) \ - Mat(1, (int)(data.NAME.size()*sizeof(data.NAME[0])), CV_8U, &data.NAME[0]).copyTo(u##NAME) - - UPLOAD_CASCADE_PART(stages); - UPLOAD_CASCADE_PART(classifiers); - UPLOAD_CASCADE_PART(nodes); - UPLOAD_CASCADE_PART(leaves); - ufacepos.create(); + copyVectorToUMat(data.stages, ustages); + copyVectorToUMat(data.classifiers, uclassifiers); + copyVectorToUMat(data.nodes, unodes); + copyVectorToUMat(data.leaves, uleaves); + ufacepos.create(1, MAX_FACES*4 + 1, CV_32S); } - haar->setUMat(_image, data.origWinSize, ugrayImage.size()); std::vector bufs; haar->getUMats(bufs); - CV_Assert(bufs.size() == 5); + CV_Assert(bufs.size() == 3); - size_t globalsize[] = { processingRectSize.width, processingRectSize.height }; + size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep }; - if(!cascadeKernel.args(ocl::KernelArg::PtrReadOnly(bufs[0]), // sum - ocl::KernelArg::PtrReadOnly(bufs[1]), // sqsum + return cascadeKernel.args(ocl::KernelArg::ReadOnly(bufs[0]), // sum + ocl::KernelArg::ReadOnly(bufs[1]), // sqsum ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures // cascade classifier @@ -1115,30 +1166,17 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce ocl::KernelArg::PtrReadOnly(uleaves), ocl::KernelArg::WriteOnly(ufacepos), // positions - ocl::KernelArg::ReadWrite(umisc), + ocl::KernelArg::PtrReadOnly(uparams), processingRectSize.width, - processingRectSize.height).run(2, globalsize, 0, false)) - return false; - - Mat facepos = ufacepos.getMat(ACCESS_READ); - const int* fptr = facepos.ptr(); - int nfaces = fptr[0]; - for( i = 0; i < nfaces; i++ ) - { - int pos = fptr[i+1]; - int x = - candidates.push_back(Rect() - return false; + processingRectSize.height, + yStep, (float)factor, MAX_FACES).run(2, globalsize, 0, false); } - - bool CascadeClassifierImpl::isOldFormatCascade() const { return !oldCascade.empty(); } - int CascadeClassifierImpl::getFeatureType() const { return featureEvaluator->getFeatureType(); @@ -1149,12 +1187,6 @@ Size CascadeClassifierImpl::getOriginalWindowSize() const return data.origWinSize; } -bool CascadeClassifierImpl::setImage(InputArray _image) -{ - Mat image = _image.getMat(); - return featureEvaluator->setImage(image, data.origWinSize); -} - void* CascadeClassifierImpl::getOldCascade() { return oldCascade; @@ -1196,12 +1228,12 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: if( maxObjectSize.height == 0 || maxObjectSize.width == 0 ) maxObjectSize = imgsz; - bool use_ocl = ocl::useOpenCL() && + bool use_ocl = false;/*ocl::useOpenCL() && getFeatureType() == FeatureEvaluator::HAAR && !isOldFormatCascade() && maskGenerator.empty() && !outputRejectLevels && - tryOpenCL; + tryOpenCL;*/ if( !use_ocl ) { @@ -1228,6 +1260,8 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: uimage.copyTo(ugrayImage); uimageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U); } + + Size sumSize0((imgsz.width + SUM_ALIGN) & -SUM_ALIGN, imgsz.height+1); for( double factor = 1; ; factor *= scaleFactor ) { @@ -1260,8 +1294,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: UMat uscaledImage(uimageBuffer, Rect(0, 0, scaledImageSize.width, scaledImageSize.height)); resize( ugrayImage, uscaledImage, scaledImageSize, 0, 0, INTER_LINEAR ); - if( ocl_detectSingleScale( uscaledImage, processingRectSize, yStep, factor, candidates, - rejectLevels, levelWeights, outputRejectLevels ) ) + if( ocl_detectSingleScale( uscaledImage, processingRectSize, yStep, factor, sumSize0 ) ) continue; /////// if the OpenCL branch has been executed but failed, fall back to CPU: ///// @@ -1282,10 +1315,21 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: resize( grayImage, scaledImage, scaledImageSize, 0, 0, INTER_LINEAR ); if( !detectSingleScale( scaledImage, processingRectSize, yStep, factor, candidates, - rejectLevels, levelWeights, outputRejectLevels ) ) + rejectLevels, levelWeights, sumSize0, outputRejectLevels ) ) break; } } + + if( use_ocl && tryOpenCL ) + { + Mat facepos = ufacepos.getMat(ACCESS_READ); + const int* fptr = facepos.ptr(); + int i, nfaces = fptr[0]; + for( i = 0; i < nfaces; i++ ) + { + candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4])); + } + } } void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector& objects, diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp index e2a9ffa..9841748 100644 --- a/modules/objdetect/src/cascadedetect.hpp +++ b/modules/objdetect/src/cascadedetect.hpp @@ -42,21 +42,20 @@ public: bool isOldFormatCascade() const; Size getOriginalWindowSize() const; int getFeatureType() const; - bool setImage( InputArray ); void* getOldCascade(); void setMaskGenerator(const Ptr& maskGenerator); Ptr getMaskGenerator(); protected: + enum { SUM_ALIGN = 16 }; + bool detectSingleScale( InputArray image, Size processingRectSize, int yStep, double factor, std::vector& candidates, std::vector& rejectLevels, std::vector& levelWeights, - bool outputRejectLevels = false ); + Size sumSize0, bool outputRejectLevels = false ); bool ocl_detectSingleScale( InputArray image, Size processingRectSize, - int yStep, double factor, std::vector& candidates, - std::vector& rejectLevels, std::vector& levelWeights, - bool outputRejectLevels = false ); + int yStep, double factor, Size sumSize0 ); void detectMultiScaleNoGrouping( InputArray image, std::vector& candidates, @@ -86,7 +85,6 @@ protected: template friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr &featureEvaluator, double& weight); - bool setImage( Ptr& feval, const Mat& image); int runAt( Ptr& feval, Point pt, double& weight ); class Data @@ -134,7 +132,7 @@ protected: Ptr maskGenerator; UMat ugrayImage, uimageBuffer; - UMat ufacepos, ustages, uclassifiers, unodes, uleaves, usubsets; + UMat ufacepos, ustages, uclassifiers, unodes, uleaves, usubsets, uparams; ocl::Kernel cascadeKernel; bool tryOpenCL; @@ -270,10 +268,9 @@ public: virtual Ptr clone() const; virtual int getFeatureType() const { return FeatureEvaluator::HAAR; } - virtual bool setImage(InputArray, Size origWinSize); + virtual bool setImage(InputArray, Size origWinSize, Size sumSize); virtual bool setWindow(Point pt); - - virtual bool setUMat(InputArray, Size origWinSize, Size origImgSize); + virtual Rect getNormRect() const; virtual void getUMats(std::vector& bufs); double operator()(int featureIdx) const @@ -282,22 +279,19 @@ public: { return (*this)(featureIdx); } protected: - Size origWinSize, origImgSize; + Size origWinSize, sumSize0; Ptr > features; Ptr > optfeatures; OptFeature* optfeaturesPtr; // optimization bool hasTiltedFeatures; - Mat sum0, sqsum0; - Mat sum, sqsum, tilted; - UMat usum, usqsum, fbuf; + Mat sum0, sum, sqsum0, sqsum; + UMat usum0, usum, usqsum0, usqsum, ufbuf; Rect normrect; int nofs[4]; - int nqofs[4]; const int* pwin; - const double* pqwin; double varianceNormFactor; }; @@ -376,7 +370,7 @@ public: virtual Ptr clone() const; virtual int getFeatureType() const { return FeatureEvaluator::LBP; } - virtual bool setImage(InputArray image, Size _origWinSize); + virtual bool setImage(InputArray image, Size _origWinSize, Size); virtual bool setWindow(Point pt); int operator()(int featureIdx) const @@ -453,7 +447,7 @@ public: virtual bool read( const FileNode& node ); virtual Ptr clone() const; virtual int getFeatureType() const { return FeatureEvaluator::HOG; } - virtual bool setImage( InputArray image, Size winSize ); + virtual bool setImage( InputArray image, Size winSize, Size ); virtual bool setWindow( Point pt ); double operator()(int featureIdx) const {