From b7553d4e2eddc5645023a94a850fe092dd9f223e Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 19 Dec 2013 14:09:44 +0400 Subject: [PATCH] some attempts to tune the performance --- data/haarcascades/haarcascade_eye.xml | 1 - .../haarcascade_eye_tree_eyeglasses.xml | 1 - data/haarcascades/haarcascade_frontalface_alt.xml | 1 - data/haarcascades/haarcascade_frontalface_alt2.xml | 1 - .../haarcascade_frontalface_alt_tree.xml | 1 - .../haarcascade_frontalface_default.xml | 1 - data/haarcascades/haarcascade_fullbody.xml | 1 - data/haarcascades/haarcascade_lefteye_2splits.xml | 1 - data/haarcascades/haarcascade_lowerbody.xml | 1 - data/haarcascades/haarcascade_mcs_eyepair_big.xml | 1 - .../haarcascades/haarcascade_mcs_eyepair_small.xml | 1 - data/haarcascades/haarcascade_mcs_leftear.xml | 1 - data/haarcascades/haarcascade_mcs_lefteye.xml | 1 - data/haarcascades/haarcascade_mcs_mouth.xml | 1 - data/haarcascades/haarcascade_mcs_nose.xml | 1 - data/haarcascades/haarcascade_mcs_rightear.xml | 1 - data/haarcascades/haarcascade_mcs_righteye.xml | 1 - data/haarcascades/haarcascade_mcs_upperbody.xml | 1 - data/haarcascades/haarcascade_profileface.xml | 1 - data/haarcascades/haarcascade_righteye_2splits.xml | 1 - data/haarcascades/haarcascade_smile.xml | 1 - data/haarcascades/haarcascade_upperbody.xml | 1 - modules/objdetect/src/cascadedetect.cpp | 56 ++++--- modules/objdetect/src/cascadedetect.hpp | 96 ++++++------ modules/objdetect/src/cascadedetect_convert.cpp | 1 - modules/objdetect/src/opencl/haarobjectdetect.cl | 162 ++++++++++++--------- 26 files changed, 187 insertions(+), 150 deletions(-) diff --git a/data/haarcascades/haarcascade_eye.xml b/data/haarcascades/haarcascade_eye.xml index 85e64fb..b21e3b9 100644 --- a/data/haarcascades/haarcascade_eye.xml +++ b/data/haarcascades/haarcascade_eye.xml @@ -48,7 +48,6 @@ 20 20 - 0 93 0 diff --git a/data/haarcascades/haarcascade_eye_tree_eyeglasses.xml b/data/haarcascades/haarcascade_eye_tree_eyeglasses.xml index afc72b3..6813d24 100644 --- a/data/haarcascades/haarcascade_eye_tree_eyeglasses.xml +++ b/data/haarcascades/haarcascade_eye_tree_eyeglasses.xml @@ -48,7 +48,6 @@ 20 20 - 0 47 0 diff --git a/data/haarcascades/haarcascade_frontalface_alt.xml b/data/haarcascades/haarcascade_frontalface_alt.xml index c09a7c7..ade4b21 100644 --- a/data/haarcascades/haarcascade_frontalface_alt.xml +++ b/data/haarcascades/haarcascade_frontalface_alt.xml @@ -48,7 +48,6 @@ 20 20 - 0 213 0 diff --git a/data/haarcascades/haarcascade_frontalface_alt2.xml b/data/haarcascades/haarcascade_frontalface_alt2.xml index 28dd0b3..b49cf5d 100644 --- a/data/haarcascades/haarcascade_frontalface_alt2.xml +++ b/data/haarcascades/haarcascade_frontalface_alt2.xml @@ -48,7 +48,6 @@ 20 20 - 0 109 0 diff --git a/data/haarcascades/haarcascade_frontalface_alt_tree.xml b/data/haarcascades/haarcascade_frontalface_alt_tree.xml index 4a27dcc..e0420a2 100644 --- a/data/haarcascades/haarcascade_frontalface_alt_tree.xml +++ b/data/haarcascades/haarcascade_frontalface_alt_tree.xml @@ -49,7 +49,6 @@ 20 20 - 0 406 0 diff --git a/data/haarcascades/haarcascade_frontalface_default.xml b/data/haarcascades/haarcascade_frontalface_default.xml index d744175..cbd1aa8 100644 --- a/data/haarcascades/haarcascade_frontalface_default.xml +++ b/data/haarcascades/haarcascade_frontalface_default.xml @@ -48,7 +48,6 @@ 24 24 - 0 211 0 diff --git a/data/haarcascades/haarcascade_fullbody.xml b/data/haarcascades/haarcascade_fullbody.xml index 922ac22..1f4e3a7 100644 --- a/data/haarcascades/haarcascade_fullbody.xml +++ b/data/haarcascades/haarcascade_fullbody.xml @@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie. 14 28 - 0 107 0 diff --git a/data/haarcascades/haarcascade_lefteye_2splits.xml b/data/haarcascades/haarcascade_lefteye_2splits.xml index 61995f0..9a9ef58 100644 --- a/data/haarcascades/haarcascade_lefteye_2splits.xml +++ b/data/haarcascades/haarcascade_lefteye_2splits.xml @@ -49,7 +49,6 @@ 20 20 - 0 33 0 diff --git a/data/haarcascades/haarcascade_lowerbody.xml b/data/haarcascades/haarcascade_lowerbody.xml index a17a195..59949b0 100644 --- a/data/haarcascades/haarcascade_lowerbody.xml +++ b/data/haarcascades/haarcascade_lowerbody.xml @@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie. 19 23 - 0 89 0 diff --git a/data/haarcascades/haarcascade_mcs_eyepair_big.xml b/data/haarcascades/haarcascade_mcs_eyepair_big.xml index 9ed9be2..48048c4 100644 --- a/data/haarcascades/haarcascade_mcs_eyepair_big.xml +++ b/data/haarcascades/haarcascade_mcs_eyepair_big.xml @@ -88,7 +88,6 @@ mcastrillon@iusiani.ulpgc.es 45 11 - 0 85 0 diff --git a/data/haarcascades/haarcascade_mcs_eyepair_small.xml b/data/haarcascades/haarcascade_mcs_eyepair_small.xml index 4095dbb..cfa4d02 100644 --- a/data/haarcascades/haarcascade_mcs_eyepair_small.xml +++ b/data/haarcascades/haarcascade_mcs_eyepair_small.xml @@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es 22 5 - 0 133 0 diff --git a/data/haarcascades/haarcascade_mcs_leftear.xml b/data/haarcascades/haarcascade_mcs_leftear.xml index 1040ba9..e20b95a 100644 --- a/data/haarcascades/haarcascade_mcs_leftear.xml +++ b/data/haarcascades/haarcascade_mcs_leftear.xml @@ -67,7 +67,6 @@ mcastrillon@iusiani.ulpgc.es 12 20 - 0 65 0 diff --git a/data/haarcascades/haarcascade_mcs_lefteye.xml b/data/haarcascades/haarcascade_mcs_lefteye.xml index 2edcb9b..00112cc 100644 --- a/data/haarcascades/haarcascade_mcs_lefteye.xml +++ b/data/haarcascades/haarcascade_mcs_lefteye.xml @@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es 18 12 - 0 279 0 diff --git a/data/haarcascades/haarcascade_mcs_mouth.xml b/data/haarcascades/haarcascade_mcs_mouth.xml index 98c7ac3..729aeb0 100644 --- a/data/haarcascades/haarcascade_mcs_mouth.xml +++ b/data/haarcascades/haarcascade_mcs_mouth.xml @@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es 25 15 - 0 218 0 diff --git a/data/haarcascades/haarcascade_mcs_nose.xml b/data/haarcascades/haarcascade_mcs_nose.xml index 5b05e6c..53a79f6 100644 --- a/data/haarcascades/haarcascade_mcs_nose.xml +++ b/data/haarcascades/haarcascade_mcs_nose.xml @@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es 18 15 - 0 377 0 diff --git a/data/haarcascades/haarcascade_mcs_rightear.xml b/data/haarcascades/haarcascade_mcs_rightear.xml index 2ea1820..ced31c4 100644 --- a/data/haarcascades/haarcascade_mcs_rightear.xml +++ b/data/haarcascades/haarcascade_mcs_rightear.xml @@ -67,7 +67,6 @@ mcastrillon@iusiani.ulpgc.es 12 20 - 0 61 0 diff --git a/data/haarcascades/haarcascade_mcs_righteye.xml b/data/haarcascades/haarcascade_mcs_righteye.xml index 2421436..5f04afc 100644 --- a/data/haarcascades/haarcascade_mcs_righteye.xml +++ b/data/haarcascades/haarcascade_mcs_righteye.xml @@ -87,7 +87,6 @@ mcastrillon@iusiani.ulpgc.es 18 12 - 0 415 0 diff --git a/data/haarcascades/haarcascade_mcs_upperbody.xml b/data/haarcascades/haarcascade_mcs_upperbody.xml index abd693c..f0f920c 100644 --- a/data/haarcascades/haarcascade_mcs_upperbody.xml +++ b/data/haarcascades/haarcascade_mcs_upperbody.xml @@ -85,7 +85,6 @@ mcastrillon@iusiani.ulpgc.es 22 20 - 0 334 0 diff --git a/data/haarcascades/haarcascade_profileface.xml b/data/haarcascades/haarcascade_profileface.xml index 29bbffa..486d8e3 100644 --- a/data/haarcascades/haarcascade_profileface.xml +++ b/data/haarcascades/haarcascade_profileface.xml @@ -48,7 +48,6 @@ 20 20 - 0 195 0 diff --git a/data/haarcascades/haarcascade_righteye_2splits.xml b/data/haarcascades/haarcascade_righteye_2splits.xml index 3ab910d..db4571c 100644 --- a/data/haarcascades/haarcascade_righteye_2splits.xml +++ b/data/haarcascades/haarcascade_righteye_2splits.xml @@ -49,7 +49,6 @@ 20 20 - 0 34 0 diff --git a/data/haarcascades/haarcascade_smile.xml b/data/haarcascades/haarcascade_smile.xml index 2777afa..b7df221 100644 --- a/data/haarcascades/haarcascade_smile.xml +++ b/data/haarcascades/haarcascade_smile.xml @@ -50,7 +50,6 @@ 36 18 - 0 53 0 diff --git a/data/haarcascades/haarcascade_upperbody.xml b/data/haarcascades/haarcascade_upperbody.xml index 2f00c6c..778687f 100644 --- a/data/haarcascades/haarcascade_upperbody.xml +++ b/data/haarcascades/haarcascade_upperbody.xml @@ -142,7 +142,6 @@ Thanks to Martin Spengler, ETH Zurich, for providing the demo movie. 22 18 - 0 152 0 diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 2b3e939..d363bfa 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -954,7 +954,7 @@ int CascadeClassifierImpl::runAt( Ptr& evaluator, Point pt, do if( !evaluator->setWindow(pt) ) return -1; - if( data.isStumpBased ) + if( data.isStumpBased() ) { if( data.featureType == FeatureEvaluator::HAAR ) return predictOrderedStump( *this, evaluator, weight ); @@ -1133,6 +1133,7 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize, int yStep, double factor, Size sumSize0 ) { + const int VECTOR_SIZE = 4; Ptr haar = featureEvaluator.dynamicCast(); if( haar.empty() ) return false; @@ -1142,7 +1143,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce if( cascadeKernel.empty() ) { cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::haarobjectdetect_oclsrc, - format("-D MAX_FACES=%d", MAX_FACES)); + format("-D VECTOR_SIZE=%d", VECTOR_SIZE)); if( cascadeKernel.empty() ) return false; } @@ -1150,9 +1151,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce if( ustages.empty() ) { copyVectorToUMat(data.stages, ustages); - copyVectorToUMat(data.classifiers, uclassifiers); - copyVectorToUMat(data.nodes, unodes); - copyVectorToUMat(data.leaves, uleaves); + copyVectorToUMat(data.stumps, ustumps); } std::vector bufs; @@ -1162,7 +1161,7 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce Rect normrect = haar->getNormRect(); //processingRectSize = Size(yStep, yStep); - size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep }; + size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep }; cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum @@ -1171,14 +1170,12 @@ bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size proce // cascade classifier (int)data.stages.size(), ocl::KernelArg::PtrReadOnly(ustages), - ocl::KernelArg::PtrReadOnly(uclassifiers), - ocl::KernelArg::PtrReadOnly(unodes), - ocl::KernelArg::PtrReadOnly(uleaves), + ocl::KernelArg::PtrReadOnly(ustumps), ocl::KernelArg::PtrWriteOnly(ufacepos), // positions processingRectSize, yStep, (float)factor, - normrect, data.origWinSize); + normrect, data.origWinSize, MAX_FACES); bool ok = cascadeKernel.run(2, globalsize, 0, true); //CV_Assert(ok); return ok; @@ -1243,7 +1240,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: bool use_ocl = ocl::useOpenCL() && getFeatureType() == FeatureEvaluator::HAAR && !isOldFormatCascade() && - data.isStumpBased && + data.isStumpBased() && maskGenerator.empty() && !outputRejectLevels && tryOpenCL; @@ -1345,7 +1342,6 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: Mat facepos = ufacepos.getMat(ACCESS_READ); const int* fptr = facepos.ptr(); int i, nfaces = fptr[0]; - printf("nfaces = %d\n", nfaces); for( i = 0; i < nfaces; i++ ) { candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4])); @@ -1428,6 +1424,12 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector 1 ) - isStumpBased = false; - + maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount); + classifiers.push_back(tree); nodes.reserve(nodes.size() + tree.nodeCount); @@ -1536,6 +1538,24 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root) leaves.push_back((float)*internalNodesIter); } } + + if( isStumpBased() ) + { + int nodeOfs = 0, leafOfs = 0; + size_t nstages = stages.size(); + for( size_t stageIdx = 0; stageIdx < nstages; stageIdx++ ) + { + const Stage& stage = stages[stageIdx]; + + int ntrees = stage.ntrees; + for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 ) + { + const DTreeNode& node = nodes[nodeOfs]; + stumps.push_back(Stump(node.featureIdx, node.threshold, + leaves[leafOfs], leaves[leafOfs+1])); + } + } + } return true; } @@ -1546,9 +1566,7 @@ bool CascadeClassifierImpl::read_(const FileNode& root) tryOpenCL = true; cascadeKernel = ocl::Kernel(); ustages.release(); - uclassifiers.release(); - unodes.release(); - uleaves.release(); + ustumps.release(); if( !data.read(root) ) return false; diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp index bbe4f08..01f194c 100644 --- a/modules/objdetect/src/cascadedetect.hpp +++ b/modules/objdetect/src/cascadedetect.hpp @@ -48,7 +48,7 @@ public: Ptr getMaskGenerator(); protected: - enum { SUM_ALIGN = 16 }; + enum { SUM_ALIGN = 64 }; bool detectSingleScale( InputArray image, Size processingRectSize, int yStep, double factor, std::vector& candidates, @@ -109,14 +109,29 @@ protected: int ntrees; float threshold; }; + + struct Stump + { + Stump() {}; + Stump(int _featureIdx, float _threshold, float _left, float _right) + : featureIdx(_featureIdx), threshold(_threshold), left(_left), right(_right) {} + + int featureIdx; + float threshold; + float left; + float right; + }; + + Data(); bool read(const FileNode &node); - bool isStumpBased; + bool isStumpBased() const { return maxNodesPerTree == 1; } int stageType; int featureType; int ncategories; + int maxNodesPerTree; Size origWinSize; std::vector stages; @@ -124,6 +139,7 @@ protected: std::vector nodes; std::vector leaves; std::vector subsets; + std::vector stumps; }; Data data; @@ -132,7 +148,7 @@ protected: Ptr maskGenerator; UMat ugrayImage, uimageBuffer; - UMat ufacepos, ustages, uclassifiers, unodes, uleaves, usubsets; + UMat ufacepos, ustages, ustumps, usubsets; ocl::Kernel cascadeKernel; bool tryOpenCL; @@ -592,30 +608,36 @@ template inline int predictOrderedStump( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { - int nodeOfs = 0, leafOfs = 0; + CV_Assert(!cascade.data.stumps.empty()); FEval& featureEvaluator = (FEval&)*_featureEvaluator; - float* cascadeLeaves = &cascade.data.leaves[0]; - CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0]; - CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0]; + const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0]; + const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0]; int nstages = (int)cascade.data.stages.size(); + double tmp = 0; + for( int stageIdx = 0; stageIdx < nstages; stageIdx++ ) { - CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx]; - sum = 0.0; + const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx]; + tmp = 0; int ntrees = stage.ntrees; - for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 ) + for( int i = 0; i < ntrees; i++ ) { - CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs]; - double value = featureEvaluator(node.featureIdx); - sum += cascadeLeaves[ value < node.threshold ? leafOfs : leafOfs + 1 ]; + const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[i]; + double value = featureEvaluator(stump.featureIdx); + tmp += value < stump.threshold ? stump.left : stump.right; } - if( sum < stage.threshold ) + if( tmp < stage.threshold ) + { + sum = (double)tmp; return -stageIdx; + } + cascadeStumps += ntrees; } + sum = (double)tmp; return 1; } @@ -623,56 +645,44 @@ template inline int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr &_featureEvaluator, double& sum ) { + CV_Assert(!cascade.data.stumps.empty()); int nstages = (int)cascade.data.stages.size(); - int nodeOfs = 0, leafOfs = 0; FEval& featureEvaluator = (FEval&)*_featureEvaluator; size_t subsetSize = (cascade.data.ncategories + 31)/32; - int* cascadeSubsets = &cascade.data.subsets[0]; - float* cascadeLeaves = &cascade.data.leaves[0]; - CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0]; - CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0]; + const int* cascadeSubsets = &cascade.data.subsets[0]; + const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0]; + const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0]; #ifdef HAVE_TEGRA_OPTIMIZATION float tmp = 0; // float accumulator -- float operations are quicker +#else + double tmp = 0; #endif for( int si = 0; si < nstages; si++ ) { - CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si]; + const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si]; int wi, ntrees = stage.ntrees; -#ifdef HAVE_TEGRA_OPTIMIZATION tmp = 0; -#else - sum = 0; -#endif for( wi = 0; wi < ntrees; wi++ ) { - CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs]; - int c = featureEvaluator(node.featureIdx); - const int* subset = &cascadeSubsets[nodeOfs*subsetSize]; -#ifdef HAVE_TEGRA_OPTIMIZATION - tmp += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1]; -#else - sum += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1]; -#endif - nodeOfs++; - leafOfs += 2; + const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[wi]; + int c = featureEvaluator(stump.featureIdx); + const int* subset = &cascadeSubsets[wi*subsetSize]; + tmp += (subset[c>>5] & (1 << (c & 31))) ? stump.left : stump.right; } -#ifdef HAVE_TEGRA_OPTIMIZATION - if( tmp < stage.threshold ) { + + if( tmp < stage.threshold ) + { sum = (double)tmp; return -si; } -#else - if( sum < stage.threshold ) - return -si; -#endif + + cascadeStumps += ntrees; + cascadeSubsets += ntrees*subsetSize; } -#ifdef HAVE_TEGRA_OPTIMIZATION sum = (double)tmp; -#endif - return 1; } } diff --git a/modules/objdetect/src/cascadedetect_convert.cpp b/modules/objdetect/src/cascadedetect_convert.cpp index eed5815..9b4695c 100644 --- a/modules/objdetect/src/cascadedetect_convert.cpp +++ b/modules/objdetect/src/cascadedetect_convert.cpp @@ -209,7 +209,6 @@ static bool convert(const String& oldcascade, const String& newcascade) << "height" << cascadesize.width << "width" << cascadesize.height << "stageParams" << "{" - << "maxDepth" << maxdepth << "maxWeakCount" << (int)maxWeakCount << "}" << "featureParams" << "{" diff --git a/modules/objdetect/src/opencl/haarobjectdetect.cl b/modules/objdetect/src/opencl/haarobjectdetect.cl index 5e46474..581f023 100644 --- a/modules/objdetect/src/opencl/haarobjectdetect.cl +++ b/modules/objdetect/src/opencl/haarobjectdetect.cl @@ -1,43 +1,5 @@ -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Niko Li, newlife20080214@gmail.com -// Wang Weiyan, wangweiyanster@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Nathan, liujun@multicorewareinc.com -// Peng Xiao, pengxiao@outlook.com -// Erping Pang, erping@multicorewareinc.com -// Vadim Pisarevsky, vadim.pisarevsky@itseez.com -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -// +///////////////////////////// OpenCL kernels for face detection ////////////////////////////// +////////////////////////////// see the opencv/doc/license.txt /////////////////////////////// typedef struct __attribute__((aligned(4))) OptFeature { @@ -46,20 +8,14 @@ typedef struct __attribute__((aligned(4))) OptFeature } OptFeature; -typedef struct __attribute__((aligned(4))) DTreeNode +typedef struct __attribute__((aligned(4))) Stump { int featureIdx __attribute__((aligned (4))); float threshold __attribute__((aligned (4))); // for ordered features only - int left __attribute__((aligned (4))); - int right __attribute__((aligned (4))); + float left __attribute__((aligned (4))); + float right __attribute__((aligned (4))); } -DTreeNode; - -typedef struct __attribute__((aligned (4))) DTree -{ - int nodeCount __attribute__((aligned (4))); -} -DTree; +Stump; typedef struct __attribute__((aligned (4))) Stage { @@ -78,25 +34,23 @@ __kernel void runHaarClassifierStump( int nstages, __global const Stage* stages, - __global const DTree* trees, - __global const DTreeNode* nodes, - __global const float* leaves, + __global const Stump* stumps, volatile __global int* facepos, int2 imgsize, int xyscale, float factor, - int4 normrect, int2 windowsize) + int4 normrect, int2 windowsize, int maxFaces) { - int ix = get_global_id(0)*xyscale; + int ix = get_global_id(0)*xyscale*VECTOR_SIZE; int iy = get_global_id(1)*xyscale; sumstep /= sizeof(int); sqsumstep /= sizeof(int); if( ix < imgsize.x && iy < imgsize.y ) { - int ntrees, nodeOfs = 0, leafOfs = 0; + int ntrees; int stageIdx, i; float s = 0.f; - __global const DTreeNode* node; + __global const Stump* stump = stumps; __global const OptFeature* f; __global const int* psum = sum + mad24(iy, sumstep, ix); @@ -107,19 +61,17 @@ __kernel void runHaarClassifierStump( pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); - float4 weight; - int4 ofs; + float4 weight, vsval; + int4 ofs, ofs0, ofs1, ofs2; nf = nf > 0 ? nf : 1.f; for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) { ntrees = stages[stageIdx].ntrees; s = 0.f; - for( i = 0; i < ntrees; i++, nodeOfs++, leafOfs += 2 ) + for( i = 0; i < ntrees; i++, stump++ ) { - node = nodes + nodeOfs; - f = optfeatures + node->featureIdx; - + f = optfeatures + stump->featureIdx; weight = f->weight; ofs = f->ofs[0]; @@ -131,7 +83,8 @@ __kernel void runHaarClassifierStump( ofs = f->ofs[2]; sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; } - s += leaves[ sval < node->threshold*nf ? leafOfs : leafOfs + 1 ]; + + s += (sval < stump->threshold*nf) ? stump->left : stump->right; } if( s < stages[stageIdx].threshold ) @@ -142,7 +95,84 @@ __kernel void runHaarClassifierStump( { int nfaces = atomic_inc(facepos); //printf("detected face #d!!!!\n", nfaces); - if( nfaces < MAX_FACES ) + if( nfaces < maxFaces ) + { + volatile __global int* face = facepos + 1 + nfaces*4; + face[0] = convert_int_rte(ix*factor); + face[1] = convert_int_rte(iy*factor); + face[2] = convert_int_rte(windowsize.x*factor); + face[3] = convert_int_rte(windowsize.y*factor); + } + } + } +} + +#if 0 +__kernel void runLBPClassifierStump( + __global const int* sum, + int sumstep, int sumoffset, + __global const int* sqsum, + int sqsumstep, int sqsumoffset, + __global const OptFeature* optfeatures, + + int nstages, + __global const Stage* stages, + __global const Stump* stumps, + __global const int* bitsets, + int bitsetSize, + + volatile __global int* facepos, + int2 imgsize, int xyscale, float factor, + int4 normrect, int2 windowsize, int maxFaces) +{ + int ix = get_global_id(0)*xyscale*VECTOR_SIZE; + int iy = get_global_id(1)*xyscale; + sumstep /= sizeof(int); + sqsumstep /= sizeof(int); + + if( ix < imgsize.x && iy < imgsize.y ) + { + int ntrees; + int stageIdx, i; + float s = 0.f; + __global const Stump* stump = stumps; + __global const int* bitset = bitsets; + __global const OptFeature* f; + + __global const int* psum = sum + mad24(iy, sumstep, ix); + __global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x); + int normarea = normrect.z * normrect.w; + float invarea = 1.f/normarea; + float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] + + pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; + float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; + float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); + float4 weight; + int4 ofs; + nf = nf > 0 ? nf : 1.f; + + for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) + { + ntrees = stages[stageIdx].ntrees; + s = 0.f; + for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize ) + { + f = optfeatures + stump->featureIdx; + + weight = f->weight; + + // compute LBP feature to val + s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right; + } + + if( s < stages[stageIdx].threshold ) + break; + } + + if( stageIdx == nstages ) + { + int nfaces = atomic_inc(facepos); + if( nfaces < maxFaces ) { volatile __global int* face = facepos + 1 + nfaces*4; face[0] = convert_int_rte(ix*factor); @@ -153,3 +183,5 @@ __kernel void runHaarClassifierStump( } } } +#endif + -- 2.7.4