From: Valentina Kustikova Date: Tue, 8 Feb 2011 07:34:25 +0000 (+0000) Subject: Parallel version of Latent SVM. X-Git-Tag: accepted/2.0/20130307.220821~3535 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d03b89f1630e7078c4c6c3db129f59bc0e0cfbc5;p=profile%2Fivi%2Fopencv.git Parallel version of Latent SVM. --- diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 6fd834e..c5aff62 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -127,6 +127,7 @@ CV_INLINE IppiSize ippiSize(int width, int height) #include "tbb/tbb_stddef.h" #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 #include "tbb/tbb.h" + #include "tbb/task.h" #undef min #undef max #else diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp index 9afd46a..a1de0ee 100644 --- a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp @@ -247,7 +247,8 @@ CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector); // CvSeq* cvLatentSvmDetectObjects(const IplImage* image, // CvLatentSvmDetector* detector, // CvMemStorage* storage, -// float overlap_threshold = 0.5f); +// float overlap_threshold = 0.5f, +// int numThreads = -1); // INPUT // image - image to detect objects in // detector - Latent SVM detector in internal representation @@ -261,7 +262,8 @@ CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector); CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image, CvLatentSvmDetector* detector, CvMemStorage* storage, - float overlap_threshold CV_DEFAULT(0.5f)); + float overlap_threshold CV_DEFAULT(0.5f), + int numThreads CV_DEFAULT(-1)); #ifdef __cplusplus } diff --git a/modules/objdetect/src/_latentsvm.h b/modules/objdetect/src/_latentsvm.h index c3368c2..e9be3d4 100644 --- a/modules/objdetect/src/_latentsvm.h +++ b/modules/objdetect/src/_latentsvm.h @@ -248,7 +248,8 @@ int searchObjectThreshold(const CvLSVMFeaturePyramid *H, int maxXBorder, int maxYBorder, float scoreThreshold, CvPoint **points, int **levels, int *kPoints, - float **score, CvPoint ***partsDisplacement); + float **score, CvPoint ***partsDisplacement, + int numThreads CV_DEFAULT(-1)); /* // Computation root filters displacement and values of score function @@ -283,7 +284,7 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, int kComponents, const int *kPartFilters, const float *b, float scoreThreshold, CvPoint **points, CvPoint **oppPoints, - float **score, int *kPoints); + float **score, int *kPoints, int numThreads); /* // Compute opposite point for filter box diff --git a/modules/objdetect/src/_lsvm_error.h b/modules/objdetect/src/_lsvm_error.h index 1d6fb63..a3a10b2 100644 --- a/modules/objdetect/src/_lsvm_error.h +++ b/modules/objdetect/src/_lsvm_error.h @@ -10,8 +10,10 @@ #define LATENT_SVM_SEARCH_OBJECT_FAILED -5 #define LATENT_SVM_FAILED_SUPERPOSITION -6 #define FILTER_OUT_OF_BOUNDARIES -7 +#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8 +#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9 #define FFT_OK 2 -#define FFT_ERROR -8 -#define LSVM_PARSER_FILE_NOT_FOUND -9 +#define FFT_ERROR -10 +#define LSVM_PARSER_FILE_NOT_FOUND -11 #endif diff --git a/modules/objdetect/src/_lsvm_matching.h b/modules/objdetect/src/_lsvm_matching.h index 1b51fc5..5c8bb43 100644 --- a/modules/objdetect/src/_lsvm_matching.h +++ b/modules/objdetect/src/_lsvm_matching.h @@ -11,6 +11,10 @@ #include "_lsvm_fft.h" #include "_lsvm_routine.h" +#ifdef HAVE_TBB +#include "_lsvm_tbbversion.h" +#endif + //extern "C" { /* // Function for convolution computation @@ -352,6 +356,46 @@ int thresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, CvPoint **points, int **levels, int *kPoints, CvPoint ***partsDisplacement); +#ifdef HAVE_TBB +/* +// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, + const CvLSVMFeaturePyramid *H, + const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + const int threadsNum, + float **score, + CvPoint **points, int **levels, int *kPoints, + CvPoint ***partsDisplacement); +// INPUT +// all_F - the set of filters (the first element is root filter, + the other - part filters) +// n - the number of part filters +// H - feature pyramid +// b - linear term of the score function +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// scoreThreshold - score threshold +// threadsNum - number of threads that will be created using TBB version +// OUTPUT +// score - score function values that exceed threshold +// points - the set of root filter positions (in the block space) +// levels - the set of levels +// kPoints - number of root filter positions +// partsDisplacement - displacement of part filters (in the block space) +// RESULT +// Error status +*/ +int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, + const CvLSVMFeaturePyramid *H, + const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + const int threadsNum, + float **score, + CvPoint **points, int **levels, int *kPoints, + CvPoint ***partsDisplacement); +#endif /* // Perform non-maximum suppression algorithm (described in original paper) diff --git a/modules/objdetect/src/_lsvm_tbbversion.h b/modules/objdetect/src/_lsvm_tbbversion.h new file mode 100644 index 0000000..1df210b --- /dev/null +++ b/modules/objdetect/src/_lsvm_tbbversion.h @@ -0,0 +1,52 @@ +#ifndef _LSVM_TBBVERSION_H +#define _LSVM_TBBVERSION_H + +#include "_lsvm_matching.h" +#include "precomp.hpp" + +/* +// Computation score function using TBB tasks +// +// API +// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, + const CvLSVMFeaturePyramid *H, const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + int *kLevels, int **procLevels, + const int threadsNum, + float **score, CvPoint ***points, + int *kPoints, + CvPoint ****partsDisplacement); +// INPUT +// filters - the set of filters (the first element is root filter, + the other - part filters) +// n - the number of part filters +// H - feature pyramid +// b - linear term of the score function +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// scoreThreshold - score threshold +// kLevels - array that contains number of levels processed + by each thread +// procLevels - array that contains lists of levels processed + by each thread +// threadsNum - the number of created threads +// OUTPUT +// score - score function values that exceed threshold +// points - the set of root filter positions (in the block space) +// kPoints - number of root filter positions +// partsDisplacement - displacement of part filters (in the block space) +// RESULT +// +*/ +int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, + const CvLSVMFeaturePyramid *H, const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + int *kLevels, int **procLevels, + const int threadsNum, + float **score, CvPoint ***points, + int *kPoints, + CvPoint ****partsDisplacement); + +#endif \ No newline at end of file diff --git a/modules/objdetect/src/latentsvm.cpp b/modules/objdetect/src/latentsvm.cpp index 9f26edb..c6501fa 100644 --- a/modules/objdetect/src/latentsvm.cpp +++ b/modules/objdetect/src/latentsvm.cpp @@ -271,17 +271,30 @@ int searchObjectThreshold(const CvLSVMFeaturePyramid *H, int maxXBorder, int maxYBorder, float scoreThreshold, CvPoint **points, int **levels, int *kPoints, - float **score, CvPoint ***partsDisplacement) + float **score, CvPoint ***partsDisplacement, + int numThreads) { int opResult; // Matching +#ifdef HAVE_TBB + if (numThreads <= 0) + { + opResult = LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT; + return opResult; + } + opResult = tbbThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder, + scoreThreshold, numThreads, score, + points, levels, kPoints, + partsDisplacement); +#else opResult = thresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder, scoreThreshold, score, points, levels, kPoints, partsDisplacement); +#endif if (opResult != LATENT_SVM_OK) { return LATENT_SVM_SEARCH_OBJECT_FAILED; @@ -537,7 +550,8 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, int kComponents, const int *kPartFilters, const float *b, float scoreThreshold, CvPoint **points, CvPoint **oppPoints, - float **score, int *kPoints) + float **score, int *kPoints, + int numThreads) { int error = 0; int i, j, s, f, componentIndex; @@ -561,10 +575,17 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, // For each component perform searching for (i = 0; i < kComponents; i++) { +#ifdef HAVE_TBB + searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], + b[i], maxXBorder, maxYBorder, scoreThreshold, + &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), + &(scoreArr[i]), &(partsDisplacementArr[i]), numThreads); +#else searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], b[i], maxXBorder, maxYBorder, scoreThreshold, &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), &(scoreArr[i]), &(partsDisplacementArr[i])); +#endif estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i])); componentIndex += (kPartFilters[i] + 1); diff --git a/modules/objdetect/src/latentsvmdetector.cpp b/modules/objdetect/src/latentsvmdetector.cpp index 34a1a49..28ebb76 100644 --- a/modules/objdetect/src/latentsvmdetector.cpp +++ b/modules/objdetect/src/latentsvmdetector.cpp @@ -69,7 +69,8 @@ void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector) // CvSeq* cvLatentSvmDetectObjects(const IplImage* image, // CvLatentSvmDetector* detector, // CvMemStorage* storage, -// float overlap_threshold = 0.5f); +// float overlap_threshold = 0.5f, + int numThreads = -1); // INPUT // image - image to detect objects in // detector - Latent SVM detector in internal representation @@ -82,7 +83,7 @@ void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector) CvSeq* cvLatentSvmDetectObjects(IplImage* image, CvLatentSvmDetector* detector, CvMemStorage* storage, - float overlap_threshold) + float overlap_threshold, int numThreads) { CvLSVMFeaturePyramid *H = 0; CvPoint *points = 0, *oppPoints = 0; @@ -103,7 +104,7 @@ CvSeq* cvLatentSvmDetectObjects(IplImage* image, // Search object searchObjectThresholdSomeComponents(H, (const CvLSVMFilterObject**)(detector->filters), detector->num_components, detector->num_part_filters, detector->b, detector->score_threshold, - &points, &oppPoints, &score, &kPoints); + &points, &oppPoints, &score, &kPoints, numThreads); // Clipping boxes clippingBoxes(image->width, image->height, points, kPoints); clippingBoxes(image->width, image->height, oppPoints, kPoints); diff --git a/modules/objdetect/src/lsvmtbbversion.cpp b/modules/objdetect/src/lsvmtbbversion.cpp new file mode 100644 index 0000000..a9e3cf5 --- /dev/null +++ b/modules/objdetect/src/lsvmtbbversion.cpp @@ -0,0 +1,121 @@ +#include "_lsvm_tbbversion.h" + + +#ifdef HAVE_TBB +/* +// Task class +*/ +class ScoreComputation : public tbb::task +{ +private: + const CvLSVMFilterObject **filters; + const int n; + const CvLSVMFeaturePyramid *H; + const float b; + const int maxXBorder; + const int maxYBorder; + const float scoreThreshold; + const int kLevels; + const int *procLevels; +public: + float **score; + CvPoint ***points; + CvPoint ****partsDisplacement; + int *kPoints; +public: + ScoreComputation(const CvLSVMFilterObject **_filters, int _n, + const CvLSVMFeaturePyramid *_H, + float _b, int _maxXBorder, int _maxYBorder, + float _scoreThreshold, int _kLevels, const int *_procLevels, + float **_score, CvPoint ***_points, int *_kPoints, + CvPoint ****_partsDisplacement) : + n(_n), b(_b), maxXBorder(_maxXBorder), + maxYBorder(_maxYBorder), scoreThreshold(_scoreThreshold), + kLevels(_kLevels), score(_score), points(_points), kPoints(_kPoints), + partsDisplacement(_partsDisplacement) + { + filters = _filters; + H = _H; + procLevels = _procLevels; + }; + + task* execute() + { + int i, level, partsLevel, res; + for (i = 0; i < kLevels; i++) + { + level = procLevels[i]; + partsLevel = level - H->lambda; + res = thresholdFunctionalScoreFixedLevel( + filters, n, H, level, b, + maxXBorder, maxYBorder, scoreThreshold, &(score[partsLevel]), + points[partsLevel], &(kPoints[partsLevel]), + partsDisplacement[partsLevel]); + if (res != LATENT_SVM_OK) + { + continue; + } + } + return NULL; + } +}; + +/* +// Computation score function using TBB tasks +// +// API +// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, + const CvLSVMFeatureMap *H, const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + int *kLevels, int **procLevels, + const int threadsNum, + float **score, CvPoint ***points, + int *kPoints, + CvPoint ****partsDisplacement); +// INPUT +// filters - the set of filters (the first element is root filter, + the other - part filters) +// n - the number of part filters +// H - feature pyramid +// b - linear term of the score function +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// scoreThreshold - score threshold +// kLevels - array that contains number of levels processed + by each thread +// procLevels - array that contains lists of levels processed + by each thread +// threadsNum - the number of created threads +// OUTPUT +// score - score function values that exceed threshold +// points - the set of root filter positions (in the block space) +// kPoints - number of root filter positions +// partsDisplacement - displacement of part filters (in the block space) +// RESULT +// +*/ +int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, + const CvLSVMFeaturePyramid *H, const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + int *kLevels, int **procLevels, + const int threadsNum, + float **score, CvPoint ***points, + int *kPoints, + CvPoint ****partsDisplacement) +{ + tbb::task_list tasks; + int i; + for (i = 0; i < threadsNum; i++) + { + ScoreComputation& sc = + *new(tbb::task::allocate_root()) ScoreComputation(filters, n, H, b, + maxXBorder, maxYBorder, scoreThreshold, kLevels[i], procLevels[i], + score, points, kPoints, partsDisplacement); + tasks.push_back(sc); + } + tbb::task::spawn_root_and_wait(tasks); + return LATENT_SVM_OK; +}; +#endif \ No newline at end of file diff --git a/modules/objdetect/src/matching.cpp b/modules/objdetect/src/matching.cpp index a49144c..f04e6fc 100644 --- a/modules/objdetect/src/matching.cpp +++ b/modules/objdetect/src/matching.cpp @@ -24,42 +24,68 @@ int convolution(const CvLSVMFilterObject *Fi, const CvLSVMFeatureMap *map, float *f) { int n1, m1, n2, m2, p, size, diff1, diff2; - int i1, i2, j1, j2, k; - - n1 = map->sizeY; - m1 = map->sizeX; - n2 = Fi->sizeY; - m2 = Fi->sizeX; - p = map->p; - if (n1 < n2 || m1 < m2) - { - return FILTER_OUT_OF_BOUNDARIES; - } - - // Computation number of positions for the filter - diff1 = n1 - n2 + 1; - diff2 = m1 - m2 + 1; - size = diff1 * diff2; + int i1, i2, j1, j2, k; + float tmp_f1, tmp_f2, tmp_f3, tmp_f4; + float *pMap = NULL; + float *pH = NULL; + + n1 = map->sizeY; + m1 = map->sizeX; + n2 = Fi->sizeY; + m2 = Fi->sizeX; + p = map->p; + + diff1 = n1 - n2 + 1; + diff2 = m1 - m2 + 1; + size = diff1 * diff2; + for (j1 = diff2 - 1; j1 >= 0; j1--) + { + + for (i1 = diff1 - 1; i1 >= 0; i1--) + { + tmp_f1 = 0.0f; + tmp_f2 = 0.0f; + tmp_f3 = 0.0f; + tmp_f4 = 0.0f; + for (i2 = 0; i2 < n2; i2++) + { + for (j2 = 0; j2 < m2; j2++) + { + pMap = map->Map + (i1 + i2) * m1 * p + (j1 + j2) * p;//sm2 + pH = Fi->H + (i2 * m2 + j2) * p;//sm2 + for (k = 0; k < p/4; k++) + { - for (i1 = 0; i1 < diff1; i1++) - { - for (j1 = 0; j1 < diff2; j1++) - { - f[i1 * diff2 + j1] = 0.0; - for (i2 = 0; i2 < n2; i2++) - { - for (j2 = 0; j2 < m2; j2++) - { - for (k = 0; k < p; k++) - { - f[i1 * diff2 + j1] += map->Map[(i1 + i2) * m1 * p + - (j1 + j2) * p + k] * - Fi->H[(i2 * m2 + j2) * p + k]; - } - } - } - } - } + tmp_f1 += pMap[4*k]*pH[4*k];//sm2 + tmp_f2 += pMap[4*k+1]*pH[4*k+1]; + tmp_f3 += pMap[4*k+2]*pH[4*k+2]; + tmp_f4 += pMap[4*k+3]*pH[4*k+3]; + } + + if (p%4==1) + { + tmp_f1 += pH[p-1]*pMap[p-1]; + } + else + { + if (p%4==2) + { + tmp_f1 += pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1]; + } + else + { + if (p%4==3) + { + tmp_f1 += pH[p-3]*pMap[p-3] + pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1]; + } + } + } + + } + } + f[i1 * diff2 + j1] = tmp_f1 + tmp_f2 + tmp_f3 + tmp_f4;//sm1 + } + } return LATENT_SVM_OK; } @@ -1341,6 +1367,320 @@ int thresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, return LATENT_SVM_OK; } +/* +// Creating schedule of pyramid levels processing +// +// API +// int createSchedule(const featurePyramid *H, const filterObject **all_F, + const int n, const int bx, const int by, + const int threadsNum, int *kLevels, + int **processingLevels) +// INPUT +// H - feature pyramid +// all_F - the set of filters (the first element is root filter, + the other - part filters) +// n - the number of part filters +// bx - size of nullable border (X direction) +// by - size of nullable border (Y direction) +// threadsNum - number of threads that will be created in TBB version +// OUTPUT +// kLevels - array that contains number of levels processed + by each thread +// processingLevels - array that contains lists of levels processed + by each thread +// RESULT +// Error status +*/ +int createSchedule(const CvLSVMFeaturePyramid *H, const CvLSVMFilterObject **all_F, + const int n, const int bx, const int by, + const int threadsNum, int *kLevels, int **processingLevels) +{ + int rootFilterDim, sumPartFiltersDim, i, numLevels, dbx, dby, numDotProducts; + int averNumDotProd, j, minValue, argMin, tmp, lambda, maxValue, k; + int *dotProd, *weights, *disp; + if (H == NULL || all_F == NULL) + { + return LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED; + } + // Number of feature vectors in root filter + rootFilterDim = all_F[0]->sizeX * all_F[0]->sizeY; + // Number of feature vectors in all part filters + sumPartFiltersDim = 0; + for (i = 1; i <= n; i++) + { + sumPartFiltersDim += all_F[i]->sizeX * all_F[i]->sizeY; + } + // Number of levels which are used for computation of score function + numLevels = H->countLevel - H->lambda; + // Allocation memory for saving number of dot products that will be + // computed for each level of feature pyramid + dotProd = (int *)malloc(sizeof(int) * numLevels); + // Size of nullable border that's used in computing convolution + // of feature map with part filter + dbx = 2 * bx; + dby = 2 * by; + // Total number of dot products for all levels + numDotProducts = 0; + lambda = H->lambda; + for (i = 0; i < numLevels; i++) + { + dotProd[i] = H->pyramid[i + lambda]->sizeX * + H->pyramid[i + lambda]->sizeY * rootFilterDim + + (H->pyramid[i]->sizeX + dbx) * + (H->pyramid[i]->sizeY + dby) * sumPartFiltersDim; + numDotProducts += dotProd[i]; + } + // Average number of dot products that would be performed at the best + averNumDotProd = numDotProducts / threadsNum; + // Allocation memory for saving dot product number performed by each thread + weights = (int *)malloc(sizeof(int) * threadsNum); + // Allocation memory for saving dispertion + disp = (int *)malloc(sizeof(int) * threadsNum); + // At the first step we think of first threadsNum levels will be processed + // by different threads + for (i = 0; i < threadsNum; i++) + { + kLevels[i] = 1; + weights[i] = dotProd[i]; + disp[i] = 0; + } + // Computation number of levels that will be processed by each thread + for (i = threadsNum; i < numLevels; i++) + { + // Search number of thread that will process level number i + for (j = 0; j < threadsNum; j++) + { + weights[j] += dotProd[i]; + minValue = weights[0]; + maxValue = weights[0]; + for (k = 1; k < threadsNum; k++) + { + minValue = min(minValue, weights[k]); + maxValue = max(maxValue, weights[k]); + } + disp[j] = maxValue - minValue; + weights[j] -= dotProd[i]; + } + minValue = disp[0]; + argMin = 0; + for (j = 1; j < threadsNum; j++) + { + if (disp[j] < minValue) + { + minValue = disp[j]; + argMin = j; + } + } + // Addition new level + kLevels[argMin]++; + weights[argMin] += dotProd[i]; + } + for (i = 0; i < threadsNum; i++) + { + // Allocation memory for saving list of levels for each level + processingLevels[i] = (int *)malloc(sizeof(int) * kLevels[i]); + // At the first step we think of first threadsNum levels will be processed + // by different threads + processingLevels[i][0] = lambda + i; + kLevels[i] = 1; + weights[i] = dotProd[i]; + } + // Creating list of levels + for (i = threadsNum; i < numLevels; i++) + { + for (j = 0; j < threadsNum; j++) + { + weights[j] += dotProd[i]; + minValue = weights[0]; + maxValue = weights[0]; + for (k = 1; k < threadsNum; k++) + { + minValue = min(minValue, weights[k]); + maxValue = max(maxValue, weights[k]); + } + disp[j] = maxValue - minValue; + weights[j] -= dotProd[i]; + } + minValue = disp[0]; + argMin = 0; + for (j = 1; j < threadsNum; j++) + { + if (disp[j] < minValue) + { + minValue = disp[j]; + argMin = j; + } + } + processingLevels[argMin][kLevels[argMin]] = lambda + i; + kLevels[argMin]++; + weights[argMin] += dotProd[i]; + } + // Release allocated memory + free(weights); + free(dotProd); + free(disp); + return LATENT_SVM_OK; +} + +#ifdef HAVE_TBB +/* +// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, + const CvLSVMFeaturePyramid *H, + const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + const int threadsNum, + float **score, + CvPoint **points, int **levels, int *kPoints, + CvPoint ***partsDisplacement); +// INPUT +// all_F - the set of filters (the first element is root filter, + the other - part filters) +// n - the number of part filters +// H - feature pyramid +// b - linear term of the score function +// maxXBorder - the largest root filter size (X-direction) +// maxYBorder - the largest root filter size (Y-direction) +// scoreThreshold - score threshold +// threadsNum - number of threads that will be created using TBB version +// OUTPUT +// score - score function values that exceed threshold +// points - the set of root filter positions (in the block space) +// levels - the set of levels +// kPoints - number of root filter positions +// partsDisplacement - displacement of part filters (in the block space) +// RESULT +// Error status +*/ +int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, + const CvLSVMFeaturePyramid *H, + const float b, + const int maxXBorder, const int maxYBorder, + const float scoreThreshold, + const int threadsNum, + float **score, + CvPoint **points, int **levels, int *kPoints, + CvPoint ***partsDisplacement) +{ + int i, j, s, f, level, numLevels; + float **tmpScore; + CvPoint ***tmpPoints; + CvPoint ****tmpPartsDisplacement; + int *tmpKPoints; + int res; + + int *kLevels, **procLevels; + int bx, by; + + // Computation the number of levels for seaching object, + // first lambda-levels are used for computation values + // of score function for each position of root filter + numLevels = H->countLevel - H->lambda; + + kLevels = (int *)malloc(sizeof(int) * threadsNum); + procLevels = (int **)malloc(sizeof(int*) * threadsNum); + computeBorderSize(maxXBorder, maxYBorder, &bx, &by); + res = createSchedule(H, all_F, n, bx, by, threadsNum, kLevels, procLevels); + if (res != LATENT_SVM_OK) + { + for (i = 0; i < threadsNum; i++) + { + if (procLevels[i] != NULL) + { + free(procLevels[i]); + } + } + free(procLevels); + free(kLevels); + return res; + } + + // Allocation memory for values of score function for each level + // that exceed threshold + tmpScore = (float **)malloc(sizeof(float*) * numLevels); + // Allocation memory for the set of points that corresponds + // to the maximum of score function + tmpPoints = (CvPoint ***)malloc(sizeof(CvPoint **) * numLevels); + for (i = 0; i < numLevels; i++) + { + tmpPoints[i] = (CvPoint **)malloc(sizeof(CvPoint *)); + } + // Allocation memory for memory for saving parts displacement on each level + tmpPartsDisplacement = (CvPoint ****)malloc(sizeof(CvPoint ***) * numLevels); + for (i = 0; i < numLevels; i++) + { + tmpPartsDisplacement[i] = (CvPoint ***)malloc(sizeof(CvPoint **)); + } + // Number of points that corresponds to the maximum + // of score function on each level + tmpKPoints = (int *)malloc(sizeof(int) * numLevels); + for (i = 0; i < numLevels; i++) + { + tmpKPoints[i] = 0; + } + + // Computation maxima of score function on each level + // and getting the maximum on all levels using TBB tasks + tbbTasksThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder, + scoreThreshold, kLevels, procLevels, + threadsNum, tmpScore, tmpPoints, + tmpKPoints, tmpPartsDisplacement); + (*kPoints) = 0; + for (i = 0; i < numLevels; i++) + { + (*kPoints) += tmpKPoints[i]; + } + + // Allocation memory for levels + (*levels) = (int *)malloc(sizeof(int) * (*kPoints)); + // Allocation memory for the set of points + (*points) = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints)); + // Allocation memory for parts displacement + (*partsDisplacement) = (CvPoint **)malloc(sizeof(CvPoint *) * (*kPoints)); + // Allocation memory for score function values + (*score) = (float *)malloc(sizeof(float) * (*kPoints)); + + // Filling the set of points, levels and parts displacement + s = 0; + f = 0; + for (i = 0; i < numLevels; i++) + { + // Computation the number of level + level = i + H->lambda; + + // Addition a set of points + f += tmpKPoints[i]; + for (j = s; j < f; j++) + { + (*levels)[j] = level; + (*points)[j] = (*tmpPoints[i])[j - s]; + (*score)[j] = tmpScore[i][j - s]; + (*partsDisplacement)[j] = (*(tmpPartsDisplacement[i]))[j - s]; + } + s = f; + } + + // Release allocated memory + for (i = 0; i < numLevels; i++) + { + free(tmpPoints[i]); + free(tmpPartsDisplacement[i]); + } + for (i = 0; i < threadsNum; i++) + { + free(procLevels[i]); + } + free(procLevels); + free(kLevels); + free(tmpPoints); + free(tmpScore); + free(tmpKPoints); + free(tmpPartsDisplacement); + + return LATENT_SVM_OK; +} +#endif + void sort(int n, const float* x, int* indices) { int i, j; diff --git a/samples/c/latentsvmdetect.cpp b/samples/c/latentsvmdetect.cpp index ee1dab6..9f0ef9c 100644 --- a/samples/c/latentsvmdetect.cpp +++ b/samples/c/latentsvmdetect.cpp @@ -2,6 +2,13 @@ #include "opencv2/highgui/highgui.hpp" #include +#ifdef HAVE_CONFIG_H +#include +#endif +#ifdef HAVE_TBB +#include "tbb/task_scheduler_init.h" +#endif + using namespace cv; void help() @@ -9,26 +16,43 @@ void help() printf( "This program demonstrated the use of the latentSVM detector.\n" "It reads in a trained object model and then uses that to detect the object in an image\n" "Call:\n" - "./latentsvmdetect [ []]\n" " The defaults for image_filename and model_filename are cat.jpg and cat.xml respectively\n" " Press any key to quit.\n"); } const char* model_filename = "cat.xml"; const char* image_filename = "cat.jpg"; +int tbbNumThreads = -1; -void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector) +void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector, int numThreads = -1) { CvMemStorage* storage = cvCreateMemStorage(0); CvSeq* detections = 0; int i = 0; int64 start = 0, finish = 0; +#ifdef HAVE_TBB + tbb::task_scheduler_init init(tbb::task_scheduler_init::deferred); + if (numThreads > 0) + { + init.initialize(numThreads); + printf("Number of threads %i\n", numThreads); + } + else + { + printf("Number of threads is not correct for TBB version"); + return; + } +#endif start = cvGetTickCount(); - detections = cvLatentSvmDetectObjects(image, detector, storage); + detections = cvLatentSvmDetectObjects(image, detector, storage, 0.5f, numThreads); finish = cvGetTickCount(); printf("detection time = %.3f\n", (float)(finish - start) / (float)(cvGetTickFrequency() * 1000000.0)); +#ifdef HAVE_TBB + init.terminate(); +#endif for( i = 0; i < detections->total; i++ ) { CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, i ); @@ -48,6 +72,10 @@ int main(int argc, char* argv[]) { image_filename = argv[1]; model_filename = argv[2]; + if (argc > 3) + { + tbbNumThreads = atoi(argv[3]); + } } IplImage* image = cvLoadImage(image_filename); if (!image) @@ -64,7 +92,7 @@ int main(int argc, char* argv[]) cvReleaseImage( &image ); return -1; } - detect_and_draw_objects( image, detector ); + detect_and_draw_objects( image, detector, tbbNumThreads ); cvNamedWindow( "test", 0 ); cvShowImage( "test", image ); cvWaitKey(0);