#include "tbb/tbb_stddef.h"
#if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
#include "tbb/tbb.h"
+ #include "tbb/task.h"
#undef min
#undef max
#else
// CvSeq* cvLatentSvmDetectObjects(const IplImage* image,
// CvLatentSvmDetector* detector,
// CvMemStorage* storage,
-// float overlap_threshold = 0.5f);
+// float overlap_threshold = 0.5f,
+// int numThreads = -1);
// INPUT
// image - image to detect objects in
// detector - Latent SVM detector in internal representation
CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
CvLatentSvmDetector* detector,
CvMemStorage* storage,
- float overlap_threshold CV_DEFAULT(0.5f));
+ float overlap_threshold CV_DEFAULT(0.5f),
+ int numThreads CV_DEFAULT(-1));
#ifdef __cplusplus
}
int maxXBorder, int maxYBorder, \r
float scoreThreshold,\r
CvPoint **points, int **levels, int *kPoints, \r
- float **score, CvPoint ***partsDisplacement);\r
+ float **score, CvPoint ***partsDisplacement,\r
+ int numThreads CV_DEFAULT(-1));\r
\r
/*\r
// Computation root filters displacement and values of score function\r
int kComponents, const int *kPartFilters,\r
const float *b, float scoreThreshold,\r
CvPoint **points, CvPoint **oppPoints,\r
- float **score, int *kPoints);\r
+ float **score, int *kPoints, int numThreads);\r
\r
/*\r
// Compute opposite point for filter box\r
#define LATENT_SVM_SEARCH_OBJECT_FAILED -5
#define LATENT_SVM_FAILED_SUPERPOSITION -6
#define FILTER_OUT_OF_BOUNDARIES -7
+#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8
+#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9
#define FFT_OK 2
-#define FFT_ERROR -8
-#define LSVM_PARSER_FILE_NOT_FOUND -9
+#define FFT_ERROR -10
+#define LSVM_PARSER_FILE_NOT_FOUND -11
#endif
#include "_lsvm_fft.h"
#include "_lsvm_routine.h"
+#ifdef HAVE_TBB
+#include "_lsvm_tbbversion.h"
+#endif
+
//extern "C" {
/*
// Function for convolution computation
CvPoint **points, int **levels, int *kPoints,
CvPoint ***partsDisplacement);
+#ifdef HAVE_TBB
+/*
+// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n,
+ const CvLSVMFeaturePyramid *H,
+ const float b,
+ const int maxXBorder, const int maxYBorder,
+ const float scoreThreshold,
+ const int threadsNum,
+ float **score,
+ CvPoint **points, int **levels, int *kPoints,
+ CvPoint ***partsDisplacement);
+// INPUT
+// all_F - the set of filters (the first element is root filter,
+ the other - part filters)
+// n - the number of part filters
+// H - feature pyramid
+// b - linear term of the score function
+// maxXBorder - the largest root filter size (X-direction)
+// maxYBorder - the largest root filter size (Y-direction)
+// scoreThreshold - score threshold
+// threadsNum - number of threads that will be created using TBB version
+// OUTPUT
+// score - score function values that exceed threshold
+// points - the set of root filter positions (in the block space)
+// levels - the set of levels
+// kPoints - number of root filter positions
+// partsDisplacement - displacement of part filters (in the block space)
+// RESULT
+// Error status
+*/
+int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n,
+ const CvLSVMFeaturePyramid *H,
+ const float b,
+ const int maxXBorder, const int maxYBorder,
+ const float scoreThreshold,
+ const int threadsNum,
+ float **score,
+ CvPoint **points, int **levels, int *kPoints,
+ CvPoint ***partsDisplacement);
+#endif
/*
// Perform non-maximum suppression algorithm (described in original paper)
--- /dev/null
+#ifndef _LSVM_TBBVERSION_H\r
+#define _LSVM_TBBVERSION_H\r
+\r
+#include "_lsvm_matching.h"\r
+#include "precomp.hpp"\r
+\r
+/*\r
+// Computation score function using TBB tasks\r
+//\r
+// API\r
+// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+ const CvLSVMFeaturePyramid *H, const float b,\r
+ const int maxXBorder, const int maxYBorder,\r
+ const float scoreThreshold,\r
+ int *kLevels, int **procLevels,\r
+ const int threadsNum,\r
+ float **score, CvPoint ***points, \r
+ int *kPoints,\r
+ CvPoint ****partsDisplacement);\r
+// INPUT\r
+// filters - the set of filters (the first element is root filter, \r
+ the other - part filters)\r
+// n - the number of part filters\r
+// H - feature pyramid\r
+// b - linear term of the score function\r
+// maxXBorder - the largest root filter size (X-direction)\r
+// maxYBorder - the largest root filter size (Y-direction)\r
+// scoreThreshold - score threshold\r
+// kLevels - array that contains number of levels processed \r
+ by each thread\r
+// procLevels - array that contains lists of levels processed \r
+ by each thread\r
+// threadsNum - the number of created threads\r
+// OUTPUT\r
+// score - score function values that exceed threshold\r
+// points - the set of root filter positions (in the block space)\r
+// kPoints - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status (LATENT_SVM_OK)\r
+*/\r
+int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+ const CvLSVMFeaturePyramid *H, const float b,\r
+ const int maxXBorder, const int maxYBorder,\r
+ const float scoreThreshold,\r
+ int *kLevels, int **procLevels,\r
+ const int threadsNum,\r
+ float **score, CvPoint ***points, \r
+ int *kPoints,\r
+ CvPoint ****partsDisplacement);\r
+\r
+#endif
\ No newline at end of file
int maxXBorder, int maxYBorder, \r
float scoreThreshold,\r
CvPoint **points, int **levels, int *kPoints, \r
- float **score, CvPoint ***partsDisplacement)\r
+ float **score, CvPoint ***partsDisplacement,\r
+ int numThreads)\r
{\r
int opResult;\r
\r
\r
// Matching\r
+#ifdef HAVE_TBB\r
+ if (numThreads <= 0)\r
+ {\r
+ opResult = LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT;\r
+ return opResult;\r
+ }\r
+ opResult = tbbThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder,\r
+ scoreThreshold, numThreads, score, \r
+ points, levels, kPoints, \r
+ partsDisplacement);\r
+#else\r
opResult = thresholdFunctionalScore(all_F, n, H, b, \r
maxXBorder, maxYBorder, \r
scoreThreshold, \r
score, points, levels, \r
kPoints, partsDisplacement);\r
+#endif\r
if (opResult != LATENT_SVM_OK)\r
{\r
return LATENT_SVM_SEARCH_OBJECT_FAILED;\r
int kComponents, const int *kPartFilters,\r
const float *b, float scoreThreshold,\r
CvPoint **points, CvPoint **oppPoints,\r
- float **score, int *kPoints)\r
+ float **score, int *kPoints,\r
+ int numThreads)\r
{\r
int error = 0;\r
int i, j, s, f, componentIndex;\r
// For each component perform searching\r
for (i = 0; i < kComponents; i++)\r
{\r
+#ifdef HAVE_TBB\r
+ searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],\r
+ b[i], maxXBorder, maxYBorder, scoreThreshold,\r
+ &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), \r
+ &(scoreArr[i]), &(partsDisplacementArr[i]), numThreads);\r
+#else\r
searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],\r
b[i], maxXBorder, maxYBorder, scoreThreshold, \r
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), \r
&(scoreArr[i]), &(partsDisplacementArr[i]));\r
+#endif\r
estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], \r
filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i])); \r
componentIndex += (kPartFilters[i] + 1);\r
// CvSeq* cvLatentSvmDetectObjects(const IplImage* image, \r
// CvLatentSvmDetector* detector, \r
// CvMemStorage* storage, \r
-// float overlap_threshold = 0.5f);\r
+// float overlap_threshold = 0.5f,\r
+// int numThreads = -1);\r
// INPUT\r
// image - image to detect objects in\r
// detector - Latent SVM detector in internal representation\r
CvSeq* cvLatentSvmDetectObjects(IplImage* image, \r
CvLatentSvmDetector* detector, \r
CvMemStorage* storage, \r
- float overlap_threshold)\r
+ float overlap_threshold, int numThreads)\r
{\r
CvLSVMFeaturePyramid *H = 0;\r
CvPoint *points = 0, *oppPoints = 0;\r
// Search object\r
searchObjectThresholdSomeComponents(H, (const CvLSVMFilterObject**)(detector->filters), detector->num_components, \r
detector->num_part_filters, detector->b, detector->score_threshold, \r
- &points, &oppPoints, &score, &kPoints);\r
+ &points, &oppPoints, &score, &kPoints, numThreads);\r
// Clipping boxes\r
clippingBoxes(image->width, image->height, points, kPoints);\r
clippingBoxes(image->width, image->height, oppPoints, kPoints);\r
--- /dev/null
+#include "_lsvm_tbbversion.h"\r
+\r
+\r
+#ifdef HAVE_TBB\r
+/*\r
+// Task class\r
+*/\r
+class ScoreComputation : public tbb::task\r
+{\r
+private:\r
+    // Inputs forwarded unchanged to thresholdFunctionalScoreFixedLevel()\r
+    const CvLSVMFilterObject **filters;\r
+    const int n;\r
+    const CvLSVMFeaturePyramid *H;\r
+    const float b;\r
+    const int maxXBorder;\r
+    const int maxYBorder;\r
+    const float scoreThreshold;\r
+    // Work list of this task: procLevels holds kLevels pyramid level indices\r
+    const int kLevels;\r
+    const int *procLevels;\r
+public:\r
+    // Per-level output arrays; the slot of a level is (level - H->lambda)\r
+    float **score;\r
+    CvPoint ***points;\r
+    CvPoint ****partsDisplacement;\r
+    int *kPoints;\r
+public:\r
+    // Stores the pointers as given (nothing is copied or owned by the task).\r
+    // Members are initialized in declaration order to avoid reorder warnings.\r
+    ScoreComputation(const CvLSVMFilterObject **_filters, int _n, \r
+                     const CvLSVMFeaturePyramid *_H,\r
+                     float _b, int _maxXBorder, int _maxYBorder,\r
+                     float _scoreThreshold, int _kLevels, const int *_procLevels,\r
+                     float **_score, CvPoint ***_points, int *_kPoints,\r
+                     CvPoint ****_partsDisplacement) :\r
+        filters(_filters), n(_n), H(_H), b(_b),\r
+        maxXBorder(_maxXBorder), maxYBorder(_maxYBorder),\r
+        scoreThreshold(_scoreThreshold),\r
+        kLevels(_kLevels), procLevels(_procLevels),\r
+        score(_score), points(_points),\r
+        partsDisplacement(_partsDisplacement), kPoints(_kPoints)\r
+    {\r
+    }\r
+\r
+    // Runs thresholdFunctionalScoreFixedLevel() for every level of the work\r
+    // list; each level writes only its own slot of the output arrays.\r
+    // Always returns NULL (no continuation task).\r
+    tbb::task* execute()\r
+    {\r
+        int idx;\r
+        for (idx = 0; idx < kLevels; idx++)\r
+        {\r
+            const int level = procLevels[idx];\r
+            const int slot = level - H->lambda;\r
+            const int res = thresholdFunctionalScoreFixedLevel(\r
+                filters, n, H, level, b,\r
+                maxXBorder, maxYBorder, scoreThreshold, &(score[slot]), \r
+                points[slot], &(kPoints[slot]), \r
+                partsDisplacement[slot]);\r
+            if (res != LATENT_SVM_OK)\r
+            {\r
+                // A failed level must contribute no points: the caller sums\r
+                // kPoints and then reads that many entries from this slot.\r
+                // NOTE(review): assumes the helper leaves nothing that has\r
+                // to be released after a failure - confirm.\r
+                kPoints[slot] = 0;\r
+            }\r
+        }\r
+        return NULL;\r
+    }\r
+};\r
+\r
+/*\r
+// Computation score function using TBB tasks\r
+//\r
+// API\r
+// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+ const CvLSVMFeaturePyramid *H, const float b,\r
+ const int maxXBorder, const int maxYBorder,\r
+ const float scoreThreshold,\r
+ int *kLevels, int **procLevels,\r
+ const int threadsNum,\r
+ float **score, CvPoint ***points, \r
+ int *kPoints,\r
+ CvPoint ****partsDisplacement);\r
+// INPUT\r
+// filters - the set of filters (the first element is root filter, \r
+ the other - part filters)\r
+// n - the number of part filters\r
+// H - feature pyramid\r
+// b - linear term of the score function\r
+// maxXBorder - the largest root filter size (X-direction)\r
+// maxYBorder - the largest root filter size (Y-direction)\r
+// scoreThreshold - score threshold\r
+// kLevels - array that contains number of levels processed \r
+ by each thread\r
+// procLevels - array that contains lists of levels processed \r
+ by each thread\r
+// threadsNum - the number of created threads\r
+// OUTPUT\r
+// score - score function values that exceed threshold\r
+// points - the set of root filter positions (in the block space)\r
+// kPoints - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status (LATENT_SVM_OK)\r
+*/\r
+int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+                                     const CvLSVMFeaturePyramid *H, const float b,\r
+                                     const int maxXBorder, const int maxYBorder,\r
+                                     const float scoreThreshold,\r
+                                     int *kLevels, int **procLevels,\r
+                                     const int threadsNum,\r
+                                     float **score, CvPoint ***points, \r
+                                     int *kPoints,\r
+                                     CvPoint ****partsDisplacement)\r
+{\r
+    // One root task per non-empty work list; every task receives the same\r
+    // shared inputs and output arrays and differs only in its level list.\r
+    tbb::task_list tasks;\r
+    int t;\r
+    for (t = 0; t < threadsNum; t++)\r
+    {\r
+        if (kLevels[t] <= 0)\r
+        {\r
+            // Nothing scheduled for this thread, so no task is needed\r
+            continue;\r
+        }\r
+        ScoreComputation& job = \r
+            *new(tbb::task::allocate_root()) ScoreComputation(filters, n, H, b,\r
+            maxXBorder, maxYBorder, scoreThreshold, kLevels[t], procLevels[t], \r
+            score, points, kPoints, partsDisplacement);\r
+        tasks.push_back(job);\r
+    }\r
+    // Blocks until every spawned task has finished\r
+    if (!tasks.empty())\r
+    {\r
+        tbb::task::spawn_root_and_wait(tasks);\r
+    }\r
+    return LATENT_SVM_OK;\r
+}\r
+#endif
\ No newline at end of file
int convolution(const CvLSVMFilterObject *Fi, const CvLSVMFeatureMap *map, float *f)\r
{\r
- int n1, m1, n2, m2, p, size, diff1, diff2;\r
- int i1, i2, j1, j2, k; \r
- \r
- n1 = map->sizeY;\r
- m1 = map->sizeX;\r
- n2 = Fi->sizeY;\r
- m2 = Fi->sizeX;\r
- p = map->p;\r
- if (n1 < n2 || m1 < m2)\r
- {\r
- return FILTER_OUT_OF_BOUNDARIES;\r
- }\r
-\r
- // Computation number of positions for the filter\r
- diff1 = n1 - n2 + 1;\r
- diff2 = m1 - m2 + 1;\r
- size = diff1 * diff2;\r
+    // Writes to f[i1 * diff2 + j1] the dot product of filter Fi with the\r
+    // window of the feature map whose top-left cell is (row i1, column j1),\r
+    // for every position at which the filter fits inside the map.\r
+    // Returns FILTER_OUT_OF_BOUNDARIES (f untouched) when the filter is\r
+    // larger than the map, LATENT_SVM_OK otherwise.\r
+    int n1, m1, n2, m2, p, diff1, diff2;\r
+    int i1, i2, j1, j2, k, quads, tail;\r
+    float tmp_f1, tmp_f2, tmp_f3, tmp_f4;\r
+    const float *pMap = NULL;\r
+    const float *pH = NULL;\r
+\r
+    n1 = map->sizeY;\r
+    m1 = map->sizeX;\r
+    n2 = Fi->sizeY;\r
+    m2 = Fi->sizeX;\r
+    p = map->p;\r
+    if (n1 < n2 || m1 < m2)\r
+    {\r
+        // Without this guard no position is computed and the caller would\r
+        // read an unwritten f while being told the call succeeded\r
+        return FILTER_OUT_OF_BOUNDARIES;\r
+    }\r
+\r
+    // Number of filter positions along Y and X\r
+    diff1 = n1 - n2 + 1;\r
+    diff2 = m1 - m2 + 1;\r
+    // The feature vector is consumed in groups of four plus a short tail\r
+    quads = p / 4;\r
+    tail = p % 4;\r
\r
- for (i1 = 0; i1 < diff1; i1++)\r
- {\r
- for (j1 = 0; j1 < diff2; j1++)\r
- {\r
- f[i1 * diff2 + j1] = 0.0;\r
- for (i2 = 0; i2 < n2; i2++)\r
- {\r
- for (j2 = 0; j2 < m2; j2++)\r
- {\r
- for (k = 0; k < p; k++)\r
- {\r
- f[i1 * diff2 + j1] += map->Map[(i1 + i2) * m1 * p + \r
- (j1 + j2) * p + k] * \r
- Fi->H[(i2 * m2 + j2) * p + k];\r
- }\r
- }\r
- }\r
- }\r
- }\r
+    for (j1 = diff2 - 1; j1 >= 0; j1--)\r
+    {\r
+        for (i1 = diff1 - 1; i1 >= 0; i1--)\r
+        {\r
+            // Four independent partial sums, combined once per position\r
+            tmp_f1 = 0.0f;\r
+            tmp_f2 = 0.0f;\r
+            tmp_f3 = 0.0f;\r
+            tmp_f4 = 0.0f;\r
+            for (i2 = 0; i2 < n2; i2++)\r
+            {\r
+                for (j2 = 0; j2 < m2; j2++)\r
+                {\r
+                    // Feature vectors of the map cell and of the filter cell\r
+                    pMap = map->Map + (i1 + i2) * m1 * p + (j1 + j2) * p;\r
+                    pH = Fi->H + (i2 * m2 + j2) * p;\r
+                    for (k = 0; k < quads; k++)\r
+                    {\r
+                        tmp_f1 += pMap[4*k]*pH[4*k];\r
+                        tmp_f2 += pMap[4*k+1]*pH[4*k+1];\r
+                        tmp_f3 += pMap[4*k+2]*pH[4*k+2];\r
+                        tmp_f4 += pMap[4*k+3]*pH[4*k+3];\r
+                    }\r
+                    // Remaining 1..3 components go to the first partial sum\r
+                    switch (tail)\r
+                    {\r
+                    case 1:\r
+                        tmp_f1 += pH[p-1]*pMap[p-1];\r
+                        break;\r
+                    case 2:\r
+                        tmp_f1 += pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1];\r
+                        break;\r
+                    case 3:\r
+                        tmp_f1 += pH[p-3]*pMap[p-3] + pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1];\r
+                        break;\r
+                    default:\r
+                        break;\r
+                    }\r
+                }\r
+            }\r
+            f[i1 * diff2 + j1] = tmp_f1 + tmp_f2 + tmp_f3 + tmp_f4;\r
+        }\r
+    }\r
 return LATENT_SVM_OK;\r
}\r
\r
return LATENT_SVM_OK; \r
}\r
\r
+/*\r
+// Creating schedule of pyramid levels processing \r
+//\r
+// API\r
+// int createSchedule(const featurePyramid *H, const filterObject **all_F,\r
+ const int n, const int bx, const int by,\r
+ const int threadsNum, int *kLevels, \r
+ int **processingLevels)\r
+// INPUT\r
+// H - feature pyramid\r
+// all_F - the set of filters (the first element is root filter, \r
+ the other - part filters)\r
+// n - the number of part filters\r
+// bx - size of nullable border (X direction)\r
+// by - size of nullable border (Y direction)\r
+// threadsNum - number of threads that will be created in TBB version\r
+// OUTPUT\r
+// kLevels - array that contains number of levels processed \r
+ by each thread\r
+// processingLevels - array that contains lists of levels processed \r
+ by each thread\r
+// RESULT\r
+// Error status\r
+*/\r
+int createSchedule(const CvLSVMFeaturePyramid *H, const CvLSVMFilterObject **all_F,\r
+ const int n, const int bx, const int by,\r
+ const int threadsNum, int *kLevels, int **processingLevels)\r
+{\r
+ int rootFilterDim, sumPartFiltersDim, i, numLevels, dbx, dby, numDotProducts;\r
+ int averNumDotProd, j, minValue, argMin, tmp, lambda, maxValue, k;\r
+ int *dotProd, *weights, *disp;\r
+ if (H == NULL || all_F == NULL)\r
+ {\r
+ return LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED;\r
+ }\r
+ // Number of feature vectors in root filter\r
+ rootFilterDim = all_F[0]->sizeX * all_F[0]->sizeY;\r
+ // Number of feature vectors in all part filters\r
+ sumPartFiltersDim = 0;\r
+ for (i = 1; i <= n; i++)\r
+ {\r
+ sumPartFiltersDim += all_F[i]->sizeX * all_F[i]->sizeY;\r
+ }\r
+ // Number of levels which are used for computation of score function\r
+ numLevels = H->countLevel - H->lambda;\r
+ // Allocation memory for saving number of dot products that will be\r
+ // computed for each level of feature pyramid\r
+ dotProd = (int *)malloc(sizeof(int) * numLevels);\r
+ // Size of nullable border that's used in computing convolution\r
+ // of feature map with part filter\r
+ dbx = 2 * bx;\r
+ dby = 2 * by;\r
+ // Total number of dot products for all levels\r
+ numDotProducts = 0;\r
+ lambda = H->lambda;\r
+ for (i = 0; i < numLevels; i++)\r
+ {\r
+ dotProd[i] = H->pyramid[i + lambda]->sizeX * \r
+ H->pyramid[i + lambda]->sizeY * rootFilterDim +\r
+ (H->pyramid[i]->sizeX + dbx) * \r
+ (H->pyramid[i]->sizeY + dby) * sumPartFiltersDim;\r
+ numDotProducts += dotProd[i];\r
+ }\r
+ // Average number of dot products that would be performed at the best\r
+ averNumDotProd = numDotProducts / threadsNum;\r
+ // Allocation memory for saving dot product number performed by each thread\r
+ weights = (int *)malloc(sizeof(int) * threadsNum);\r
+ // Allocation memory for saving dispertion\r
+ disp = (int *)malloc(sizeof(int) * threadsNum);\r
+ // At the first step we think of first threadsNum levels will be processed\r
+ // by different threads\r
+ for (i = 0; i < threadsNum; i++)\r
+ {\r
+ kLevels[i] = 1;\r
+ weights[i] = dotProd[i];\r
+ disp[i] = 0;\r
+ }\r
+ // Computation number of levels that will be processed by each thread\r
+ for (i = threadsNum; i < numLevels; i++)\r
+ {\r
+ // Search number of thread that will process level number i\r
+ for (j = 0; j < threadsNum; j++)\r
+ {\r
+ weights[j] += dotProd[i];\r
+ minValue = weights[0];\r
+ maxValue = weights[0];\r
+ for (k = 1; k < threadsNum; k++)\r
+ {\r
+ minValue = min(minValue, weights[k]);\r
+ maxValue = max(maxValue, weights[k]);\r
+ }\r
+ disp[j] = maxValue - minValue;\r
+ weights[j] -= dotProd[i];\r
+ }\r
+ minValue = disp[0];\r
+ argMin = 0;\r
+ for (j = 1; j < threadsNum; j++)\r
+ {\r
+ if (disp[j] < minValue)\r
+ {\r
+ minValue = disp[j];\r
+ argMin = j;\r
+ }\r
+ }\r
+ // Addition new level\r
+ kLevels[argMin]++;\r
+ weights[argMin] += dotProd[i];\r
+ }\r
+ for (i = 0; i < threadsNum; i++)\r
+ {\r
+ // Allocation memory for saving list of levels for each level\r
+ processingLevels[i] = (int *)malloc(sizeof(int) * kLevels[i]);\r
+ // At the first step we think of first threadsNum levels will be processed\r
+ // by different threads\r
+ processingLevels[i][0] = lambda + i;\r
+ kLevels[i] = 1;\r
+ weights[i] = dotProd[i];\r
+ }\r
+ // Creating list of levels\r
+ for (i = threadsNum; i < numLevels; i++)\r
+ {\r
+ for (j = 0; j < threadsNum; j++)\r
+ {\r
+ weights[j] += dotProd[i];\r
+ minValue = weights[0];\r
+ maxValue = weights[0];\r
+ for (k = 1; k < threadsNum; k++)\r
+ {\r
+ minValue = min(minValue, weights[k]);\r
+ maxValue = max(maxValue, weights[k]);\r
+ }\r
+ disp[j] = maxValue - minValue;\r
+ weights[j] -= dotProd[i];\r
+ }\r
+ minValue = disp[0];\r
+ argMin = 0;\r
+ for (j = 1; j < threadsNum; j++)\r
+ {\r
+ if (disp[j] < minValue)\r
+ {\r
+ minValue = disp[j];\r
+ argMin = j;\r
+ }\r
+ }\r
+ processingLevels[argMin][kLevels[argMin]] = lambda + i;\r
+ kLevels[argMin]++;\r
+ weights[argMin] += dotProd[i];\r
+ }\r
+ // Release allocated memory\r
+ free(weights);\r
+ free(dotProd);\r
+ free(disp);\r
+ return LATENT_SVM_OK;\r
+}\r
+\r
+#ifdef HAVE_TBB\r
+/*\r
+// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, \r
+ const CvLSVMFeaturePyramid *H, \r
+ const float b, \r
+ const int maxXBorder, const int maxYBorder,\r
+ const float scoreThreshold,\r
+ const int threadsNum,\r
+ float **score, \r
+ CvPoint **points, int **levels, int *kPoints,\r
+ CvPoint ***partsDisplacement);\r
+// INPUT\r
+// all_F - the set of filters (the first element is root filter, \r
+ the other - part filters)\r
+// n - the number of part filters\r
+// H - feature pyramid\r
+// b - linear term of the score function\r
+// maxXBorder - the largest root filter size (X-direction)\r
+// maxYBorder - the largest root filter size (Y-direction)\r
+// scoreThreshold - score threshold\r
+// threadsNum - number of threads that will be created using TBB version\r
+// OUTPUT\r
+// score - score function values that exceed threshold\r
+// points - the set of root filter positions (in the block space)\r
+// levels - the set of levels\r
+// kPoints - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status\r
+*/\r
+int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, \r
+                                const CvLSVMFeaturePyramid *H, \r
+                                const float b, \r
+                                const int maxXBorder, const int maxYBorder,\r
+                                const float scoreThreshold,\r
+                                const int threadsNum,\r
+                                float **score, \r
+                                CvPoint **points, int **levels, int *kPoints,\r
+                                CvPoint ***partsDisplacement)\r
+{\r
+    // Builds a per-thread schedule of pyramid levels, evaluates the levels\r
+    // with TBB tasks and concatenates the per-level results in level order.\r
+    // Returns LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT for threadsNum <= 0,\r
+    // LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED for NULL H / all_F, the error\r
+    // of createSchedule(), LATENT_SVM_SEARCH_OBJECT_FAILED when memory runs\r
+    // out, LATENT_SVM_OK otherwise. On success the four output arrays belong\r
+    // to the caller.\r
+    int i, j, out, level, numLevels, workers, total;\r
+    int res = LATENT_SVM_SEARCH_OBJECT_FAILED;\r
+    int bx, by;\r
+    int *kLevels = NULL, **procLevels = NULL;\r
+    int *tmpKPoints = NULL;\r
+    float **tmpScore = NULL;\r
+    // pointCells[i] / partCells[i] receive the arrays produced for level i;\r
+    // tmpPoints[i] / tmpPartsDisplacement[i] are the addresses of those cells\r
+    CvPoint **pointCells = NULL, ***tmpPoints = NULL;\r
+    CvPoint ***partCells = NULL, ****tmpPartsDisplacement = NULL;\r
+\r
+    if (threadsNum <= 0)\r
+    {\r
+        return LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT;\r
+    }\r
+    if (H == NULL || all_F == NULL)\r
+    {\r
+        return LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED;\r
+    }\r
+\r
+    // Computation the number of levels for searching object,\r
+    // first lambda-levels are used for computation values\r
+    // of score function for each position of root filter\r
+    numLevels = H->countLevel - H->lambda;\r
+    if (numLevels <= 0)\r
+    {\r
+        // No level to search on: report an empty result\r
+        (*kPoints) = 0;\r
+        (*levels) = NULL;\r
+        (*points) = NULL;\r
+        (*partsDisplacement) = NULL;\r
+        (*score) = NULL;\r
+        return LATENT_SVM_OK;\r
+    }\r
+    // More threads than levels would leave threads without any level\r
+    workers = (threadsNum < numLevels) ? threadsNum : numLevels;\r
+\r
+    kLevels = (int *)malloc(sizeof(int) * workers);\r
+    procLevels = (int **)malloc(sizeof(int *) * workers);\r
+    tmpKPoints = (int *)malloc(sizeof(int) * numLevels);\r
+    tmpScore = (float **)malloc(sizeof(float *) * numLevels);\r
+    pointCells = (CvPoint **)malloc(sizeof(CvPoint *) * numLevels);\r
+    tmpPoints = (CvPoint ***)malloc(sizeof(CvPoint **) * numLevels);\r
+    partCells = (CvPoint ***)malloc(sizeof(CvPoint **) * numLevels);\r
+    tmpPartsDisplacement = (CvPoint ****)malloc(sizeof(CvPoint ***) * numLevels);\r
+    if (procLevels != NULL)\r
+    {\r
+        // The lists must be NULL before createSchedule() can fail,\r
+        // otherwise the cleanup below would free garbage pointers\r
+        for (i = 0; i < workers; i++)\r
+        {\r
+            procLevels[i] = NULL;\r
+        }\r
+    }\r
+    if (kLevels == NULL || procLevels == NULL || tmpKPoints == NULL ||\r
+        tmpScore == NULL || pointCells == NULL || tmpPoints == NULL ||\r
+        partCells == NULL || tmpPartsDisplacement == NULL)\r
+    {\r
+        goto cleanup;\r
+    }\r
+    for (i = 0; i < workers; i++)\r
+    {\r
+        kLevels[i] = 0;\r
+    }\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        tmpKPoints[i] = 0;\r
+        tmpScore[i] = NULL;\r
+        pointCells[i] = NULL;\r
+        partCells[i] = NULL;\r
+        tmpPoints[i] = &pointCells[i];\r
+        tmpPartsDisplacement[i] = &partCells[i];\r
+    }\r
+\r
+    computeBorderSize(maxXBorder, maxYBorder, &bx, &by);\r
+    res = createSchedule(H, all_F, n, bx, by, workers, kLevels, procLevels);\r
+    if (res != LATENT_SVM_OK)\r
+    {\r
+        goto cleanup;\r
+    }\r
+\r
+    // Computation maxima of score function on each level\r
+    // and getting the maximum on all levels using TBB tasks\r
+    tbbTasksThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder,\r
+                                     scoreThreshold, kLevels, procLevels, \r
+                                     workers, tmpScore, tmpPoints, \r
+                                     tmpKPoints, tmpPartsDisplacement);\r
+    total = 0;\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        total += tmpKPoints[i];\r
+    }\r
+    (*kPoints) = total;\r
+\r
+    // Allocation memory for levels, points, parts displacement and scores\r
+    (*levels) = (int *)malloc(sizeof(int) * total);\r
+    (*points) = (CvPoint *)malloc(sizeof(CvPoint) * total);\r
+    (*partsDisplacement) = (CvPoint **)malloc(sizeof(CvPoint *) * total);\r
+    (*score) = (float *)malloc(sizeof(float) * total);\r
+    // malloc(0) may legitimately return NULL, so only check real requests\r
+    if (total > 0 && ((*levels) == NULL || (*points) == NULL ||\r
+                      (*partsDisplacement) == NULL || (*score) == NULL))\r
+    {\r
+        free(*levels);\r
+        free(*points);\r
+        free(*partsDisplacement);\r
+        free(*score);\r
+        (*levels) = NULL;\r
+        (*points) = NULL;\r
+        (*partsDisplacement) = NULL;\r
+        (*score) = NULL;\r
+        (*kPoints) = 0;\r
+        res = LATENT_SVM_SEARCH_OBJECT_FAILED;\r
+        goto cleanup;\r
+    }\r
+\r
+    // Filling the set of points, levels and parts displacement\r
+    out = 0;\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        // Computation the number of level\r
+        level = i + H->lambda;\r
+        for (j = 0; j < tmpKPoints[i]; j++, out++)\r
+        {\r
+            (*levels)[out] = level;\r
+            (*points)[out] = pointCells[i][j];\r
+            (*score)[out] = tmpScore[i][j];\r
+            // Ownership of the displacement array moves to the output\r
+            (*partsDisplacement)[out] = partCells[i][j];\r
+        }\r
+    }\r
+    res = LATENT_SVM_OK;\r
+\r
+cleanup:\r
+    // Release allocated memory\r
+    // NOTE(review): the arrays in tmpScore[i], pointCells[i] and partCells[i]\r
+    // are not released here (nor were they before this change); presumably\r
+    // thresholdFunctionalScoreFixedLevel allocates them - confirm and free.\r
+    if (procLevels != NULL)\r
+    {\r
+        for (i = 0; i < workers; i++)\r
+        {\r
+            free(procLevels[i]);\r
+        }\r
+    }\r
+    free(procLevels);\r
+    free(kLevels);\r
+    free(tmpPartsDisplacement);\r
+    free(partCells);\r
+    free(tmpPoints);\r
+    free(pointCells);\r
+    free(tmpScore);\r
+    free(tmpKPoints);\r
+\r
+    return res;\r
+}\r
+#endif\r
+\r
void sort(int n, const float* x, int* indices)\r
{\r
int i, j;\r
#include "opencv2/highgui/highgui.hpp"\r
#include <stdio.h>\r
\r
+#ifdef HAVE_CONFIG_H \r
+#include <cvconfig.h> \r
+#endif\r
+#ifdef HAVE_TBB\r
+#include "tbb/task_scheduler_init.h"\r
+#endif\r
+\r
using namespace cv;\r
\r
void help()\r
printf( "This program demonstrated the use of the latentSVM detector.\n"\r
"It reads in a trained object model and then uses that to detect the object in an image\n"\r
"Call:\n"\r
- "./latentsvmdetect [<image_filename> <model_filename]\n"\r
+ "./latentsvmdetect [<image_filename> <model_filename> [<threads_number>]]\n"\r
" The defaults for image_filename and model_filename are cat.jpg and cat.xml respectively\n"\r
" Press any key to quit.\n");\r
}\r
\r
const char* model_filename = "cat.xml";\r
const char* image_filename = "cat.jpg";\r
+int tbbNumThreads = -1;\r
\r
-void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector)\r
+void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector, int numThreads = -1)\r
{\r
CvMemStorage* storage = cvCreateMemStorage(0);\r
CvSeq* detections = 0;\r
int i = 0;\r
int64 start = 0, finish = 0;\r
+#ifdef HAVE_TBB\r
+ tbb::task_scheduler_init init(tbb::task_scheduler_init::deferred);\r
+ if (numThreads > 0)\r
+ {\r
+ init.initialize(numThreads);\r
+ printf("Number of threads %i\n", numThreads);\r
+ }\r
+ else\r
+ {\r
+ printf("Number of threads is not correct for TBB version");\r
+ return;\r
+ }\r
+#endif\r
\r
start = cvGetTickCount();\r
- detections = cvLatentSvmDetectObjects(image, detector, storage);\r
+ detections = cvLatentSvmDetectObjects(image, detector, storage, 0.5f, numThreads);\r
finish = cvGetTickCount();\r
printf("detection time = %.3f\n", (float)(finish - start) / (float)(cvGetTickFrequency() * 1000000.0));\r
\r
+#ifdef HAVE_TBB\r
+ init.terminate();\r
+#endif\r
for( i = 0; i < detections->total; i++ )\r
{\r
CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, i );\r
{\r
image_filename = argv[1];\r
model_filename = argv[2];\r
+ if (argc > 3)\r
+ {\r
+ tbbNumThreads = atoi(argv[3]);\r
+ }\r
}\r
IplImage* image = cvLoadImage(image_filename);\r
if (!image)\r
cvReleaseImage( &image );\r
return -1;\r
}\r
- detect_and_draw_objects( image, detector );\r
+ detect_and_draw_objects( image, detector, tbbNumThreads );\r
cvNamedWindow( "test", 0 );\r
cvShowImage( "test", image );\r
cvWaitKey(0);\r