Parallel version of Latent SVM.
authorValentina Kustikova <no@email>
Tue, 8 Feb 2011 07:34:25 +0000 (07:34 +0000)
committerValentina Kustikova <no@email>
Tue, 8 Feb 2011 07:34:25 +0000 (07:34 +0000)
modules/core/include/opencv2/core/internal.hpp
modules/objdetect/include/opencv2/objdetect/objdetect.hpp
modules/objdetect/src/_latentsvm.h
modules/objdetect/src/_lsvm_error.h
modules/objdetect/src/_lsvm_matching.h
modules/objdetect/src/_lsvm_tbbversion.h [new file with mode: 0644]
modules/objdetect/src/latentsvm.cpp
modules/objdetect/src/latentsvmdetector.cpp
modules/objdetect/src/lsvmtbbversion.cpp [new file with mode: 0644]
modules/objdetect/src/matching.cpp
samples/c/latentsvmdetect.cpp

index 6fd834e..c5aff62 100644 (file)
@@ -127,6 +127,7 @@ CV_INLINE IppiSize ippiSize(int width, int height)
     #include "tbb/tbb_stddef.h"
     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
         #include "tbb/tbb.h"
+        #include "tbb/task.h"
         #undef min
         #undef max
     #else
index 9afd46a..a1de0ee 100644 (file)
@@ -247,7 +247,8 @@ CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
 // CvSeq* cvLatentSvmDetectObjects(const IplImage* image, 
 //                                                                     CvLatentSvmDetector* detector, 
 //                                                                     CvMemStorage* storage, 
-//                                                                     float overlap_threshold = 0.5f);
+//                                                                     float overlap_threshold = 0.5f,
+//                                  int numThreads = -1);
 // INPUT
 // image                               - image to detect objects in
 // detector                            - Latent SVM detector in internal representation
@@ -261,7 +262,8 @@ CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
 CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image, 
                                                                CvLatentSvmDetector* detector, 
                                                                CvMemStorage* storage, 
-                                                               float overlap_threshold CV_DEFAULT(0.5f));
+                                                               float overlap_threshold CV_DEFAULT(0.5f),
+                                int numThreads CV_DEFAULT(-1));
 
 #ifdef __cplusplus
 }
index c3368c2..e9be3d4 100644 (file)
@@ -248,7 +248,8 @@ int searchObjectThreshold(const CvLSVMFeaturePyramid *H,
                           int maxXBorder, int maxYBorder, \r
                           float scoreThreshold,\r
                           CvPoint **points, int **levels, int *kPoints, \r
-                          float **score, CvPoint ***partsDisplacement);\r
+                          float **score, CvPoint ***partsDisplacement,\r
+                          int numThreads CV_DEFAULT(-1));\r
 \r
 /*\r
 // Computation root filters displacement and values of score function\r
@@ -283,7 +284,7 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
                                         int kComponents, const int *kPartFilters,\r
                                         const float *b, float scoreThreshold,\r
                                         CvPoint **points, CvPoint **oppPoints,\r
-                                        float **score, int *kPoints);\r
+                                        float **score, int *kPoints, int numThreads);\r
 \r
 /*\r
 // Compute opposite point for filter box\r
index 1d6fb63..a3a10b2 100644 (file)
 #define LATENT_SVM_SEARCH_OBJECT_FAILED -5
 #define LATENT_SVM_FAILED_SUPERPOSITION -6
 #define FILTER_OUT_OF_BOUNDARIES -7
+#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8
+#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9
 #define FFT_OK 2
-#define FFT_ERROR -8
-#define LSVM_PARSER_FILE_NOT_FOUND -9
+#define FFT_ERROR -10
+#define LSVM_PARSER_FILE_NOT_FOUND -11
 
 #endif
index 1b51fc5..5c8bb43 100644 (file)
 #include "_lsvm_fft.h"
 #include "_lsvm_routine.h"
 
+#ifdef HAVE_TBB
+#include "_lsvm_tbbversion.h"
+#endif
+
 //extern "C" {
 /*
 // Function for convolution computation
@@ -352,6 +356,46 @@ int thresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n,
                              CvPoint **points, int **levels, int *kPoints,
                              CvPoint ***partsDisplacement);
 
+#ifdef HAVE_TBB
+/*
+// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, 
+                                   const CvLSVMFeaturePyramid *H, 
+                                   const float b, 
+                                   const int maxXBorder, const int maxYBorder,
+                                   const float scoreThreshold,
+                                   const int threadsNum,
+                                   float **score, 
+                                   CvPoint **points, int **levels, int *kPoints,
+                                   CvPoint ***partsDisplacement);
+// INPUT
+// all_F             - the set of filters (the first element is root filter, 
+                       the other - part filters)
+// n                 - the number of part filters
+// H                 - feature pyramid
+// b                 - linear term of the score function
+// maxXBorder        - the largest root filter size (X-direction)
+// maxYBorder        - the largest root filter size (Y-direction)
+// scoreThreshold    - score threshold
+// threadsNum        - number of threads that will be created using TBB version
+// OUTPUT
+// score             - score function values that exceed threshold
+// points            - the set of root filter positions (in the block space)
+// levels            - the set of levels
+// kPoints           - number of root filter positions
+// partsDisplacement - displacement of part filters (in the block space)
+// RESULT
+// Error status
+*/
+int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, 
+                                const CvLSVMFeaturePyramid *H, 
+                                const float b, 
+                                const int maxXBorder, const int maxYBorder,
+                                const float scoreThreshold,
+                                const int threadsNum,
+                                float **score, 
+                                CvPoint **points, int **levels, int *kPoints,
+                                CvPoint ***partsDisplacement);
+#endif
 
 /*
 // Perform non-maximum suppression algorithm (described in original paper)
diff --git a/modules/objdetect/src/_lsvm_tbbversion.h b/modules/objdetect/src/_lsvm_tbbversion.h
new file mode 100644 (file)
index 0000000..1df210b
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _LSVM_TBBVERSION_H\r
+#define _LSVM_TBBVERSION_H\r
+\r
+#include "_lsvm_matching.h"\r
+#include "precomp.hpp"\r
+\r
+/*\r
+// Computation score function using TBB tasks\r
+//\r
+// API\r
+// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+                                        const CvLSVMFeaturePyramid *H, const float b,\r
+                                        const int maxXBorder, const int maxYBorder,\r
+                                        const float scoreThreshold,\r
+                                        int *kLevels, int **procLevels,\r
+                                        const int threadsNum,\r
+                                        float **score, CvPoint ***points, \r
+                                        int *kPoints,\r
+                                        CvPoint ****partsDisplacement);\r
+// INPUT\r
+// filters           - the set of filters (the first element is root filter, \r
+                       the other - part filters)\r
+// n                 - the number of part filters\r
+// H                 - feature pyramid\r
+// b                 - linear term of the score function\r
+// maxXBorder        - the largest root filter size (X-direction)\r
+// maxYBorder        - the largest root filter size (Y-direction)\r
+// scoreThreshold    - score threshold\r
+// kLevels           - array that contains number of levels processed \r
+                       by each thread\r
+// procLevels        - array that contains lists of levels processed \r
+                       by each thread\r
+// threadsNum        - the number of created threads\r
+// OUTPUT\r
+// score             - score function values that exceed threshold\r
+// points            - the set of root filter positions (in the block space)\r
+// kPoints           - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status\r
+*/\r
+int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+                                     const CvLSVMFeaturePyramid *H, const float b,\r
+                                     const int maxXBorder, const int maxYBorder,\r
+                                     const float scoreThreshold,\r
+                                     int *kLevels, int **procLevels,\r
+                                     const int threadsNum,\r
+                                     float **score, CvPoint ***points, \r
+                                     int *kPoints,\r
+                                     CvPoint ****partsDisplacement);\r
+\r
+#endif
\ No newline at end of file
index 9f26edb..c6501fa 100644 (file)
@@ -271,17 +271,30 @@ int searchObjectThreshold(const CvLSVMFeaturePyramid *H,
                           int maxXBorder, int maxYBorder, \r
                           float scoreThreshold,\r
                           CvPoint **points, int **levels, int *kPoints, \r
-                          float **score, CvPoint ***partsDisplacement)\r
+                          float **score, CvPoint ***partsDisplacement,\r
+                          int numThreads)\r
 {\r
     int opResult;\r
 \r
 \r
     // Matching\r
+#ifdef HAVE_TBB\r
+    if (numThreads <= 0)\r
+    {\r
+        opResult = LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT;\r
+        return opResult;\r
+    }\r
+    opResult = tbbThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder,\r
+                                           scoreThreshold, numThreads, score, \r
+                                           points, levels, kPoints, \r
+                                           partsDisplacement);\r
+#else\r
     opResult = thresholdFunctionalScore(all_F, n, H, b, \r
                                         maxXBorder, maxYBorder, \r
                                         scoreThreshold, \r
                                         score, points, levels, \r
                                         kPoints, partsDisplacement);\r
+#endif\r
     if (opResult != LATENT_SVM_OK)\r
     {\r
         return LATENT_SVM_SEARCH_OBJECT_FAILED;\r
@@ -537,7 +550,8 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
                                         int kComponents, const int *kPartFilters,\r
                                         const float *b, float scoreThreshold,\r
                                         CvPoint **points, CvPoint **oppPoints,\r
-                                        float **score, int *kPoints)\r
+                                        float **score, int *kPoints,\r
+                                        int numThreads)\r
 {\r
     int error = 0;\r
     int i, j, s, f, componentIndex;\r
@@ -561,10 +575,17 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
     // For each component perform searching\r
     for (i = 0; i < kComponents; i++)\r
     {\r
+#ifdef HAVE_TBB\r
+        searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],\r
+            b[i], maxXBorder, maxYBorder, scoreThreshold,\r
+            &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), \r
+            &(scoreArr[i]), &(partsDisplacementArr[i]), numThreads);\r
+#else\r
         searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],\r
             b[i], maxXBorder, maxYBorder, scoreThreshold, \r
             &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), \r
             &(scoreArr[i]), &(partsDisplacementArr[i]));\r
+#endif\r
         estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], \r
             filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));        \r
         componentIndex += (kPartFilters[i] + 1);\r
index 34a1a49..28ebb76 100644 (file)
@@ -69,7 +69,8 @@ void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector)
 // CvSeq* cvLatentSvmDetectObjects(const IplImage* image, \r
 //                                                                     CvLatentSvmDetector* detector, \r
 //                                                                     CvMemStorage* storage, \r
-//                                                                     float overlap_threshold = 0.5f);\r
+//                                                                     float overlap_threshold = 0.5f,\r
+//                                  int numThreads = -1);\r
 // INPUT\r
 // image                               - image to detect objects in\r
 // detector                            - Latent SVM detector in internal representation\r
@@ -82,7 +83,7 @@ void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector)
 CvSeq* cvLatentSvmDetectObjects(IplImage* image, \r
                                                                CvLatentSvmDetector* detector, \r
                                                                CvMemStorage* storage, \r
-                                                               float overlap_threshold)\r
+                                                               float overlap_threshold, int numThreads)\r
 {\r
        CvLSVMFeaturePyramid *H = 0;\r
     CvPoint *points = 0, *oppPoints = 0;\r
@@ -103,7 +104,7 @@ CvSeq* cvLatentSvmDetectObjects(IplImage* image,
     // Search object\r
     searchObjectThresholdSomeComponents(H, (const CvLSVMFilterObject**)(detector->filters), detector->num_components, \r
                detector->num_part_filters, detector->b, detector->score_threshold, \r
-                &points, &oppPoints, &score, &kPoints);\r
+                &points, &oppPoints, &score, &kPoints, numThreads);\r
     // Clipping boxes\r
     clippingBoxes(image->width, image->height, points, kPoints);\r
     clippingBoxes(image->width, image->height, oppPoints, kPoints);\r
diff --git a/modules/objdetect/src/lsvmtbbversion.cpp b/modules/objdetect/src/lsvmtbbversion.cpp
new file mode 100644 (file)
index 0000000..a9e3cf5
--- /dev/null
@@ -0,0 +1,121 @@
+#include "_lsvm_tbbversion.h"\r
+\r
+\r
+#ifdef HAVE_TBB\r
+/*\r
+// Task class\r
+*/\r
+class ScoreComputation : public tbb::task\r
+{\r
+private:\r
+    const CvLSVMFilterObject **filters;\r
+    const int n;\r
+    const CvLSVMFeaturePyramid *H;\r
+    const float b;\r
+    const int maxXBorder;\r
+    const int maxYBorder;\r
+    const float scoreThreshold;\r
+    const int kLevels;\r
+    const int *procLevels;\r
+public:\r
+    float **score;\r
+    CvPoint ***points;\r
+    CvPoint ****partsDisplacement;\r
+    int *kPoints;\r
+public:\r
+    ScoreComputation(const CvLSVMFilterObject **_filters, int _n, \r
+                     const CvLSVMFeaturePyramid *_H,\r
+                     float _b, int _maxXBorder, int _maxYBorder,\r
+                     float _scoreThreshold, int _kLevels, const int *_procLevels,\r
+                     float **_score, CvPoint ***_points, int *_kPoints,\r
+                     CvPoint ****_partsDisplacement) :\r
+    n(_n), b(_b), maxXBorder(_maxXBorder), \r
+        maxYBorder(_maxYBorder), scoreThreshold(_scoreThreshold),\r
+        kLevels(_kLevels), score(_score), points(_points), kPoints(_kPoints),\r
+        partsDisplacement(_partsDisplacement)\r
+    {\r
+        filters = _filters;\r
+        H = _H;\r
+        procLevels = _procLevels;\r
+    };\r
+\r
+    task* execute()\r
+    {\r
+        int i, level, partsLevel, res;\r
+        for (i = 0; i < kLevels; i++)\r
+        {\r
+            level = procLevels[i];\r
+            partsLevel = level - H->lambda;\r
+            res = thresholdFunctionalScoreFixedLevel(\r
+                filters, n, H, level, b,\r
+                maxXBorder, maxYBorder, scoreThreshold, &(score[partsLevel]), \r
+                points[partsLevel], &(kPoints[partsLevel]), \r
+                partsDisplacement[partsLevel]);\r
+            if (res != LATENT_SVM_OK)\r
+            {\r
+                continue;\r
+            }\r
+        }\r
+        return NULL;\r
+    }\r
+};\r
+\r
+/*\r
+// Computation score function using TBB tasks\r
+//\r
+// API\r
+// int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+                                        const CvLSVMFeaturePyramid *H, const float b,\r
+                                        const int maxXBorder, const int maxYBorder,\r
+                                        const float scoreThreshold,\r
+                                        int *kLevels, int **procLevels,\r
+                                        const int threadsNum,\r
+                                        float **score, CvPoint ***points, \r
+                                        int *kPoints,\r
+                                        CvPoint ****partsDisplacement);\r
+// INPUT\r
+// filters           - the set of filters (the first element is root filter, \r
+                       the other - part filters)\r
+// n                 - the number of part filters\r
+// H                 - feature pyramid\r
+// b                 - linear term of the score function\r
+// maxXBorder        - the largest root filter size (X-direction)\r
+// maxYBorder        - the largest root filter size (Y-direction)\r
+// scoreThreshold    - score threshold\r
+// kLevels           - array that contains number of levels processed \r
+                       by each thread\r
+// procLevels        - array that contains lists of levels processed \r
+                       by each thread\r
+// threadsNum        - the number of created threads\r
+// OUTPUT\r
+// score             - score function values that exceed threshold\r
+// points            - the set of root filter positions (in the block space)\r
+// kPoints           - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status\r
+*/\r
+int tbbTasksThresholdFunctionalScore(const CvLSVMFilterObject **filters, const int n, \r
+                                     const CvLSVMFeaturePyramid *H, const float b,\r
+                                     const int maxXBorder, const int maxYBorder,\r
+                                     const float scoreThreshold,\r
+                                     int *kLevels, int **procLevels,\r
+                                     const int threadsNum,\r
+                                     float **score, CvPoint ***points, \r
+                                     int *kPoints,\r
+                                     CvPoint ****partsDisplacement)\r
+{\r
+    tbb::task_list tasks;\r
+    int i;\r
+    for (i = 0; i < threadsNum; i++)\r
+    {\r
+        ScoreComputation& sc = \r
+            *new(tbb::task::allocate_root()) ScoreComputation(filters, n, H, b,\r
+            maxXBorder, maxYBorder, scoreThreshold, kLevels[i], procLevels[i], \r
+            score, points, kPoints, partsDisplacement);\r
+        tasks.push_back(sc);\r
+    }\r
+    tbb::task::spawn_root_and_wait(tasks);\r
+    return LATENT_SVM_OK;\r
+};\r
+#endif
\ No newline at end of file
index a49144c..f04e6fc 100644 (file)
 int convolution(const CvLSVMFilterObject *Fi, const CvLSVMFeatureMap *map, float *f)\r
 {\r
     int n1, m1, n2, m2, p, size, diff1, diff2;\r
-    int i1, i2, j1, j2, k; \r
-    \r
-    n1 = map->sizeY;\r
-    m1 = map->sizeX;\r
-    n2 = Fi->sizeY;\r
-    m2 = Fi->sizeX;\r
-    p = map->p;\r
-    if (n1 < n2 || m1 < m2)\r
-    {\r
-        return FILTER_OUT_OF_BOUNDARIES;\r
-    }\r
-\r
-    // Computation number of positions for the filter\r
-    diff1 = n1 - n2 + 1;\r
-    diff2 = m1 - m2 + 1;\r
-    size = diff1 * diff2;\r
+       int i1, i2, j1, j2, k;\r
+       float tmp_f1, tmp_f2, tmp_f3, tmp_f4;\r
+       float *pMap = NULL;\r
+       float *pH = NULL;\r
+           \r
+       n1 = map->sizeY;\r
+       m1 = map->sizeX;\r
+       n2 = Fi->sizeY;\r
+       m2 = Fi->sizeX;\r
+       p = map->p;\r
+\r
+       diff1 = n1 - n2 + 1;\r
+       diff2 = m1 - m2 + 1;\r
+       size = diff1 * diff2;\r
+       for (j1 = diff2 - 1; j1 >= 0; j1--)\r
+       {\r
+               \r
+               for (i1 = diff1 - 1; i1 >= 0; i1--)\r
+               {\r
+                       tmp_f1 = 0.0f;\r
+                       tmp_f2 = 0.0f;\r
+                       tmp_f3 = 0.0f;\r
+                       tmp_f4 = 0.0f;\r
+                       for (i2 = 0; i2 < n2; i2++)\r
+                       {\r
+                               for (j2 = 0; j2 < m2; j2++)\r
+                               {\r
+                                       pMap = map->Map + (i1 + i2) * m1 * p + (j1 + j2) * p;//sm2\r
+                                       pH = Fi->H + (i2 * m2 + j2) * p;//sm2\r
+                                       for (k = 0; k < p/4; k++)\r
+                                       {\r
 \r
-    for (i1 = 0; i1 < diff1; i1++)\r
-    {\r
-        for (j1 = 0; j1 < diff2; j1++)\r
-        {\r
-            f[i1 * diff2 + j1] = 0.0;\r
-            for (i2 = 0; i2 < n2; i2++)\r
-            {\r
-                for (j2 = 0; j2 < m2; j2++)\r
-                {\r
-                    for (k = 0; k < p; k++)\r
-                    {\r
-                        f[i1 * diff2 + j1] += map->Map[(i1 + i2) * m1 * p + \r
-                                                       (j1 + j2) * p + k] * \r
-                                              Fi->H[(i2 * m2 + j2) * p + k];\r
-                    }\r
-                }\r
-            }\r
-        }\r
-    }\r
+                                               tmp_f1 += pMap[4*k]*pH[4*k];//sm2\r
+                                               tmp_f2 += pMap[4*k+1]*pH[4*k+1];\r
+                                               tmp_f3 += pMap[4*k+2]*pH[4*k+2];\r
+                                               tmp_f4 += pMap[4*k+3]*pH[4*k+3];\r
+                                       }\r
+                       \r
+                                       if (p%4==1)\r
+                                       {\r
+                                               tmp_f1 += pH[p-1]*pMap[p-1];\r
+                                       }\r
+                                       else\r
+                                       {\r
+                                               if (p%4==2)\r
+                                               {\r
+                                                       tmp_f1 += pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1];\r
+                                               }\r
+                                               else \r
+                                               {\r
+                                                       if (p%4==3)\r
+                                                       {\r
+                                                               tmp_f1 += pH[p-3]*pMap[p-3] + pH[p-2]*pMap[p-2] + pH[p-1]*pMap[p-1];\r
+                                                       }\r
+                                               }\r
+                                       }\r
+                                       \r
+                               }\r
+                       }\r
+                       f[i1 * diff2 + j1] = tmp_f1 + tmp_f2 + tmp_f3 + tmp_f4;//sm1\r
+               }\r
+       }\r
     return LATENT_SVM_OK;\r
 }\r
 \r
@@ -1341,6 +1367,320 @@ int thresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n,
     return LATENT_SVM_OK;  \r
 }\r
 \r
+/*\r
+// Creating schedule of pyramid levels processing \r
+//\r
+// API\r
+// int createSchedule(const CvLSVMFeaturePyramid *H, const CvLSVMFilterObject **all_F,\r
+                      const int n, const int bx, const int by,\r
+                      const int threadsNum, int *kLevels, \r
+                      int **processingLevels)\r
+// INPUT\r
+// H                 - feature pyramid\r
+// all_F             - the set of filters (the first element is root filter, \r
+                       the other - part filters)\r
+// n                 - the number of part filters\r
+// bx                - size of nullable border (X direction)\r
+// by                - size of nullable border (Y direction)\r
+// threadsNum        - number of threads that will be created in TBB version\r
+// OUTPUT\r
+// kLevels           - array that contains number of levels processed \r
+                       by each thread\r
+// processingLevels  - array that contains lists of levels processed \r
+                       by each thread\r
+// RESULT\r
+// Error status\r
+*/\r
+int createSchedule(const CvLSVMFeaturePyramid *H, const CvLSVMFilterObject **all_F,\r
+                   const int n, const int bx, const int by,\r
+                   const int threadsNum, int *kLevels, int **processingLevels)\r
+{\r
+    int rootFilterDim, sumPartFiltersDim, i, numLevels, dbx, dby, numDotProducts;\r
+    int averNumDotProd, j, minValue, argMin, tmp, lambda, maxValue, k;\r
+    int *dotProd, *weights, *disp;\r
+    if (H == NULL || all_F == NULL)\r
+    {\r
+        return LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED;\r
+    }\r
+    // Number of feature vectors in root filter\r
+    rootFilterDim = all_F[0]->sizeX * all_F[0]->sizeY;\r
+    // Number of feature vectors in all part filters\r
+    sumPartFiltersDim = 0;\r
+    for (i = 1; i <= n; i++)\r
+    {\r
+        sumPartFiltersDim += all_F[i]->sizeX * all_F[i]->sizeY;\r
+    }\r
+    // Number of levels which are used for computation of score function\r
+    numLevels = H->countLevel - H->lambda;\r
+    // Allocation memory for saving number of dot products that will be\r
+    // computed for each level of feature pyramid\r
+    dotProd = (int *)malloc(sizeof(int) * numLevels);\r
+    // Size of nullable border that's used in computing convolution\r
+    // of feature map with part filter\r
+    dbx = 2 * bx;\r
+    dby = 2 * by;\r
+    // Total number of dot products for all levels\r
+    numDotProducts = 0;\r
+    lambda = H->lambda;\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        dotProd[i] = H->pyramid[i + lambda]->sizeX * \r
+                     H->pyramid[i + lambda]->sizeY * rootFilterDim +\r
+                     (H->pyramid[i]->sizeX + dbx) * \r
+                     (H->pyramid[i]->sizeY + dby) * sumPartFiltersDim;\r
+        numDotProducts += dotProd[i];\r
+    }\r
+    // Average number of dot products that would be performed at the best\r
+    averNumDotProd = numDotProducts / threadsNum;\r
+    // Allocation memory for saving dot product number performed by each thread\r
+    weights = (int *)malloc(sizeof(int) * threadsNum);\r
+    // Allocate memory for the dispersion (max - min thread load) of each candidate assignment\r
+    disp = (int *)malloc(sizeof(int) * threadsNum);\r
+    // As a first step, assume that the first threadsNum levels are processed\r
+    // by different threads\r
+    for (i = 0; i < threadsNum; i++)\r
+    {\r
+        kLevels[i] = 1;\r
+        weights[i] = dotProd[i];\r
+        disp[i] = 0;\r
+    }\r
+    // Computation number of levels that will be processed by each thread\r
+    for (i = threadsNum; i < numLevels; i++)\r
+    {\r
+        // Search number of thread that will process level number i\r
+        for (j = 0; j < threadsNum; j++)\r
+        {\r
+            weights[j] += dotProd[i];\r
+            minValue = weights[0];\r
+            maxValue = weights[0];\r
+            for (k = 1; k < threadsNum; k++)\r
+            {\r
+                minValue = min(minValue, weights[k]);\r
+                maxValue = max(maxValue, weights[k]);\r
+            }\r
+            disp[j] = maxValue - minValue;\r
+            weights[j] -= dotProd[i];\r
+        }\r
+        minValue = disp[0];\r
+        argMin = 0;\r
+        for (j = 1; j < threadsNum; j++)\r
+        {\r
+            if (disp[j] < minValue)\r
+            {\r
+                minValue = disp[j];\r
+                argMin = j;\r
+            }\r
+        }\r
+        // Addition new level\r
+        kLevels[argMin]++;\r
+        weights[argMin] += dotProd[i];\r
+    }\r
+    for (i = 0; i < threadsNum; i++)\r
+    {\r
+        // Allocate memory for the list of levels processed by each thread\r
+        processingLevels[i] = (int *)malloc(sizeof(int) * kLevels[i]);\r
+        // As a first step, assume that the first threadsNum levels are processed\r
+        // by different threads\r
+        processingLevels[i][0] = lambda + i;\r
+        kLevels[i] = 1;\r
+        weights[i] = dotProd[i];\r
+    }\r
+    // Creating list of levels\r
+    for (i = threadsNum; i < numLevels; i++)\r
+    {\r
+        for (j = 0; j < threadsNum; j++)\r
+        {\r
+            weights[j] += dotProd[i];\r
+            minValue = weights[0];\r
+            maxValue = weights[0];\r
+            for (k = 1; k < threadsNum; k++)\r
+            {\r
+                minValue = min(minValue, weights[k]);\r
+                maxValue = max(maxValue, weights[k]);\r
+            }\r
+            disp[j] = maxValue - minValue;\r
+            weights[j] -= dotProd[i];\r
+        }\r
+        minValue = disp[0];\r
+        argMin = 0;\r
+        for (j = 1; j < threadsNum; j++)\r
+        {\r
+            if (disp[j] < minValue)\r
+            {\r
+                minValue = disp[j];\r
+                argMin = j;\r
+            }\r
+        }\r
+        processingLevels[argMin][kLevels[argMin]] = lambda + i;\r
+        kLevels[argMin]++;\r
+        weights[argMin] += dotProd[i];\r
+    }\r
+    // Release allocated memory\r
+    free(weights);\r
+    free(dotProd);\r
+    free(disp);\r
+    return LATENT_SVM_OK;\r
+}\r
+\r
+#ifdef HAVE_TBB\r
+/*\r
+// int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, \r
+                                   const CvLSVMFeaturePyramid *H, \r
+                                   const float b, \r
+                                   const int maxXBorder, const int maxYBorder,\r
+                                   const float scoreThreshold,\r
+                                   const int threadsNum,\r
+                                   float **score, \r
+                                   CvPoint **points, int **levels, int *kPoints,\r
+                                   CvPoint ***partsDisplacement);\r
+// INPUT\r
+// all_F             - the set of filters (the first element is root filter, \r
+                       the other - part filters)\r
+// n                 - the number of part filters\r
+// H                 - feature pyramid\r
+// b                 - linear term of the score function\r
+// maxXBorder        - the largest root filter size (X-direction)\r
+// maxYBorder        - the largest root filter size (Y-direction)\r
+// scoreThreshold    - score threshold\r
+// threadsNum        - number of threads that will be created using TBB version\r
+// OUTPUT\r
+// score             - score function values that exceed threshold\r
+// points            - the set of root filter positions (in the block space)\r
+// levels            - the set of levels\r
+// kPoints           - number of root filter positions\r
+// partsDisplacement - displacement of part filters (in the block space)\r
+// RESULT\r
+// Error status\r
+*/\r
+int tbbThresholdFunctionalScore(const CvLSVMFilterObject **all_F, int n, \r
+                                const CvLSVMFeaturePyramid *H, \r
+                                const float b, \r
+                                const int maxXBorder, const int maxYBorder,\r
+                                const float scoreThreshold,\r
+                                const int threadsNum,\r
+                                float **score, \r
+                                CvPoint **points, int **levels, int *kPoints,\r
+                                CvPoint ***partsDisplacement)\r
+{\r
+    int i, j, s, f, level, numLevels;\r
+    float **tmpScore;\r
+    CvPoint ***tmpPoints;\r
+    CvPoint ****tmpPartsDisplacement;   \r
+    int *tmpKPoints;\r
+    int res;\r
+\r
+    int *kLevels, **procLevels;\r
+    int bx, by;\r
+    \r
+    // Computation the number of levels for seaching object,\r
+    // first lambda-levels are used for computation values\r
+    // of score function for each position of root filter\r
+    numLevels = H->countLevel - H->lambda;\r
+\r
+    kLevels = (int *)malloc(sizeof(int) * threadsNum);\r
+    procLevels = (int **)malloc(sizeof(int*) * threadsNum);\r
+    computeBorderSize(maxXBorder, maxYBorder, &bx, &by);\r
+    res = createSchedule(H, all_F, n, bx, by, threadsNum, kLevels, procLevels);\r
+    if (res != LATENT_SVM_OK)\r
+    {\r
+        for (i = 0; i < threadsNum; i++)\r
+        {\r
+            if (procLevels[i] != NULL) \r
+            {\r
+                free(procLevels[i]);\r
+            }\r
+        }\r
+        free(procLevels);\r
+        free(kLevels);\r
+        return res;\r
+    }\r
+    \r
+    // Allocation memory for values of score function for each level\r
+    // that exceed threshold\r
+    tmpScore = (float **)malloc(sizeof(float*) * numLevels);        \r
+    // Allocation memory for the set of points that corresponds \r
+    // to the maximum of score function\r
+    tmpPoints = (CvPoint ***)malloc(sizeof(CvPoint **) * numLevels);\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        tmpPoints[i] = (CvPoint **)malloc(sizeof(CvPoint *));\r
+    }\r
+    // Allocation memory for memory for saving parts displacement on each level\r
+    tmpPartsDisplacement = (CvPoint ****)malloc(sizeof(CvPoint ***) * numLevels);\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        tmpPartsDisplacement[i] = (CvPoint ***)malloc(sizeof(CvPoint **));\r
+    }\r
+    // Number of points that corresponds to the maximum \r
+    // of score function on each level\r
+    tmpKPoints = (int *)malloc(sizeof(int) * numLevels);\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        tmpKPoints[i] = 0;\r
+    }\r
+\r
+    // Computation maxima of score function on each level\r
+    // and getting the maximum on all levels using TBB tasks\r
+    tbbTasksThresholdFunctionalScore(all_F, n, H, b, maxXBorder, maxYBorder,\r
+        scoreThreshold, kLevels, procLevels, \r
+        threadsNum, tmpScore, tmpPoints, \r
+        tmpKPoints, tmpPartsDisplacement);\r
+    (*kPoints) = 0;\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        (*kPoints) += tmpKPoints[i];\r
+    }\r
+        \r
+    // Allocation memory for levels\r
+    (*levels) = (int *)malloc(sizeof(int) * (*kPoints));\r
+    // Allocation memory for the set of points\r
+    (*points) = (CvPoint *)malloc(sizeof(CvPoint) * (*kPoints));   \r
+    // Allocation memory for parts displacement\r
+    (*partsDisplacement) = (CvPoint **)malloc(sizeof(CvPoint *) * (*kPoints));\r
+    // Allocation memory for score function values\r
+    (*score) = (float *)malloc(sizeof(float) * (*kPoints));\r
+\r
+    // Filling the set of points, levels and parts displacement\r
+    s = 0;\r
+    f = 0;\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        // Computation the number of level\r
+        level = i + H->lambda; \r
+\r
+        // Addition a set of points\r
+        f += tmpKPoints[i];\r
+        for (j = s; j < f; j++)\r
+        {\r
+            (*levels)[j] = level;\r
+            (*points)[j] = (*tmpPoints[i])[j - s];\r
+            (*score)[j] = tmpScore[i][j - s];\r
+            (*partsDisplacement)[j] = (*(tmpPartsDisplacement[i]))[j - s];\r
+        }            \r
+        s = f;\r
+    }\r
+\r
+    // Release allocated memory\r
+    for (i = 0; i < numLevels; i++)\r
+    {\r
+        free(tmpPoints[i]);\r
+        free(tmpPartsDisplacement[i]);\r
+    }\r
+    for (i = 0; i < threadsNum; i++)\r
+    {\r
+        free(procLevels[i]);\r
+    }\r
+    free(procLevels);\r
+    free(kLevels);\r
+    free(tmpPoints);\r
+    free(tmpScore);\r
+    free(tmpKPoints);\r
+    free(tmpPartsDisplacement);\r
+\r
+    return LATENT_SVM_OK;\r
+}\r
+#endif\r
+\r
 void sort(int n, const float* x, int* indices)\r
 {\r
        int i, j;\r
index ee1dab6..9f0ef9c 100644 (file)
@@ -2,6 +2,13 @@
 #include "opencv2/highgui/highgui.hpp"\r
 #include <stdio.h>\r
 \r
+#ifdef HAVE_CONFIG_H \r
+#include <cvconfig.h> \r
+#endif\r
+#ifdef HAVE_TBB\r
+#include "tbb/task_scheduler_init.h"\r
+#endif\r
+\r
 using namespace cv;\r
 \r
 void help()\r
@@ -9,26 +16,43 @@ void help()
        printf( "This program demonstrated the use of the latentSVM detector.\n"\r
                        "It reads in a trained object model and then uses that to detect the object in an image\n"\r
                        "Call:\n"\r
-                       "./latentsvmdetect [<image_filename> <model_filename]\n"\r
+            "./latentsvmdetect [<image_filename> <model_filename> [<threads_number>]]\n"\r
                        "  The defaults for image_filename and model_filename are cat.jpg and cat.xml respectively\n"\r
                        "  Press any key to quit.\n");\r
 }\r
 \r
 const char* model_filename = "cat.xml";\r
 const char* image_filename = "cat.jpg";\r
+int   tbbNumThreads = -1;\r
 \r
-void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector)\r
+void detect_and_draw_objects( IplImage* image, CvLatentSvmDetector* detector, int numThreads = -1)\r
 {\r
     CvMemStorage* storage = cvCreateMemStorage(0);\r
     CvSeq* detections = 0;\r
     int i = 0;\r
        int64 start = 0, finish = 0;\r
+#ifdef HAVE_TBB\r
+    tbb::task_scheduler_init init(tbb::task_scheduler_init::deferred);\r
+       if (numThreads > 0)\r
+       {\r
+               init.initialize(numThreads);\r
+        printf("Number of threads %i\n", numThreads);\r
+       }\r
+       else\r
+       {\r
+               printf("Number of threads is not correct for TBB version");\r
+               return;\r
+       }\r
+#endif\r
 \r
        start = cvGetTickCount();\r
-    detections = cvLatentSvmDetectObjects(image, detector, storage);\r
+    detections = cvLatentSvmDetectObjects(image, detector, storage, 0.5f, numThreads);\r
        finish = cvGetTickCount();\r
        printf("detection time = %.3f\n", (float)(finish - start) / (float)(cvGetTickFrequency() * 1000000.0));\r
 \r
+#ifdef HAVE_TBB\r
+    init.terminate();\r
+#endif\r
     for( i = 0; i < detections->total; i++ )\r
     {\r
         CvObjectDetection detection = *(CvObjectDetection*)cvGetSeqElem( detections, i );\r
@@ -48,6 +72,10 @@ int main(int argc, char* argv[])
        {\r
                image_filename = argv[1];\r
                model_filename = argv[2];\r
+        if (argc > 3)\r
+        {\r
+            tbbNumThreads = atoi(argv[3]);\r
+        }\r
        }\r
        IplImage* image = cvLoadImage(image_filename);\r
        if (!image)\r
@@ -64,7 +92,7 @@ int main(int argc, char* argv[])
                cvReleaseImage( &image );\r
                return -1;\r
        }\r
-    detect_and_draw_objects( image, detector );\r
+    detect_and_draw_objects( image, detector, tbbNumThreads );\r
     cvNamedWindow( "test", 0 );\r
     cvShowImage( "test", image );\r
     cvWaitKey(0);\r