some changes
authorKonstantin Matskevich <konstantin.matskevich@itseez.com>
Fri, 31 Jan 2014 04:46:27 +0000 (08:46 +0400)
committerKonstantin Matskevich <konstantin.matskevich@itseez.com>
Mon, 3 Feb 2014 10:46:53 +0000 (14:46 +0400)
modules/objdetect/include/opencv2/objdetect.hpp
modules/objdetect/perf/opencl/perf_hogdetect.cpp
modules/objdetect/src/hog.cpp
modules/objdetect/test/opencl/test_hogdetector.cpp

index e811c8b..4857897 100644 (file)
@@ -327,7 +327,7 @@ public:
                         Size padding = Size(),
                         const std::vector<Point>& searchLocations=std::vector<Point>()) const;
     //ocl
-    virtual bool ocl_detect(const UMat& img, std::vector<Point> &hits,
+    virtual bool ocl_detect(InputArray img, std::vector<Point> &hits,
                        double hitThreshold = 0, Size winStride = Size()) const;
     //with result weights output
     CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
@@ -360,7 +360,7 @@ public:
     CV_PROP double L2HysThreshold;
     CV_PROP bool gammaCorrection;
     CV_PROP std::vector<float> svmDetector;
-    CV_PROP std::vector<float> oclSvmDetector;
+    UMat oclSvmDetector;
     CV_PROP int nlevels;
 
 
index cdb9ed2..5d1d095 100644 (file)
@@ -85,7 +85,7 @@ OCL_PERF_TEST(HOGFixture, HOG)
     OCL_TEST_CYCLE() hog.detectMultiScale(src, found_locations);
 
     std::sort(found_locations.begin(), found_locations.end(), RectLess());
-//    SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
+    SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
 }
 
 }
index ae24bdd..3b33225 100644 (file)
@@ -113,10 +113,7 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
 {
     _svmDetector.getMat().convertTo(svmDetector, CV_32F);
 
-    std::vector<float> detector;
-    _svmDetector.getMat().copyTo(detector);
-
-    std::vector<float> detector_reordered(detector.size());
+    Mat detector_reordered(1, (int)svmDetector.size(), CV_32FC1);
 
     size_t block_hist_size = getBlockHistogramSize(blockSize, cellSize, nbins);
     cv::Size blocks_per_img = numPartsWithin(winSize, blockSize, blockStride);
@@ -124,12 +121,12 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
     for (int i = 0; i < blocks_per_img.height; ++i)
         for (int j = 0; j < blocks_per_img.width; ++j)
         {
-            const float *src = &detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
-            float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
+            const float *src = &svmDetector[0] + (j * blocks_per_img.height + i) * block_hist_size;
+            float *dst = (float*)detector_reordered.data + (i * blocks_per_img.width + j) * block_hist_size;
             for (size_t k = 0; k < block_hist_size; ++k)
                 dst[k] = src[k];
         }
-    Mat(detector_reordered).convertTo(oclSvmDetector, CV_32F);
+    detector_reordered.copyTo(oclSvmDetector);
     CV_Assert(checkDetectorSize());
 }
 
@@ -1119,14 +1116,16 @@ static bool ocl_computeGradient(InputArray img, UMat grad, UMat qangle, int nbin
 static bool ocl_compute_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width,
                               UMat grad, UMat qangle, UMat gauss_w_lut, UMat block_hists, size_t block_hist_size)
 {
+    ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc);
+    if(k.empty())
+        return false;
     bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
     cv::String opts;
     if(is_cpu)
        opts = "-D CPU ";
     else
-       opts = cv::format("-D WAVE_SIZE=%d", 32);
-
-    ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
+        opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
+    k.create("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
     if(k.empty())
         return false;
 
@@ -1177,13 +1176,6 @@ static int power_2up(unsigned int n)
 static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_y,
                                 int height, int width, UMat block_hists, float threshold)
 {
-    bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
-    cv::String opts;
-    if(is_cpu)
-       opts = "-D CPU ";
-    else
-       opts = cv::format("-D WAVE_SIZE=%d", 32);
-
     int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
         / block_stride_x;
@@ -1194,12 +1186,22 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_
     size_t localThreads[3] = { 1, 1, 1  };
 
     int idx = 0;
+    bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
+    cv::String opts;
     ocl::Kernel k;
     if ( nbins == 9 )
     {
+        k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+        if(k.empty())
+            return false;
+        if(is_cpu)
+           opts = "-D CPU ";
+        else
+            opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
         k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
         if(k.empty())
             return false;
+
         int blocks_in_group = NTHREADS / block_hist_size;
         nthreads = blocks_in_group * block_hist_size;
         int num_groups = (img_block_width * img_block_height + blocks_in_group - 1)/blocks_in_group;
@@ -1208,9 +1210,17 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_
     }
     else
     {
+        k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+        if(k.empty())
+            return false;
+        if(is_cpu)
+           opts = "-D CPU ";
+        else
+            opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
         k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
         if(k.empty())
             return false;
+
         nthreads = power_2up(block_hist_size);
         globalThreads[0] = img_block_width * nthreads;
         globalThreads[1] = img_block_height;
@@ -1377,9 +1387,8 @@ void HOGDescriptor::compute(InputArray _img, std::vector<float>& descriptors,
     padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
     Size paddedImgSize(imgSize.width + padding.width*2, imgSize.height + padding.height*2);
 
-    if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat() &&
+    CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat(),
         ocl_compute(_img, winStride, descriptors, DESCR_FORMAT_COL_BY_COL))
-        return;
 
     Mat img = _img.getMat();
     HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
@@ -1605,16 +1614,12 @@ private:
 
 static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
                                int win_stride_y, int win_stride_x, int height, int width,
-                               const UMat& block_hists, const std::vector<float>& _detector,
+                               const UMat& block_hists, UMat detector,
                                float free_coef, float threshold, UMat& labels, Size descr_size, int block_hist_size)
 {
     int nthreads;
     bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
     cv::String opts;
-    if(is_cpu)
-       opts = "-D CPU ";
-    else
-       opts = cv::format("-D WAVE_SIZE=%d", 32);
 
     ocl::Kernel k;
     int idx = 0;
@@ -1622,22 +1627,45 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
     {
     case 180:
         nthreads = 180;
+        k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+        if(k.empty())
+            return false;
+        if(is_cpu)
+           opts = "-D CPU ";
+        else
+            opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
         k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
         if(k.empty())
             return false;
         idx = k.set(idx, descr_size.width);
         idx = k.set(idx, descr_size.height);
         break;
+
     case 252:
         nthreads = 256;
+        k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+        if(k.empty())
+            return false;
+        if(is_cpu)
+           opts = "-D CPU ";
+        else
+            opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
         k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
         if(k.empty())
             return false;
         idx = k.set(idx, descr_size.width);
         idx = k.set(idx, descr_size.height);
         break;
+
     default:
         nthreads = 256;
+        k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+        if(k.empty())
+            return false;
+        if(is_cpu)
+           opts = "-D CPU ";
+        else
+            opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
         k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
         if(k.empty())
             return false;
@@ -1655,8 +1683,6 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
     size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
     size_t localThreads[3] = { nthreads, 1, 1 };
 
-    UMat detector(_detector, true);
-
     idx = k.set(idx, block_hist_size);
     idx = k.set(idx, img_win_width);
     idx = k.set(idx, img_block_width);
@@ -1671,7 +1697,7 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
     return k.run(2, globalThreads, localThreads, false);
 }
 
-bool HOGDescriptor::ocl_detect(const UMat& img, std::vector<Point> &hits,
+bool HOGDescriptor::ocl_detect(InputArray img, std::vector<Point> &hits,
                        double hit_threshold, Size win_stride) const
 {
     hits.clear();
@@ -1743,20 +1769,21 @@ bool HOGDescriptor::ocl_detectMultiScale(InputArray _img, std::vector<Rect> &fou
 {
     std::vector<Rect> all_candidates;
     std::vector<Point> locations;
-    UMat img = _img.getUMat(), image_scale;
-    image_scale.create(img.size(), img.type());
+    UMat image_scale;
+    Size imgSize = _img.size();
+    image_scale.create(imgSize, _img.type());
 
     for (size_t i = 0; i<level_scale.size() ; i++)
     {
         double scale = level_scale[i];
-        Size effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
-        if (effect_size == img.size())
+        Size effect_size = Size(cvRound(imgSize.width / scale), cvRound(imgSize.height / scale));
+        if (effect_size == imgSize)
         {
-            if(!ocl_detect(img, locations, hit_threshold, win_stride)) return false;
+            if(!ocl_detect(_img, locations, hit_threshold, win_stride)) return false;
         }
         else
         {
-            resize(img, image_scale, effect_size);
+            resize(_img, image_scale, effect_size);
             if(!ocl_detect(image_scale, locations, hit_threshold, win_stride)) return false;
         }
         Size scaled_win_size(cvRound(winSize.width * scale),
@@ -1791,10 +1818,12 @@ void HOGDescriptor::detectMultiScale(
     levels = std::max(levels, 1);
     levelScale.resize(levels);
 
-    if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
-        winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat() &&
-        ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold))
-        return;
+    if(winStride == Size())
+        winStride = blockStride;
+
+    CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
+        winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(),
+        ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold));
 
     std::vector<Rect> allCandidates;
     std::vector<double> tempScales;
index a9e4e9f..8568352 100644 (file)
@@ -110,20 +110,6 @@ OCL_TEST_P(HOG, Detect)
     OCL_OFF(hog.detectMultiScale(img, cpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6));
     OCL_ON(hog.detectMultiScale(uimg, gpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6));
 
-    for (size_t i = 0; i < cpu_found.size(); i++)
-    {
-        Rect r = cpu_found[i];
-        rectangle(img, r.tl(), r.br(), Scalar(0, 255, 0), 3);
-    }
-    imshow("cpu", img);
-    Mat imgs(img);
-    for (size_t i = 0; i < gpu_found.size(); i++)
-    {
-        Rect r = cpu_found[i];
-        rectangle(imgs, r.tl(), r.br(), Scalar(0, 255, 0), 3);
-    }
-    imshow("gpu", imgs);
-    waitKey(0);
     EXPECT_LT(checkRectSimilarity(img.size(), cpu_found, gpu_found), 1.0);
 }