fixed extra memory allocations.
authorAnatoly Baksheev <no@email>
Wed, 30 Mar 2011 11:42:23 +0000 (11:42 +0000)
committerAnatoly Baksheev <no@email>
Wed, 30 Mar 2011 11:42:23 +0000 (11:42 +0000)
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/gpu/src/hog.cpp

index 2bca086..f487ac6 100644 (file)
@@ -1353,14 +1353,20 @@ namespace cv
             GpuMat detector;\r
 \r
             // Results of the last classification step\r
-            GpuMat labels;\r
+            GpuMat labels, labels_buf;\r
             Mat labels_host;\r
 \r
             // Results of the last histogram evaluation step\r
-            GpuMat block_hists;\r
+            GpuMat block_hists, block_hists_buf;\r
 \r
             // Gradients conputation results\r
-            GpuMat grad, qangle;\r
+            GpuMat grad, qangle, grad_buf, qangle_buf;\r
+\r
+                       // returns a sub-buffer of the required size; reallocates the buffer if necessary.\r
+                       static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);\r
+                       static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);\r
+\r
+                       std::vector<GpuMat> image_scales;\r
         };\r
 \r
 \r
index 8095938..5aec630 100644 (file)
@@ -95,9 +95,8 @@ void resize_8UC4(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst);
 }}}\r
 \r
     \r
-cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, \r
-                                      Size cell_size, int nbins, double win_sigma, double threshold_L2hys,\r
-                                      bool gamma_correction, int nlevels)\r
+cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size, \r
+                                                                         int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)\r
         : win_size(win_size), \r
           block_size(block_size), \r
           block_stride(block_stride), \r
@@ -108,55 +107,45 @@ cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block
           gamma_correction(gamma_correction),\r
           nlevels(nlevels)\r
 {\r
-    CV_Assert((win_size.width - block_size.width) % block_stride.width == 0 && \r
+    CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 && \r
               (win_size.height - block_size.height) % block_stride.height == 0);\r
 \r
-    CV_Assert(block_size.width % cell_size.width == 0 && \r
-              block_size.height % cell_size.height == 0);\r
+    CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);\r
 \r
     CV_Assert(block_stride == cell_size);\r
 \r
     CV_Assert(cell_size == Size(8, 8));\r
 \r
-    Size cells_per_block = Size(block_size.width / cell_size.width, \r
-                                block_size.height / cell_size.height);\r
+    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);\r
     CV_Assert(cells_per_block == Size(2, 2));\r
 \r
     cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);\r
-    hog::set_up_constants(nbins, block_stride.width, block_stride.height, \r
-                          blocks_per_win.width, blocks_per_win.height);\r
+    hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);\r
 }       \r
 \r
-\r
 size_t cv::gpu::HOGDescriptor::getDescriptorSize() const\r
 {\r
-    return numPartsWithin(win_size, block_size, block_stride).area() * \r
-           getBlockHistogramSize();\r
+    return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize(); /* blocks per detection window times values per block histogram */\r
 }\r
 \r
-\r
-size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const {\r
-    Size cells_per_block = Size(block_size.width / cell_size.width, \r
-                                block_size.height / cell_size.height);\r
+size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const \r
+{\r
+    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); /* cell count along each axis of one block; the result below is nbins values per cell */\r
     return (size_t)(nbins * cells_per_block.area());\r
 }\r
 \r
-\r
 double cv::gpu::HOGDescriptor::getWinSigma() const\r
 {\r
     return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;\r
 }\r
 \r
-\r
 bool cv::gpu::HOGDescriptor::checkDetectorSize() const\r
 {\r
     size_t detector_size = detector.rows * detector.cols;\r
     size_t descriptor_size = getDescriptorSize();\r
-    return detector_size == 0 || detector_size == descriptor_size || \r
-           detector_size == descriptor_size + 1;\r
+    return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; /* accepts an unset detector, an exact-size one, or one with a single extra element (presumably the SVM bias -- confirm) */\r
 }\r
 \r
-\r
 void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)\r
 {\r
     std::vector<float> detector_reordered(detector.size());\r
@@ -181,16 +170,36 @@ void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>& detector)
     CV_Assert(checkDetectorSize());\r
 }\r
 \r
+/* Returns a sz-sized view anchored at (0,0) of buf. buf is (re)allocated only when it is empty, has a different type, or is too small in either dimension. */\r
+cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)\r
+{\r
+    if (buf.empty() || buf.type() != type)\r
+        buf.create(sz, type);\r
+    else if (buf.cols < sz.width || buf.rows < sz.height) /* rows are checked against the height; comparing them with sz.width let a too-short buffer through to the ROI below */\r
+        buf.create(std::max(buf.rows, sz.height), std::max(buf.cols, sz.width), type); /* grow only, never shrink the cached buffer */\r
+\r
+    return buf(Rect(Point(0,0), sz));\r
+}\r
+\r
+cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)\r
+{ \r
+       return getBuffer(Size(cols, rows), type, buf); /* forwards to the Size overload; the Size is built as (cols, rows), i.e. width first */ \r
+}\r
+\r
 \r
 void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)\r
 {\r
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);\r
+       \r
+    //   grad.create(img.size(), CV_32FC2);\r
+       grad = getBuffer(img.size(), CV_32FC2, grad_buf);    \r
 \r
-    grad.create(img.size(), CV_32FC2);\r
-    qangle.create(img.size(), CV_8UC2);\r
+    //   qangle.create(img.size(), CV_8UC2);\r
+       qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);  \r
 \r
     float angleScale = (float)(nbins / CV_PI);\r
-    switch (img.type()) {\r
+    switch (img.type()) \r
+       {\r
         case CV_8UC1:\r
             hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);\r
             break;\r
@@ -207,11 +216,12 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 \r
     size_t block_hist_size = getBlockHistogramSize();\r
     Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);\r
-    block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);\r
 \r
-    hog::compute_hists(nbins, block_stride.width, block_stride.height,\r
-                       img.rows, img.cols, grad, qangle, (float)getWinSigma(), \r
-                       block_hists.ptr<float>());\r
+       //   block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);\r
+       block_hists = getBuffer(1, block_hist_size * blocks_per_img.area(), CV_32F, block_hists_buf);\r
+    \r
+    hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, \r
+                                               grad, qangle, (float)getWinSigma(), block_hists.ptr<float>());\r
 \r
     hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, \r
                          block_hists.ptr<float>(), (float)threshold_L2hys);\r
@@ -220,14 +230,13 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 \r
 void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)\r
 {\r
-    CV_Assert(win_stride.width % block_stride.width == 0 &&\r
-              win_stride.height % block_stride.height == 0);\r
+    CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);\r
 \r
     computeBlockHistograms(img);\r
 \r
     const int block_hist_size = getBlockHistogramSize();\r
     Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);\r
-    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);\r
+    Size wins_per_img   = numPartsWithin(img.size(), win_size, win_stride);\r
 \r
     descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);\r
 \r
@@ -235,13 +244,11 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
     {\r
     case DESCR_FORMAT_ROW_BY_ROW:\r
         hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, \r
-                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), \r
-                                    descriptors);\r
+                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);\r
         break;\r
     case DESCR_FORMAT_COL_BY_COL:\r
         hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, \r
-                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), \r
-                                    descriptors);\r
+                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);\r
         break;\r
     default:\r
         CV_Error(CV_StsBadArg, "Unknown descriptor format");\r
@@ -249,8 +256,7 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
 }\r
 \r
 \r
-void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, \r
-                                    Size win_stride, Size padding)\r
+void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)\r
 {\r
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);\r
     CV_Assert(padding == Size(0, 0));\r
@@ -264,11 +270,11 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
     if (win_stride == Size())\r
         win_stride = block_stride;\r
     else\r
-        CV_Assert(win_stride.width % block_stride.width == 0 &&\r
-                  win_stride.height % block_stride.height == 0);\r
+        CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);\r
 \r
     Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);\r
-    labels.create(1, wins_per_img.area(), CV_8U);\r
+    //   labels.create(1, wins_per_img.area(), CV_8U);\r
+       labels = getBuffer(1, wins_per_img.area(), CV_8U, labels_buf);\r
 \r
     hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, \r
                         win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), \r
@@ -286,11 +292,12 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
 }\r
 \r
 \r
-void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& found_locations, \r
-                                              double hit_threshold, Size win_stride, Size padding,\r
-                                              double scale0, int group_threshold)\r
+\r
+void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& found_locations, double hit_threshold,\r
+                                                                                         Size win_stride, Size padding, double scale0, int group_threshold)\r
 {\r
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);\r
+\r
+       CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);\r
 \r
     vector<double> level_scale;\r
     double scale = 1.;\r
@@ -306,6 +313,7 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
     }\r
     levels = std::max(levels, 1);\r
     level_scale.resize(levels);\r
+       image_scales.resize(levels);\r
 \r
     std::vector<Rect> all_candidates;   \r
     vector<Point> locations;\r
@@ -319,12 +327,14 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
         if (sz == img.size())\r
             smaller_img = img;\r
         else\r
-        {\r
-            smaller_img.create(sz, img.type());\r
-            switch (img.type()) {\r
-                case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;\r
-                case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;\r
+        {                      \r
+            image_scales[i].create(sz, img.type());\r
+            switch (img.type()) \r
+                       {\r
+                case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break;\r
+                case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break;\r
             }\r
+                       smaller_img = image_scales[i];\r
         }\r
 \r
         detect(smaller_img, locations, hit_threshold, win_stride, padding);\r
@@ -337,18 +347,14 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, vector<Rect>& f
     groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);\r
 }\r
 \r
-\r
 int cv::gpu::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) \r
 {\r
     return (size - part_size + stride) / stride;\r
 }\r
 \r
-\r
-cv::Size cv::gpu::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, \r
-                                                cv::Size stride) \r
+cv::Size cv::gpu::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) \r
 {\r
-    return Size(numPartsWithin(size.width, part_size.width, stride.width),\r
-                numPartsWithin(size.height, part_size.height, stride.height));\r
+    return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); /* applies the scalar overload to width and height independently */\r
 }\r
 \r
 std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector()\r
@@ -356,7 +362,6 @@ std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector()
     return getPeopleDetector64x128();\r
 }\r
 \r
-\r
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96()\r
 {\r
     static const float detector[] = {\r