integrated patch: HOG confidence calculation. Thanks, Wongun.
author marina.kolpakova <marina.kolpakova@itseez.com>
Wed, 25 Jul 2012 11:26:26 +0000 (15:26 +0400)
committer marina.kolpakova <marina.kolpakova@itseez.com>
Wed, 25 Jul 2012 11:26:26 +0000 (15:26 +0400)
modules/gpu/include/opencv2/gpu/gpu.hpp
modules/gpu/src/cuda/hog.cu
modules/gpu/src/hog.cpp
modules/objdetect/include/opencv2/objdetect/objdetect.hpp
modules/objdetect/src/hog.cpp

index 660a6df..515a4a2 100644 (file)
@@ -1142,6 +1142,13 @@ private:
 \r
 \r
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////\r
+// Per-scale input/output record for HOGDescriptor::computeConfidenceMultiScale.\r
+struct CV_EXPORTS HOGConfidence\r
+{\r
+   // Scale of this pyramid level; the image is resized to (cols / scale, rows / scale).\r
+   double scale;\r
+   // Overwritten by computeConfidence with the top-left corner of every evaluated window.\r
+   vector<Point> locations;\r
+   // Overwritten by computeConfidence with the SVM score of each entry in locations.\r
+   vector<double> confidences;\r
+   // NOTE(review): not read or written by the GPU code in this patch -- confirm intended use.\r
+   vector<double> part_scores[4];\r
+};\r
 \r
 struct CV_EXPORTS HOGDescriptor\r
 {\r
@@ -1173,6 +1180,13 @@ struct CV_EXPORTS HOGDescriptor
                           Size padding=Size(), double scale0=1.05,\r
                           int group_threshold=2);\r
 \r
+    // Scores every detection window of img at a single scale.\r
+    // hits receives the windows whose score is >= hit_threshold; locations and\r
+    // confidences receive the position and score of every window. padding must be Size(0, 0).\r
+    void computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,\r
+                                                Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences);\r
+\r
+    // Runs computeConfidence at each scale listed in conf_out (conf_out[i].scale), filling\r
+    // conf_out[i].locations / conf_out[i].confidences, and returns the grouped hits of all\r
+    // scales in found_locations.\r
+    void computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,\r
+                                                                    double hit_threshold, Size win_stride, Size padding,\r
+                                                                    vector<HOGConfidence> &conf_out, int group_threshold);\r
+\r
     void getDescriptors(const GpuMat& img, Size win_stride,\r
                         GpuMat& descriptors,\r
                         int descr_format=DESCR_FORMAT_COL_BY_COL);\r
index 8150bf9..eff6d20 100644 (file)
@@ -326,6 +326,97 @@ namespace cv { namespace gpu { namespace device
         //  Linear SVM based classification\r
         //\r
 \r
+       // Computes the linear-SVM score (confidence) of every detection window rather than\r
+       // only a positive/negative label.\r
+       //\r
+       // Each z-slice of the thread block handles one window: its nthreads threads\r
+       // (threadIdx.x) accumulate partial dot products of coefs with the window's block\r
+       // histograms, the partial sums are reduced through shared memory, and thread 0 stores\r
+       // product + free_coef into confidences[window_row * img_win_width + window_col].\r
+       // The threshold parameter is not read by this kernel.\r
+       // cdescr_size, cdescr_width and cblock_hist_size are defined outside this hunk.\r
+       template <int nthreads, // Number of threads per one histogram block\r
+                           int nblocks> // Number of histogram block processed by single GPU thread block\r
+       __global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,\r
+                                                                                                           const int win_block_stride_x, const int win_block_stride_y,\r
+                                                                                                           const float* block_hists, const float* coefs,\r
+                                                                                                           float free_coef, float threshold, float* confidences)\r
+       {\r
+               // Window handled by this z-slice; slices past the last window column exit early.\r
+               const int win_x = threadIdx.z;\r
+               if (blockIdx.x * blockDim.z + win_x >= img_win_width)\r
+                       return;\r
+\r
+               // First block histogram of this window (window row = blockIdx.y).\r
+               const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +\r
+                                                                                    blockIdx.x * win_block_stride_x * blockDim.z + win_x) *\r
+                                                                                   cblock_hist_size;\r
+\r
+               // Per-thread partial dot product; descriptor elements are visited with stride nthreads.\r
+               float product = 0.f;\r
+               for (int i = threadIdx.x; i < cdescr_size; i += nthreads)\r
+               {\r
+                       int offset_y = i / cdescr_width;\r
+                       int offset_x = i - offset_y * cdescr_width;\r
+                       product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];\r
+               }\r
+\r
+               __shared__ float products[nthreads * nblocks];\r
+\r
+               const int tid = threadIdx.z * nthreads + threadIdx.x;\r
+               products[tid] = product;\r
+\r
+               __syncthreads();\r
+\r
+               // Tree reduction of the partial sums; each stage halves the number of active threads.\r
+               if (nthreads >= 512)\r
+               {\r
+                       if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];\r
+                       __syncthreads();\r
+               }\r
+               if (nthreads >= 256)\r
+               {\r
+                       if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];\r
+                       __syncthreads();\r
+               }\r
+               if (nthreads >= 128)\r
+               {\r
+                       if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];\r
+                       __syncthreads();\r
+               }\r
+\r
+               // NOTE(review): the last stages use a volatile pointer and no __syncthreads();\r
+               // presumably this relies on the 32 threads running in lockstep -- confirm for the\r
+               // targeted architectures.\r
+               if (threadIdx.x < 32)\r
+               {\r
+                       volatile float* smem = products;\r
+                       if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];\r
+                       if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];\r
+                       if (nthreads >= 16) smem[tid] = product = product + smem[tid + 8];\r
+                       if (nthreads >= 8) smem[tid] = product = product + smem[tid + 4];\r
+                       if (nthreads >= 4) smem[tid] = product = product + smem[tid + 2];\r
+                       if (nthreads >= 2) smem[tid] = product = product + smem[tid + 1];\r
+               }\r
+\r
+               // Thread 0 of the slice holds the full sum; add the bias term and store the score.\r
+               if (threadIdx.x == 0)\r
+                       confidences[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x]\r
+                               = (float)(product + free_coef);\r
+\r
+       }\r
+\r
+       // Host-side launcher for compute_confidence_hists_kernel_many_blocks.\r
+       // Derives the window/block grid geometry from the image and stride sizes, launches one\r
+       // thread block per detection window (nblocks == 1) and waits for the kernel to finish.\r
+       // confidences must hold img_win_width * img_win_height floats.\r
+       void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,\r
+                                     int win_stride_y, int win_stride_x, int height, int width, float* block_hists,\r
+                                     float* coefs, float free_coef, float threshold, float *confidences)\r
+       {\r
+               const int nthreads = 256;\r
+               const int nblocks = 1;\r
+\r
+               // Number of detection windows along each image axis.\r
+               const int img_win_width = (width - win_width + win_stride_x) / win_stride_x;\r
+               const int img_win_height = (height - win_height + win_stride_y) / win_stride_y;\r
+\r
+               // Window stride expressed in histogram blocks.\r
+               const int win_block_stride_x = win_stride_x / block_stride_x;\r
+               const int win_block_stride_y = win_stride_y / block_stride_y;\r
+\r
+               // Number of histogram blocks in one image row.\r
+               const int img_block_width =\r
+                       (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;\r
+\r
+               const dim3 threads(nthreads, 1, nblocks);\r
+               const dim3 grid(divUp(img_win_width, nblocks), img_win_height);\r
+\r
+               cudaSafeCall(cudaFuncSetCacheConfig(\r
+                       compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));\r
+\r
+               compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(\r
+                       img_win_width, img_block_width,\r
+                       win_block_stride_x, win_block_stride_y,\r
+                       block_hists, coefs, free_coef, threshold, confidences);\r
+\r
+               cudaSafeCall(cudaThreadSynchronize());\r
+       }\r
+\r
+\r
 \r
         template <int nthreads, // Number of threads per one histogram block\r
                   int nblocks> // Number of histogram block processed by single GPU thread block\r
index 3d0b7e9..fafcce7 100644 (file)
@@ -57,6 +57,8 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) {
 std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }\r
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }\r
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }\r
+// Stub definitions for the branch that precedes #else: both entry points only call throw_nogpu().\r
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, vector<Point>&, double, Size, Size, vector<Point>&, vector<double>&) { throw_nogpu(); }\r
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, vector<HOGConfidence>&, int) { throw_nogpu(); }\r
 \r
 #else\r
 \r
@@ -79,6 +81,10 @@ namespace cv { namespace gpu { namespace device
                             int width, float* block_hists, float* coefs, float free_coef,\r
                             float threshold, unsigned char* labels);\r
 \r
+        // Device-side scoring entry point: writes one SVM score per detection window into\r
+        // confidences (called from HOGDescriptor::computeConfidence).\r
+        void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,\r
+                           int win_stride_y, int win_stride_x, int height, int width, float* block_hists,\r
+                           float* coefs, float free_coef, float threshold, float *confidences);\r
+\r
         void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,\r
                                     int win_stride_y, int win_stride_x, int height, int width, float* block_hists,\r
                                     cv::gpu::DevMem2Df descriptors);\r
@@ -258,6 +264,99 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
     }\r
 }\r
 \r
+// Scores every detection window of img at a single scale.\r
+//\r
+// hits        - windows whose score is >= hit_threshold.\r
+// locations   - top-left corner of every evaluated window, in row-major window order.\r
+// confidences - SVM score of the window at the same index in locations.\r
+// win_stride  - Size() selects block_stride; otherwise it must be a multiple of block_stride.\r
+// padding     - must be Size(0, 0).\r
+//\r
+// All three outputs are cleared on entry, so they are empty when no detector is set.\r
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,\r
+                          Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences)\r
+{\r
+  CV_Assert(padding == Size(0, 0));\r
+\r
+  // Reset every output before the early return so callers never see stale results.\r
+  hits.clear();\r
+  locations.clear();\r
+  confidences.clear();\r
+  if (detector.empty())\r
+    return;\r
+\r
+  computeBlockHistograms(img);\r
+\r
+  if (win_stride == Size())\r
+    win_stride = block_stride;\r
+  else\r
+    CV_Assert(win_stride.width % block_stride.width == 0 &&\r
+         win_stride.height % block_stride.height == 0);\r
+\r
+  const Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);\r
+  const int total_wins = wins_per_img.area();\r
+\r
+  // The labels buffer is reused here to hold one float score per window.\r
+  labels.create(1, total_wins, CV_32F);\r
+\r
+  hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,\r
+               win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),\r
+               detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());\r
+\r
+  labels.download(labels_host);\r
+  const float* scores = labels_host.ptr<float>();\r
+\r
+  // does not support roi for now..\r
+  for (int i = 0; i < total_wins; i++)\r
+    {\r
+      const int y = i / wins_per_img.width;\r
+      const int x = i - wins_per_img.width * y;\r
+      const Point pt(win_stride.width * x, win_stride.height * y);\r
+\r
+      if (scores[i] >= hit_threshold)\r
+        hits.push_back(pt);\r
+\r
+      locations.push_back(pt);\r
+      confidences.push_back((double)scores[i]);\r
+    }\r
+}\r
+\r
+// Runs computeConfidence on img rescaled by each conf_out[i].scale and returns the grouped\r
+// hits of all processed levels in found_locations.\r
+//\r
+// conf_out        - in: the scales to evaluate; out: per-level locations and confidences.\r
+//                   Levels are processed in order until the rescaled image no longer fits one\r
+//                   detection window; the first level is always processed.\r
+// group_threshold - forwarded to groupRectangles.\r
+//\r
+// An empty conf_out yields an empty found_locations.\r
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,\r
+                            double hit_threshold, Size win_stride, Size padding,\r
+                            vector<HOGConfidence> &conf_out, int group_threshold)\r
+{\r
+  // Without any requested scale there is nothing to evaluate (and no conf_out[0] to fill).\r
+  if (conf_out.empty())\r
+    {\r
+      found_locations.clear();\r
+      return;\r
+    }\r
+\r
+  // Count the leading levels whose rescaled image still contains a full window.\r
+  size_t levels = 0;\r
+  for (; levels < conf_out.size(); levels++)\r
+    {\r
+      const double scale = conf_out[levels].scale;\r
+      if (cvRound(img.cols/scale) < win_size.width ||\r
+          cvRound(img.rows/scale) < win_size.height)\r
+        break;\r
+    }\r
+  levels = std::max(levels, (size_t)1);\r
+\r
+  std::vector<Rect> all_candidates;\r
+  vector<Point> level_hits;\r
+\r
+  for (size_t i = 0; i < levels; i++)\r
+    {\r
+      const double scale = conf_out[i].scale;\r
+      const Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));\r
+      GpuMat smaller_img;\r
+\r
+      if (sz == img.size())\r
+        smaller_img = img;\r
+      else\r
+        {\r
+          smaller_img.create(sz, img.type());\r
+          switch (img.type())\r
+            {\r
+            case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;\r
+            case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;\r
+            default:\r
+              // Any other type would leave smaller_img uninitialised.\r
+              CV_Error(CV_StsUnsupportedFormat, "only CV_8UC1 and CV_8UC4 images can be resized");\r
+            }\r
+        }\r
+\r
+      computeConfidence(smaller_img, level_hits, hit_threshold, win_stride, padding,\r
+                        conf_out[i].locations, conf_out[i].confidences);\r
+\r
+      // Map the hits of this level back to original image coordinates.\r
+      const Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));\r
+      for (size_t j = 0; j < level_hits.size(); j++)\r
+        all_candidates.push_back(Rect(cvRound(level_hits[j].x * scale),\r
+                                      cvRound(level_hits[j].y * scale),\r
+                                      scaled_win_size.width, scaled_win_size.height));\r
+    }\r
+\r
+  found_locations.assign(all_candidates.begin(), all_candidates.end());\r
+  groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);\r
+}\r
+\r
 \r
 void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)\r
 {\r
index a754238..a39be74 100644 (file)
@@ -491,6 +491,17 @@ protected:
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 
+// Detection region of interest (ROI) for one scale: the locations to evaluate and,
+// after HOGDescriptor::detectROI / detectMultiScaleROI, their confidence values.
+struct DetectionROI
+{
+   // scale (size) of the bounding box; the image is resized by 1/scale before evaluation
+   double scale;
+   // set of requested locations to be evaluated
+   vector<cv::Point> locations;
+   // vector that will contain confidence values for each location
+   vector<double> confidences;
+};
+
 struct CV_EXPORTS_W HOGDescriptor
 {
 public:
@@ -583,6 +594,23 @@ public:
     CV_PROP bool gammaCorrection;
     CV_PROP vector<float> svmDetector;
     CV_PROP int nlevels;
+
+
+   // Evaluate the specified locations and return a confidence value for each of them.
+   // foundLocations receives the locations whose confidence is >= hitThreshold;
+   // confidences receives one value per entry of locations, in the same order.
+   virtual void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+                                   CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+                                   double hitThreshold = 0, cv::Size winStride = Size(),
+                                   cv::Size padding = Size()) const;
+
+   // Evaluate the specified locations at multiple scales: each DetectionROI supplies one
+   // scale and its locations and receives the confidence values; foundLocations receives
+   // the grouped detections of all scales.
+   virtual void detectMultiScaleROI(const cv::Mat& img,
+                                                       CV_OUT std::vector<cv::Rect>& foundLocations,
+                                                       std::vector<DetectionROI>& locations,
+                                                       double hitThreshold = 0,
+                                                       int groupThreshold = 0) const;
+
+   // Read/parse Dalal's alt model file (SVMlight format) and install the linear
+   // detector it contains via setSVMDetector. Throws cv::Exception on failure.
+   void readALTModel(std::string modelfile);
 };
 
 
index d96386d..485334c 100644 (file)
@@ -39,7 +39,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-
+#include <stdio.h>
 #include "precomp.hpp"
 #include <iterator>
 #ifdef HAVE_IPP
@@ -2382,4 +2382,238 @@ vector<float> HOGDescriptor::getDaimlerPeopleDetector()
         return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
 }
 
+// parallel_for body used by HOGDescriptor::detectMultiScaleROI.
+// For every DetectionROI in its range it resizes the image by 1/scale, runs
+// HOGDescriptor::detectROI on the requested locations (which fills that ROI's
+// confidences) and appends the hits, mapped back to original image
+// coordinates, to the shared rectangle vector.
+struct HOGConfInvoker
+{
+    HOGConfInvoker( const HOGDescriptor* _hog, const Mat& _img,
+                    double _hitThreshold, Size _padding,
+                    std::vector<DetectionROI>* locs,
+                    ConcurrentRectVector* _vec )
+        : hog(_hog), img(_img), hitThreshold(_hitThreshold),
+          locations(locs), padding(_padding), vec(_vec)
+    {
+    }
+
+    void operator()( const BlockedRange& range ) const
+    {
+        const int i1 = range.begin(), i2 = range.end();
+
+        // An empty range must not touch (*locations)[...] at all.
+        if( i1 >= i2 )
+            return;
+
+        // Size the scratch buffer for the largest resized image in THIS range;
+        // the levels are not required to be sorted by scale.
+        Size maxSz(0, 0);
+        for( int i = i1; i < i2; i++ )
+        {
+            const double scale = (*locations)[i].scale;
+            maxSz.width = std::max(maxSz.width, cvRound(img.cols / scale));
+            maxSz.height = std::max(maxSz.height, cvRound(img.rows / scale));
+        }
+        Mat smallerImgBuf(maxSz, img.type());
+        vector<Point> dets;
+
+        for( int i = i1; i < i2; i++ )
+        {
+            DetectionROI& roi = (*locations)[i];
+            const double scale = roi.scale;
+
+            const Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+            // Header over the scratch buffer; resize() writes into it in place.
+            Mat smallerImg(sz, img.type(), smallerImgBuf.data);
+
+            if( sz == img.size() )
+                smallerImg = Mat(sz, img.type(), img.data, img.step);
+            else
+                resize(img, smallerImg, sz);
+
+            hog->detectROI(smallerImg, roi.locations, dets, roi.confidences, hitThreshold, Size(), padding);
+
+            const Size scaledWinSize(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));
+            for( size_t j = 0; j < dets.size(); j++ )
+                vec->push_back(Rect(cvRound(dets[j].x*scale),
+                                    cvRound(dets[j].y*scale),
+                                    scaledWinSize.width, scaledWinSize.height));
+        }
+    }
+
+    const HOGDescriptor* hog;
+    Mat img;
+    double hitThreshold;
+    std::vector<DetectionROI>* locations;
+    Size padding;
+    ConcurrentRectVector* vec;
+};
+
+// Scores the detection window at each requested location with the linear SVM.
+//
+// confidences receives one entry per element of locations, in the same order:
+// the SVM response, or -10.0 for a window that does not fit inside the padded
+// image. foundLocations receives the locations whose response is >=
+// hitThreshold. Both outputs are left empty when no detector is set or no
+// locations are given.
+//
+// winStride == Size() selects cellSize. padding is clamped to be non-negative
+// and aligned to the cache stride before use.
+void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+                              CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+                              double hitThreshold, cv::Size winStride,
+                              cv::Size padding) const
+{
+    foundLocations.clear();
+    confidences.clear();
+
+    if( svmDetector.empty() || locations.empty() )
+        return;
+
+    if( winStride == Size() )
+        winStride = cellSize;
+
+    const Size cacheStride(gcd(winStride.width, blockStride.width),
+                           gcd(winStride.height, blockStride.height));
+
+    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
+    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
+
+    HOGCache cache(this, img, padding, padding, true, cacheStride);
+
+    const HOGCache::BlockData* blockData = &cache.blockData[0];
+
+    const int nblocks = cache.nblocks.area();
+    const int blockHistogramSize = cache.blockHistogramSize;
+    const size_t dsize = getDescriptorSize();
+
+    // The bias term is the optional extra element after the descriptor weights.
+    const double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
+    vector<float> blockHist(blockHistogramSize);
+
+    const size_t nwindows = locations.size();
+    confidences.reserve(nwindows);
+
+    for( size_t i = 0; i < nwindows; i++ )
+    {
+        const Point pt0 = locations[i];
+        if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
+            pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
+        {
+            // out of image: keep confidences aligned with locations
+            confidences.push_back(-10.0);
+            continue;
+        }
+
+        double s = rho;
+        const float* svmVec = &svmDetector[0];
+        int j, k;
+
+        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
+        {
+            const HOGCache::BlockData& bj = blockData[j];
+            const Point pt = pt0 + bj.imgOffset;
+            // need to divide this into 4 parts!
+            const float* vec = cache.getBlock(pt, &blockHist[0]);
+            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
+                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
+                    vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
+            for( ; k < blockHistogramSize; k++ )
+                s += vec[k]*svmVec[k];
+        }
+        confidences.push_back(s);
+
+        if( s >= hitThreshold )
+            foundLocations.push_back(pt0);
+    }
+}
+
+// Evaluates the caller-supplied locations of every DetectionROI (one per
+// scale) in parallel via HOGConfInvoker, using a fixed padding of 8x8, then
+// groups the collected hit rectangles into foundLocations.
+void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img,
+                                        CV_OUT std::vector<cv::Rect>& foundLocations,
+                                        std::vector<DetectionROI>& locations,
+                                        double hitThreshold,
+                                        int groupThreshold) const
+{
+    ConcurrentRectVector candidates;
+
+    const BlockedRange levels(0, (int)locations.size());
+    const HOGConfInvoker invoker(this, img, hitThreshold, Size(8, 8), &locations, &candidates);
+    parallel_for(levels, invoker);
+
+    foundLocations.assign(candidates.begin(), candidates.end());
+    cv::groupRectangles(foundLocations, groupThreshold, 0.2);
+}
+
+// Loads a linear SVM detector from a binary SVMlight-format ("alt") model file
+// and installs it with setSVMDetector().
+//
+// Fields are consumed in this order: a 10-byte version string (must be
+// "V6.01"), an int version number (must be >= 200), the kernel type, kernel
+// parameters and a custom field that are skipped, the feature count, two
+// counters that are skipped, the bias, and finally the weight vector.
+// Only kernel_type == 0 (linear) is accepted. The installed detector is the
+// weight vector followed by the negated bias.
+//
+// Throws cv::Exception when the file cannot be opened, is truncated, carries
+// an unexpected version or field size, or uses a non-linear kernel. The file
+// is closed on every exit path.
+void HOGDescriptor::readALTModel(std::string modelfile)
+{
+    // Owns the stream: closes it on return and on every error exit, and turns
+    // short reads into exceptions.
+    struct ModelFile
+    {
+        explicit ModelFile(FILE* f) : fp(f) {}
+        ~ModelFile() { if (fp) fclose(fp); }
+
+        void read(void* dst, size_t elemSize, size_t count)
+        {
+            if (count != 0 && fread(dst, elemSize, count, fp) != count)
+                CV_Error(CV_StsError, "unexpected end of model file");
+        }
+
+        FILE* fp;
+    };
+
+    // read model from SVMlight format..
+    ModelFile model(fopen(modelfile.c_str(), "rb"));
+    if (model.fp == NULL)
+        CV_Error(CV_StsError, "file not exist");
+
+    // One spare byte keeps the buffer NUL-terminated for strcmp() even when
+    // the file fills all 10 bytes.
+    char version_buffer[11] = {0};
+    if (fread(version_buffer, sizeof(char), 10, model.fp) != 10)
+        CV_Error(CV_StsError, "version?");
+    if (strcmp(version_buffer, "V6.01") != 0)
+        CV_Error(CV_StsError, "version doesnot match");
+
+    /* read version number */
+    int version = 0;
+    model.read(&version, sizeof(int), 1);
+    if (version < 200)
+        CV_Error(CV_StsError, "version doesnot match");
+
+    int kernel_type = 0;
+    model.read(&kernel_type, sizeof(int), 1);
+
+    {// kernel parameters: read only to advance the stream
+        int poly_degree = 0;
+        model.read(&poly_degree, sizeof(int), 1);
+        double rbf_gamma = 0;
+        model.read(&rbf_gamma, sizeof(double), 1);
+        double coef_lin = 0;
+        model.read(&coef_lin, sizeof(double), 1);
+        double coef_const = 0;
+        model.read(&coef_const, sizeof(double), 1);
+
+        int l = 0;
+        model.read(&l, sizeof(int), 1);
+        if (l < 0)
+            CV_Error(CV_StsError, "invalid custom field length");
+        if (l > 0)
+        {
+            std::vector<char> custom(l);
+            model.read(&custom[0], sizeof(char), custom.size());
+        }
+    }
+
+    int totwords = 0;
+    model.read(&totwords, sizeof(int), 1);
+    if (totwords < 0)
+        CV_Error(CV_StsError, "invalid feature count");
+
+    {// ignore these
+        int totdoc = 0;
+        model.read(&totdoc, sizeof(int), 1);
+        int sv_num = 0;
+        model.read(&sv_num, sizeof(int), 1);
+    }
+
+    double linearbias = 0;
+    model.read(&linearbias, sizeof(double), 1);
+
+    if (kernel_type != 0) /* only the linear kernel is supported */
+        CV_Error(CV_StsError, "only linear kernel models are supported");
+
+    // The file stores totwords + 1 doubles; the first totwords are the weights.
+    std::vector<double> linearwt(static_cast<size_t>(totwords) + 1);
+    model.read(&linearwt[0], sizeof(double), linearwt.size());
+
+    std::vector<float> detector;
+    detector.reserve(linearwt.size());
+    for (int i = 0; i < totwords; i++)
+        detector.push_back((float)linearwt[i]);
+
+    detector.push_back((float)-linearbias);
+    setSVMDetector(detector);
+}
+
 }