\r
\r
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////\r
+struct CV_EXPORTS HOGConfidence // per-scale record exchanged with HOGDescriptor::computeConfidenceMultiScale
+{
+ double scale; // input: shrink factor of this pyramid level (the image is resized to cols/scale x rows/scale)
+ vector<Point> locations; // output: position of every window evaluated at this scale
+ vector<double> confidences; // output: SVM score of each entry of `locations`, in the same order
+ vector<double> part_scores[4]; // NOTE(review): not written by any code visible in this patch - confirm intended use
+};
\r
struct CV_EXPORTS HOGDescriptor\r
{\r
Size padding=Size(), double scale0=1.05,\r
int group_threshold=2);\r
\r
+ void computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
+ Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences); // scores every window of img: locations/confidences get one entry per window, hits only the windows with score >= hit_threshold; padding must be Size(0, 0)
+\r
+ void computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
+ double hit_threshold, Size win_stride, Size padding,
+ vector<HOGConfidence> &conf_out, int group_threshold); // runs computeConfidence once per conf_out[i].scale, fills conf_out[i].locations/confidences, and returns the grouped hit rectangles in found_locations
+\r
void getDescriptors(const GpuMat& img, Size win_stride,\r
GpuMat& descriptors,\r
int descr_format=DESCR_FORMAT_COL_BY_COL);\r
// Linear SVM based classification\r
//\r
\r
+ // Returns confidence values, not just positive locations: for every detection window
+ // this kernel writes the raw linear-SVM response
+ //     confidences[window] = dot(coefs, window descriptor) + free_coef.
+ //
+ // Launch layout (as configured by compute_confidence_hists):
+ //   blockDim = (nthreads, 1, nblocks) - nthreads lanes cooperate on one window,
+ //                                       threadIdx.z selects the window inside the block
+ //   gridDim  = (ceil(img_win_width / nblocks), number of window rows)
+ // Static shared memory: nthreads * nblocks floats.
+ // The tree reduction below assumes nthreads is a power of two.
+ // `threshold` is not used by this kernel; thresholding is left to the caller.
+ template <int nthreads, // Number of threads per one histogram block
+           int nblocks>  // Number of histogram block processed by single GPU thread block
+ __global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
+                                                             const int win_block_stride_x, const int win_block_stride_y,
+                                                             const float* block_hists, const float* coefs,
+                                                             float free_coef, float threshold, float* confidences)
+ {
+     const int win_x = threadIdx.z;
+     const int win_col = blockIdx.x * blockDim.z + win_x;
+
+     // Threads whose window lies past the last column must NOT return early: every
+     // thread of the block has to reach the __syncthreads() calls below. They simply
+     // contribute a zero partial sum and never write a result.
+     const bool in_range = win_col < img_win_width;
+
+     float product = 0.f;
+     if (in_range)
+     {
+         // First block histogram of this window. The window column is scaled by the
+         // window stride (in blocks) as a whole, so the offset stays correct when a
+         // thread block handles several windows (nblocks > 1). For nblocks == 1 this
+         // is identical to indexing with blockIdx.x alone.
+         const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
+                                            win_col * win_block_stride_x) *
+                                           cblock_hist_size;
+
+         // Each lane accumulates a strided slice of the descriptor/coefficient dot product.
+         for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
+         {
+             int offset_y = i / cdescr_width;
+             int offset_x = i - offset_y * cdescr_width;
+             product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
+         }
+     }
+
+     __shared__ float products[nthreads * nblocks];
+
+     const int tid = threadIdx.z * nthreads + threadIdx.x;
+     products[tid] = product;
+
+     __syncthreads();
+
+     // Block-wide tree reduction of the per-lane partial sums.
+     if (nthreads >= 512)
+     {
+         if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];
+         __syncthreads();
+     }
+     if (nthreads >= 256)
+     {
+         if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];
+         __syncthreads();
+     }
+     if (nthreads >= 128)
+     {
+         if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];
+         __syncthreads();
+     }
+
+     // Final steps run inside a single warp through a volatile pointer, without barriers.
+     // NOTE(review): this relies on implicit warp-synchronous execution; on architectures
+     // with independent thread scheduling it needs a warp barrier between the steps.
+     if (threadIdx.x < 32)
+     {
+         volatile float* smem = products;
+         if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];
+         if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];
+         if (nthreads >= 16) smem[tid] = product = product + smem[tid + 8];
+         if (nthreads >= 8) smem[tid] = product = product + smem[tid + 4];
+         if (nthreads >= 4) smem[tid] = product = product + smem[tid + 2];
+         if (nthreads >= 2) smem[tid] = product = product + smem[tid + 1];
+     }
+
+     // Lane 0 holds the complete dot product; add the SVM bias and store the score.
+     if (in_range && threadIdx.x == 0)
+         confidences[blockIdx.y * img_win_width + win_col] = product + free_coef;
+ }
+
+\r
+ // Host-side launcher: computes one raw SVM score per detection window and stores it in
+ // `confidences` (device memory, row-major, img_win_width scores per window row).
+ //
+ // win_height/win_width       - detection window size in pixels
+ // block_stride_y/x           - block stride in pixels
+ // win_stride_y/x             - window stride in pixels (divided by the block stride below)
+ // height/width               - image size in pixels
+ // block_hists, coefs         - device pointers to the block histograms and SVM weights
+ // free_coef                  - SVM bias added to every score
+ // threshold                  - forwarded to the kernel, which does not use it
+ //
+ // Blocks until the kernel has finished; launch and execution errors are reported
+ // through cudaSafeCall.
+ void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
+                               int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
+                               float* coefs, float free_coef, float threshold, float *confidences)
+ {
+     const int nthreads = 256;
+     const int nblocks = 1;
+
+     int win_block_stride_x = win_stride_x / block_stride_x;
+     int win_block_stride_y = win_stride_y / block_stride_y;
+     int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
+     int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
+
+     // Nothing to score when the image cannot hold a single window; a zero-sized
+     // grid would also be an invalid launch configuration.
+     if (img_win_width <= 0 || img_win_height <= 0)
+         return;
+
+     dim3 threads(nthreads, 1, nblocks);
+     dim3 grid(divUp(img_win_width, nblocks), img_win_height);
+
+     cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
+                                         cudaFuncCachePreferL1));
+
+     int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+                           block_stride_x;
+     compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
+         img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
+         block_hists, coefs, free_coef, threshold, confidences);
+
+     // A kernel launch returns no status: pick up configuration errors explicitly,
+     // then wait so that execution errors surface here as well.
+     cudaSafeCall(cudaGetLastError());
+     cudaSafeCall(cudaThreadSynchronize());
+ }
+\r
+\r
\r
template <int nthreads, // Number of threads per one histogram block\r
int nblocks> // Number of histogram block processed by single GPU thread block\r
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); } // stub: calls throw_nogpu(), then returns an empty vector
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); } // stub: calls throw_nogpu(), then returns an empty vector
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); } // stub: calls throw_nogpu(), then returns an empty vector
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, vector<Point>&, double, Size, Size, vector<Point>&, vector<double>&) { throw_nogpu(); } // stub: ignores all arguments and calls throw_nogpu()
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, vector<HOGConfidence>&, int) { throw_nogpu(); } // stub: ignores all arguments and calls throw_nogpu()
\r
#else\r
\r
int width, float* block_hists, float* coefs, float free_coef,\r
float threshold, unsigned char* labels);\r
\r
+ void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
+ float* coefs, float free_coef, float threshold, float *confidences); // forward declaration of the device-side launcher; called below with device pointers for block_hists, coefs and confidences
+\r
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,\r
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,\r
cv::gpu::DevMem2Df descriptors);\r
}\r
}\r
\r
+// Scores every detection window of `img` with the linear SVM and reports the raw
+// confidences instead of only the positive detections.
+//
+// img           - input image, forwarded to computeBlockHistograms
+// hits          - output: top-left corners of the windows whose score >= hit_threshold
+// hit_threshold - decision threshold applied on the host to the downloaded scores
+// win_stride    - window stride; Size() selects block_stride, otherwise it must be a
+//                 multiple of block_stride in both directions
+// padding       - must be Size(0, 0) (asserted)
+// locations     - output: top-left corner of every evaluated window, row by row
+// confidences   - output: score of each entry of `locations`, in the same order
+//
+// All three outputs are reset on entry, so they are empty (never stale) when the
+// detector is not set or when the image cannot hold a single window.
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
+                                                Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences)
+{
+    CV_Assert(padding == Size(0, 0));
+
+    hits.clear();
+    locations.clear();
+    confidences.clear();
+    if (detector.empty())
+        return;
+
+    computeBlockHistograms(img);
+
+    if (win_stride == Size())
+        win_stride = block_stride;
+    else
+        CV_Assert(win_stride.width % block_stride.width == 0 &&
+                  win_stride.height % block_stride.height == 0);
+
+    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
+
+    // No window fits: skip the allocation, the kernel launch and the download.
+    if (wins_per_img.width <= 0 || wins_per_img.height <= 0)
+        return;
+
+    const int total_windows = wins_per_img.area();
+    labels.create(1, total_windows, CV_32F);
+
+    hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
+                                  win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
+                                  detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());
+
+    labels.download(labels_host);
+    const float* scores = labels_host.ptr<float>();
+
+    // does not support roi for now..
+    locations.reserve(total_windows);
+    confidences.reserve(total_windows);
+    for (int i = 0; i < total_windows; i++)
+    {
+        // Scores are stored row-major: recover the window's grid position, then its pixel origin.
+        const int y = i / wins_per_img.width;
+        const int x = i - wins_per_img.width * y;
+        const Point pt(win_stride.width * x, win_stride.height * y);
+
+        if (scores[i] >= hit_threshold)
+            hits.push_back(pt);
+
+        locations.push_back(pt);
+        confidences.push_back((double)scores[i]);
+    }
+}
+\r
+// Runs computeConfidence at every scale requested in `conf_out` and groups the hits.
+//
+// img             - input image; must be CV_8UC1 or CV_8UC4 (the only types the resize
+//                   helpers used below handle)
+// found_locations - output: grouped detection rectangles in original image coordinates
+// hit_threshold   - decision threshold forwarded to computeConfidence
+// win_stride      - window stride forwarded to computeConfidence
+// padding         - forwarded to computeConfidence
+// conf_out        - in/out: conf_out[i].scale selects the shrink factor of level i;
+//                   conf_out[i].locations / .confidences receive that level's results.
+//                   Levels are taken in order up to (not including) the first one whose
+//                   scaled image is smaller than the window, but at least one level is
+//                   always processed. Entries past the processed levels are left untouched.
+// group_threshold - forwarded to groupRectangles
+//
+// An empty conf_out yields an empty found_locations.
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
+                                                          double hit_threshold, Size win_stride, Size padding,
+                                                          vector<HOGConfidence> &conf_out, int group_threshold)
+{
+    // Any other type would skip both resize branches below and feed an uninitialized image on.
+    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
+
+    found_locations.clear();
+
+    // Without requested scales there is nothing to evaluate; bailing out here also avoids
+    // fabricating a level with scale 0 and indexing conf_out[0] out of bounds.
+    if (conf_out.empty())
+        return;
+
+    vector<double> level_scale;
+    size_t levels = 0;
+
+    for (; levels < conf_out.size(); levels++)
+    {
+        const double requested_scale = conf_out[levels].scale;
+        level_scale.push_back(requested_scale);
+        if (cvRound(img.cols/requested_scale) < win_size.width ||
+            cvRound(img.rows/requested_scale) < win_size.height)
+            break;
+    }
+
+    // Keep at least the first level even if it was the one that hit the size limit.
+    levels = std::max(levels, (size_t)1);
+    level_scale.resize(levels);
+
+    std::vector<Rect> all_candidates;
+    vector<Point> level_hits;
+
+    for (size_t i = 0; i < level_scale.size(); i++)
+    {
+        const double scale = level_scale[i];
+        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+        GpuMat smaller_img;
+
+        if (sz == img.size())
+            smaller_img = img;
+        else
+        {
+            smaller_img.create(sz, img.type());
+            switch (img.type()) {
+                case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
+                case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
+            }
+        }
+
+        computeConfidence(smaller_img, level_hits, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences);
+
+        // Map the hits of this level back to original image coordinates.
+        Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
+        for (size_t j = 0; j < level_hits.size(); j++)
+            all_candidates.push_back(Rect(Point2d((CvPoint)level_hits[j]) * scale, scaled_win_size));
+    }
+    found_locations.assign(all_candidates.begin(), all_candidates.end());
+    groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
+}
+
+\r
\r
void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)\r
{\r
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+// struct for detection region of interest (ROI): the windows to evaluate at one scale, and their scores
+struct DetectionROI
+{
+ // scale (size) of the bounding box; the image is shrunk by this factor before the locations are evaluated
+ double scale;
+ // set of requested locations to be evaluated
+ vector<cv::Point> locations;
+ // vector that will contain confidence values for each location (one entry per location, in the same order)
+ vector<double> confidences;
+};
+
struct CV_EXPORTS_W HOGDescriptor
{
public:
CV_PROP bool gammaCorrection;
CV_PROP vector<float> svmDetector;
CV_PROP int nlevels;
+
+
+ // evaluate specified ROI and return confidence value for each location; foundLocations receives only the locations whose confidence is >= hitThreshold
+ virtual void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+ CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+ double hitThreshold = 0, cv::Size winStride = Size(),
+ cv::Size padding = Size()) const;
+
+ // evaluate specified ROI and return confidence value for each location in multiple scales: every locations[i] is evaluated on img shrunk by locations[i].scale and gets its confidences filled in; foundLocations receives the grouped hit rectangles
+ virtual void detectMultiScaleROI(const cv::Mat& img,
+ CV_OUT std::vector<cv::Rect>& foundLocations,
+ std::vector<DetectionROI>& locations,
+ double hitThreshold = 0,
+ int groupThreshold = 0) const;
+
+ // read/parse Dalal's alt model file (binary SVMlight-format model, linear kernel only) and install its weights via setSVMDetector; throws cv::Exception if the file cannot be opened or parsed
+ void readALTModel(std::string modelfile);
};
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-
+#include <stdio.h>
#include "precomp.hpp"
#include <iterator>
#ifdef HAVE_IPP
return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
}
+// Parallel loop body used by HOGDescriptor::detectMultiScaleROI.
+// For every DetectionROI level in the assigned range it resizes the image by that
+// level's scale, evaluates the requested locations with HOGDescriptor::detectROI
+// (which fills the level's `confidences`) and appends the hits, mapped back to
+// original image coordinates, to the shared candidate vector.
+struct HOGConfInvoker
+{
+    // _hog          - descriptor used for the evaluation (not owned)
+    // _img          - full-resolution input image
+    // _hitThreshold - score threshold forwarded to detectROI
+    // _padding      - padding forwarded to detectROI
+    // locs          - per-level requests/results (not owned; confidences are written)
+    // _vec          - shared output vector of candidate rectangles (not owned)
+    HOGConfInvoker( const HOGDescriptor* _hog, const Mat& _img,
+                    double _hitThreshold, Size _padding,
+                    std::vector<DetectionROI>* locs,
+                    ConcurrentRectVector* _vec )
+        : hog(_hog), img(_img), hitThreshold(_hitThreshold),
+          locations(locs), padding(_padding), vec(_vec)
+    {
+    }
+
+    void operator()( const BlockedRange& range ) const
+    {
+        int i, i1 = range.begin(), i2 = range.end();
+        if( i1 >= i2 )
+            return;
+
+        // The scales come from the caller and need not be sorted, so size the scratch
+        // buffer for the largest resized image of THIS range (i.e. the smallest scale)
+        // instead of trusting level 0. cvRound(x) <= cvCeil(x), hence every level fits.
+        double minScale = (*locations)[i1].scale;
+        for( i = i1 + 1; i < i2; i++ )
+            minScale = std::min(minScale, (*locations)[i].scale);
+
+        Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));
+        Mat smallerImgBuf(maxSz, img.type());
+        vector<Point> dets;
+
+        for( i = i1; i < i2; i++ )
+        {
+            DetectionROI& level = (*locations)[i];
+            double scale = level.scale;
+
+            Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+            Mat smallerImg;
+
+            if( sz == img.size() )
+                // Same size: reference the original pixels, no copy.
+                smallerImg = Mat(sz, img.type(), img.data, img.step);
+            else
+            {
+                // Header over the scratch buffer; resize writes into it in place.
+                smallerImg = Mat(sz, img.type(), smallerImgBuf.data);
+                resize(img, smallerImg, sz);
+            }
+
+            hog->detectROI(smallerImg, level.locations, dets, level.confidences, hitThreshold, Size(), padding);
+
+            // Map the hits of this level back to original image coordinates.
+            Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));
+            for( size_t j = 0; j < dets.size(); j++ )
+                vec->push_back(Rect(cvRound(dets[j].x*scale),
+                                    cvRound(dets[j].y*scale),
+                                    scaledWinSize.width, scaledWinSize.height));
+        }
+    }
+
+    const HOGDescriptor* hog;
+    Mat img;
+    double hitThreshold;
+    std::vector<DetectionROI>* locations;
+    Size padding;
+    ConcurrentRectVector* vec;
+};
+
+// Evaluates the linear SVM at the caller-supplied window positions only.
+//
+// img            - input image
+// locations      - top-left corners of the windows to evaluate
+// foundLocations - output: the locations whose score is >= hitThreshold
+// confidences    - output: one score per entry of `locations`, in the same order;
+//                  a window that does not fit inside the padded image gets -10.0
+// hitThreshold   - decision threshold
+// winStride      - only used to derive the block-cache stride; Size() selects cellSize
+// padding        - image padding; clamped to >= 0 and aligned to the cache stride
+//
+// Both outputs are cleared first and stay empty when no SVM detector is set or when
+// `locations` is empty.
+void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+                              CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+                              double hitThreshold, cv::Size winStride,
+                              cv::Size padding) const
+{
+    // Score reported for a requested window that lies outside the padded image.
+    const double outOfImageConfidence = -10.0;
+
+    foundLocations.clear();
+    confidences.clear();
+
+    if( svmDetector.empty() || locations.empty() )
+        return;
+
+    if( winStride == Size() )
+        winStride = cellSize;
+
+    Size cacheStride(gcd(winStride.width, blockStride.width),
+                     gcd(winStride.height, blockStride.height));
+
+    // `locations` is known to be non-empty here, so the window count is simply its size.
+    const size_t nwindows = locations.size();
+    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
+    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
+
+    HOGCache cache(this, img, padding, padding, true, cacheStride);
+
+    const HOGCache::BlockData* blockData = &cache.blockData[0];
+
+    int nblocks = cache.nblocks.area();
+    int blockHistogramSize = cache.blockHistogramSize;
+    size_t dsize = getDescriptorSize();
+
+    // The bias is the optional extra element stored after the descriptor weights.
+    double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
+    vector<float> blockHist(blockHistogramSize);
+
+    confidences.reserve(nwindows);
+
+    for( size_t i = 0; i < nwindows; i++ )
+    {
+        const Point pt0 = locations[i];
+        if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
+            pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
+        {
+            // out of image
+            confidences.push_back(outOfImageConfidence);
+            continue;
+        }
+
+        double s = rho;
+        const float* svmVec = &svmDetector[0];
+        int j, k;
+
+        // Accumulate the dot product block by block; svmVec advances in step with the blocks.
+        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
+        {
+            const HOGCache::BlockData& bj = blockData[j];
+            Point pt = pt0 + bj.imgOffset;
+            // need to divide this into 4 parts!
+            const float* vec = cache.getBlock(pt, &blockHist[0]);
+            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
+                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
+                    vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
+            for( ; k < blockHistogramSize; k++ )
+                s += vec[k]*svmVec[k];
+        }
+
+        confidences.push_back(s);
+
+        if( s >= hitThreshold )
+            foundLocations.push_back(pt0);
+    }
+}
+
+// Evaluates every DetectionROI level in parallel (HOGConfInvoker fills each level's
+// confidences), gathers the hits of all levels and groups overlapping rectangles.
+// The per-level evaluation always uses a fixed padding of 8x8.
+void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img,
+                                        CV_OUT std::vector<cv::Rect>& foundLocations,
+                                        std::vector<DetectionROI>& locations,
+                                        double hitThreshold,
+                                        int groupThreshold) const
+{
+    ConcurrentRectVector candidates;
+    const Size roiPadding(8, 8);
+    const BlockedRange allLevels(0, (int)locations.size());
+
+    parallel_for(allLevels,
+                 HOGConfInvoker(this, img, hitThreshold, roiPadding, &locations, &candidates));
+
+    // Replace the previous contents with the gathered candidates, then merge them.
+    foundLocations.assign(candidates.begin(), candidates.end());
+    cv::groupRectangles(foundLocations, groupThreshold, 0.2);
+}
+
+// Reads exactly `count` items of `itemSize` bytes from `fp` into `dst`.
+// Throws cv::Exception(CV_StsError, errMsg) when the stream ends early, so a truncated
+// model file can never leave a field uninitialized. A zero `count` reads nothing.
+static void readALTModelItems(FILE* fp, void* dst, size_t itemSize, size_t count, const char* errMsg)
+{
+    if( count != 0 && fread(dst, itemSize, count, fp) != count )
+        throw Exception(CV_StsError, errMsg, __FUNCTION__, __FILE__, __LINE__);
+}
+
+// Loads a linear SVM from a binary SVMlight-format model file and installs it with
+// setSVMDetector (the weights followed by the negated bias).
+//
+// modelfile - path of the model file
+//
+// Throws cv::Exception when the file cannot be opened, is truncated, carries an
+// unexpected version, contains a negative length field, or uses a non-linear kernel.
+// The file is closed and all temporary buffers are released on every path, including
+// the throwing ones.
+void HOGDescriptor::readALTModel(std::string modelfile)
+{
+    // Closes the stream on scope exit, whichever way the function is left.
+    struct FileCloser
+    {
+        explicit FileCloser(FILE* f) : fp(f) {}
+        ~FileCloser() { fclose(fp); }
+        FILE* fp;
+    };
+
+    const char* const truncatedMsg = "model file is truncated";
+
+    // read model from SVMlight format..
+    FILE* modelfl = fopen(modelfile.c_str(), "rb");
+    if( modelfl == NULL )
+        throw Exception(CV_StsError, "file not exist", __FUNCTION__, __FILE__, __LINE__);
+    FileCloser closeOnExit(modelfl);
+
+    char version_buffer[10];
+    readALTModelItems(modelfl, version_buffer, sizeof(char), sizeof(version_buffer), "version?");
+    // The field comes straight from the file: terminate it before treating it as a C string.
+    version_buffer[sizeof(version_buffer) - 1] = '\0';
+    if( strcmp(version_buffer, "V6.01") != 0 )
+        throw Exception(CV_StsError, "version doesnot match", __FUNCTION__, __FILE__, __LINE__);
+
+    /* read version number */
+    int version = 0;
+    readALTModelItems(modelfl, &version, sizeof(int), 1, truncatedMsg);
+    if( version < 200 )
+        throw Exception(CV_StsError, "version doesnot match", __FUNCTION__, __FILE__, __LINE__);
+
+    int kernel_type = 0;
+    readALTModelItems(modelfl, &kernel_type, sizeof(int), 1, truncatedMsg);
+
+    {// kernel parameters and the custom blob are read only to advance the stream
+        int poly_degree = 0;
+        readALTModelItems(modelfl, &poly_degree, sizeof(int), 1, truncatedMsg);
+        double rbf_gamma = 0;
+        readALTModelItems(modelfl, &rbf_gamma, sizeof(double), 1, truncatedMsg);
+        double coef_lin = 0;
+        readALTModelItems(modelfl, &coef_lin, sizeof(double), 1, truncatedMsg);
+        double coef_const = 0;
+        readALTModelItems(modelfl, &coef_const, sizeof(double), 1, truncatedMsg);
+
+        int l = 0;
+        readALTModelItems(modelfl, &l, sizeof(int), 1, truncatedMsg);
+        if( l < 0 )
+            throw Exception(CV_StsError, "invalid custom field length", __FUNCTION__, __FILE__, __LINE__);
+        if( l > 0 )
+        {
+            std::vector<char> custom(l);
+            readALTModelItems(modelfl, &custom[0], sizeof(char), custom.size(), truncatedMsg);
+        }
+    }
+
+    int totwords = 0;
+    readALTModelItems(modelfl, &totwords, sizeof(int), 1, truncatedMsg);
+
+    {// document and support-vector counts are not needed for a linear model
+        int totdoc = 0;
+        readALTModelItems(modelfl, &totdoc, sizeof(int), 1, truncatedMsg);
+        int sv_num = 0;
+        readALTModelItems(modelfl, &sv_num, sizeof(int), 1, truncatedMsg);
+    }
+
+    double linearbias = 0;
+    readALTModelItems(modelfl, &linearbias, sizeof(double), 1, truncatedMsg);
+
+    if( kernel_type != 0 )
+        throw Exception(CV_StsError, "only linear kernel models are supported", __FUNCTION__, __FILE__, __LINE__);
+    if( totwords < 0 )
+        throw Exception(CV_StsError, "invalid number of weights", __FUNCTION__, __FILE__, __LINE__);
+
+    /* linear kernel: the file stores totwords + 1 weights, of which the first totwords are used */
+    const size_t length = (size_t)totwords;
+    std::vector<double> linearwt(length + 1);
+    readALTModelItems(modelfl, &linearwt[0], sizeof(double), linearwt.size(), truncatedMsg);
+
+    std::vector<float> detector;
+    detector.reserve(length + 1);
+    for( size_t i = 0; i < length; i++ )
+        detector.push_back((float)linearwt[i]);
+
+    // The detector stores the bias with the opposite sign as its last element.
+    detector.push_back((float)-linearbias);
+    setSVMDetector(detector);
+}
+
+
}