\r
\r
void compute_hists(int nbins, int block_stride_x, int block_stride_y, \r
- int height, int width, const DevMem2Df& grad, \r
- const DevMem2D& qangle, float sigma, float* block_hists) \r
+ int height, int width, const DevMem2Df& grad, \r
+ const DevMem2D& qangle, float sigma, float* block_hists) \r
{\r
const int nblocks = 1;\r
\r
\r
\r
void normalize_hists(int nbins, int block_stride_x, int block_stride_y, \r
- int height, int width, float* block_hists, float threshold)\r
+ int height, int width, float* block_hists, float threshold)\r
{ \r
const int nblocks = 1;\r
\r
template <int nthreads, // Number of threads per one histogram block \r
int nblocks> // Number of histogram block processed by single GPU thread block\r
__global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width, \r
+ const int win_block_stride_x, const int win_block_stride_y,\r
const float* block_hists, const float* coefs,\r
float free_coef, float threshold, unsigned char* labels)\r
{ \r
if (blockIdx.x * blockDim.z + win_x >= img_win_width)\r
return;\r
\r
- const float* hist = block_hists + (blockIdx.y * img_block_width + \r
- blockIdx.x * blockDim.z + win_x) * \r
+ const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + \r
+ blockIdx.x * win_block_stride_x * blockDim.z + win_x) * \r
cblock_hist_size;\r
\r
float product = 0.f;\r
\r
\r
// We only support win_stride_x being a multiple of block_stride_x and win_stride_y being a multiple of block_stride_y\r
// Host-side launcher for classify_hists_kernel_many_blocks.\r
//\r
// This span is patch text: lines starting with '-' are the old version, lines\r
// starting with '+' are the new version, unmarked lines are shared by both.\r
//\r
// The new signature adds win_stride_y / win_stride_x and also swaps the order of\r
// the block stride parameters (old: block_stride_x, block_stride_y; new:\r
// block_stride_y, block_stride_x), so every caller has to pass height-like\r
// values before width-like values.\r
//\r
// Parameters (new version):\r
//   win_height, win_width          - detection window size, used to count window positions\r
//   block_stride_y, block_stride_x - block stride, used for the block grid width and as the\r
//                                    divisor when converting the window stride to block units\r
//   win_stride_y, win_stride_x     - window stride, used to count window positions\r
//   height, width                  - image size\r
//   block_hists, coefs, free_coef, threshold, labels\r
//                                  - forwarded unchanged to the kernel\r
//\r
// NOTE(review): none of the divisors below are checked against zero inside this\r
// function; presumably the caller validates the strides - confirm.\r
-void classify_hists(int win_height, int win_width, int block_stride_x, int block_stride_y, \r
- int height, int width, float* block_hists, float* coefs, \r
- float free_coef, float threshold, unsigned char* labels)\r
+void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x, \r
+ int win_stride_y, int win_stride_x,\r
+ int height, int width, float* block_hists, float* coefs, \r
+ float free_coef, float threshold, unsigned char* labels)\r
{ \r
// Compile-time launch configuration, passed both as template arguments and as block dimensions.\r
const int nthreads = 256;\r
const int nblocks = 1;\r
\r
// Old version: the number of window positions was derived from the block stride.\r
- int img_win_width = (width - win_width + block_stride_x) / block_stride_x;\r
- int img_win_height = (height - win_height + block_stride_y) / block_stride_y;\r
// New version: window stride expressed in units of block strides. This is an integer\r
// division, so any remainder is silently dropped if the window stride is not a multiple.\r
+ int win_block_stride_x = win_stride_x / block_stride_x;\r
+ int win_block_stride_y = win_stride_y / block_stride_y;\r
// New version: the number of window positions per row / column is derived from the window stride.\r
+ int img_win_width = (width - win_width + win_stride_x) / win_stride_x;\r
+ int img_win_height = (height - win_height + win_stride_y) / win_stride_y;\r
\r
// Thread block is nthreads x 1 x nblocks; the grid has div_up(img_win_width, nblocks)\r
// blocks along x and img_win_height blocks along y.\r
dim3 threads(nthreads, 1, nblocks);\r
dim3 grid(div_up(img_win_width, nblocks), img_win_height);\r
// Number of block positions per image row, counted with the block stride.\r
// CELLS_PER_BLOCK_X and CELL_WIDTH are defined outside this span; their product is\r
// presumably the block width in pixels - confirm.\r
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / \r
block_stride_x;\r
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(\r
- img_win_width, img_block_width, block_hists, coefs, free_coef, threshold, labels);\r
+ img_win_width, img_block_width, win_block_stride_x, win_block_stride_y, \r
+ block_hists, coefs, free_coef, threshold, labels);\r
// Blocks the host until the kernel has finished; the returned status goes through cudaSafeCall.\r
// NOTE(review): cudaThreadSynchronize is the legacy name of cudaDeviceSynchronize in later\r
// CUDA toolkits, and there is no separate cudaGetLastError() after the launch - confirm\r
// against the toolkit version this file targets.\r
cudaSafeCall(cudaThreadSynchronize());\r
} \r
\r
\r
\r
void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& img, \r
- float angle_scale, DevMem2Df grad, DevMem2D qangle)\r
+ float angle_scale, DevMem2Df grad, DevMem2D qangle)\r
{\r
const int nthreads = 256;\r
\r
\r
\r
void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& img, \r
- float angle_scale, DevMem2Df grad, DevMem2D qangle)\r
+ float angle_scale, DevMem2Df grad, DevMem2D qangle)\r
{\r
const int nthreads = 256;\r
\r
void normalize_hists(int nbins, int block_stride_x, int block_stride_y, \r
int height, int width, float* block_hists, float threshold);\r
\r
-void classify_hists(int win_height, int win_width, int block_stride_x, \r
- int block_stride_y, int height, int width, float* block_hists, \r
- float* coefs, float free_coef, float threshold, unsigned char* labels);\r
+void classify_hists(int win_height, int win_width, int block_stride_y, \r
+ int block_stride_x, int win_stride_y, int win_stride_x, int height, \r
+ int width, float* block_hists, float* coefs, float free_coef, \r
+ float threshold, unsigned char* labels);\r
\r
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, \r
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);\r
if (win_stride == Size())\r
win_stride = block_stride;\r
else\r
- CV_Assert(win_stride == block_stride);\r
+ CV_Assert(win_stride.width % block_stride.width == 0 &&\r
+ win_stride.height % block_stride.height == 0);\r
\r
CV_Assert(padding == Size(0, 0));\r
\r
block_hists.ptr<float>(), (float)threshold_L2hys);\r
\r
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, \r
- img.rows, img.cols, block_hists.ptr<float>(), detector.ptr<float>(), \r
- (float)free_coef, (float)hit_threshold, labels.ptr());\r
+ win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), \r
+ detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr());\r
\r
labels.download(labels_host);\r
unsigned char* vec = labels_host.ptr();\r
int gr_threshold;\r
double hit_threshold;\r
int win_width;\r
+ int win_stride_width;\r
+ int win_stride_height;\r
};\r
\r
\r
<< " [-scale <double>] # HOG window scale factor\n"\r
<< " [-nlevels <int>] # max number of HOG window scales\n"\r
<< " [-win_width <int>] # width of the window (48 or 64)\n"\r
+ << " [-win_stride_width <int>] # distance by OX axis between neighbour wins\n"\r
+ << " [-win_stride_height <int>] # distance by OY axis between neighbour wins\n"\r
<< " [-gr_threshold <int>] # merging similar rects constant\n";\r
return 1;\r
}\r
gr_threshold = 8;\r
hit_threshold = 1.4;\r
win_width = 48;\r
+ win_stride_width = 8;\r
+ win_stride_height = 8;\r
}\r
\r
\r
else if (key == "-scale") settings.scale = atof(val.c_str());\r
else if (key == "-nlevels") settings.nlevels = atoi(val.c_str());\r
else if (key == "-win_width") settings.win_width = atoi(val.c_str());\r
+ else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str());\r
+ else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str());\r
else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str());\r
else throw exception((string("Unknown key: ") + key).c_str());\r
}\r
{\r
settings = s;\r
cout << "\nControls:\n"\r
- << "ESC - exit\n"\r
- << "m - change mode GPU <-> CPU\n"\r
- << "g - convert image to gray or not\n"\r
- << "1/q - increase/decrease HOG scale\n"\r
- << "2/w - increase/decrease levels count\n"\r
- << "3/e - increase/decrease HOG group threshold\n"\r
- << "4/r - increase/decrease hit threshold\n"\r
+ << "\tESC - exit\n"\r
+ << "\tm - change mode GPU <-> CPU\n"\r
+ << "\tg - convert image to gray or not\n"\r
+ << "\t1/q - increase/decrease HOG scale\n"\r
+ << "\t2/w - increase/decrease levels count\n"\r
+ << "\t3/e - increase/decrease HOG group threshold\n"\r
+ << "\t4/r - increase/decrease hit threshold\n"\r
<< endl;\r
\r
use_gpu = true;\r
if (settings.win_width != 64 && settings.win_width != 48)\r
settings.win_width = 64;\r
\r
- cout << endl << "Scale: " << scale << endl;\r
+ cout << "Scale: " << scale << endl;\r
cout << "Group threshold: " << gr_threshold << endl;\r
cout << "Levels number: " << nlevels << endl;\r
cout << "Win width: " << settings.win_width << endl;\r
+ cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n";\r
cout << "Hit threshold: " << hit_threshold << endl;\r
cout << endl;\r
}\r
running = true;\r
\r
Size win_size(settings.win_width, settings.win_width * 2); //(64, 128) or (48, 96)\r
+ Size win_stride(settings.win_stride_width, settings.win_stride_height);\r
\r
vector<float> detector;\r
\r
- if (win_size == Size(64,128))\r
+ if (win_size == Size(64, 128))\r
detector = cv::gpu::HOGDescriptor::getPeopleDetector_64x128();\r
else\r
detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96();\r
gpu_hog.setSVMDetector(detector);\r
\r
// CPU's HOG classifier\r
- cv::HOGDescriptor cpu_hog(win_size, Size(16,16), Size(8,8), Size(8,8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS);\r
+ cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS);\r
cpu_hog.setSVMDetector(detector);\r
\r
// Make endless cycle from video (if src is video)\r
if (use_gpu)\r
{\r
gpu_img = img;\r
- gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, Size(8, 8), Size(0, 0), scale, gr_threshold);\r
+ gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold);\r
}\r
else\r
- cpu_hog.detectMultiScale(img, found, hit_threshold, Size(8, 8), Size(0, 0), scale, gr_threshold);\r
+ cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold);\r
HogWorkEnd();\r
\r
// Draw positive classified windows\r