double win_sigma;\r
double threshold_L2hys;\r
int nlevels;\r
+ bool gamma_correction;\r
\r
protected:\r
void computeBlockHistograms(const GpuMat& img);\r
static int numPartsWithin(int size, int part_size, int stride);\r
static Size numPartsWithin(Size size, Size part_size, Size stride);\r
\r
- bool gamma_correction;\r
-\r
// Coefficients of the separating plane\r
float free_coef;\r
GpuMat detector;\r
// Gradients computation\r
\r
\r
-template <int nthreads>\r
+template <int nthreads, int correct_gamma>\r
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img, \r
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)\r
{\r
__shared__ float sh_row[(nthreads + 2) * 3];\r
\r
uchar4 val;\r
- if (x < width)\r
- val = row[x];\r
- else if (x == width)\r
- val = row[x - 2];\r
- // Othrewise we do not read variable 'val' at all\r
+ if (x < width) \r
+ val = row[x]; \r
+ else \r
+ val = row[width - 2];\r
\r
sh_row[threadIdx.x + 1] = val.x;\r
sh_row[threadIdx.x + 1 + (nthreads + 2)] = val.y;\r
\r
if (threadIdx.x == 0)\r
{\r
- val = x > 0 ? row[x - 1] : row[1];\r
+ val = row[max(x - 1, 1)];\r
sh_row[0] = val.x;\r
sh_row[(nthreads + 2)] = val.y;\r
sh_row[2 * (nthreads + 2)] = val.z;\r
\r
if (threadIdx.x == blockDim.x - 1)
{
- val = (x < width - 1) ? row[x + 1] : row[width - 2];
+ // Right halo element for the last thread of the block. Keep the true
+ // neighbour row[x + 1] while it lies inside the row and fall back to
+ // row[width - 2] only past the last pixel. A plain
+ // min(x + 1, width - 2) would map x == width - 2 to row[width - 2]
+ // (the pixel itself) instead of row[width - 1].
+ val = row[(x + 1 < width) ? x + 1 : width - 2];
sh_row[blockDim.x + 1] = val.x;
sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;
sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;
a.y = sh_row[threadIdx.x + (nthreads + 2)];\r
a.z = sh_row[threadIdx.x + 2 * (nthreads + 2)];\r
\r
- float3 dx = make_float3(sqrtf(b.x) - sqrtf(a.x), \r
- sqrtf(b.y) - sqrtf(a.y), \r
- sqrtf(b.z) - sqrtf(a.z)); \r
+ float3 dx;\r
+ if (correct_gamma)\r
+ dx = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z)); \r
+ else\r
+ dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z); \r
+\r
float3 dy = make_float3(0.f, 0.f, 0.f);\r
\r
if (blockIdx.y > 0 && blockIdx.y < height - 1)\r
val = ((const uchar4*)img.ptr(blockIdx.y + 1))[x];\r
b = make_float3(val.x, val.y, val.z);\r
\r
- dy = make_float3(sqrtf(b.x) - sqrtf(a.x), \r
- sqrtf(b.y) - sqrtf(a.y), \r
- sqrtf(b.z) - sqrtf(a.z));\r
+ if (correct_gamma)\r
+ dy = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z));\r
+ else\r
+ dy = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);\r
}\r
\r
float best_dx = dx.x;\r
\r
\r
+// Host-side launcher for compute_gradients_8UC4_kernel.
+// Launches one 256-thread block per 256-pixel row segment
+// (grid = div_up(width, 256) x height) and picks the kernel
+// instantiation from correct_gamma (1 = sqrt-corrected, 0 = raw values).
+// nbins is accepted but not used by this function.
void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& img, 
- float angle_scale, DevMem2Df grad, DevMem2D qangle)
+ float angle_scale, DevMem2Df grad, DevMem2D qangle, bool correct_gamma)
{
const int nthreads = 256;

dim3 bdim(nthreads, 1);
dim3 gdim(div_up(width, bdim.x), div_up(height, bdim.y));

- compute_gradients_8UC4_kernel<nthreads><<<gdim, bdim>>>(height, width, img, angle_scale, 
- grad, qangle);
+ if (correct_gamma)
+ compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(
+ height, width, img, angle_scale, grad, qangle);
+ else
+ compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(
+ height, width, img, angle_scale, grad, qangle);
+
+ // A kernel launch returns no status itself; query it explicitly so a bad
+ // launch configuration is reported here.
+ cudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaThreadSynchronize());
}
\r
\r
-template <int nthreads>\r
+template <int nthreads, int correct_gamma>\r
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img, \r
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)\r
{\r
__shared__ float sh_row[nthreads + 2];\r
\r
if (x < width) \r
- sh_row[threadIdx.x + 1] = row[x];\r
- else if (x == width)\r
- sh_row[threadIdx.x + 1] = row[x - 2];\r
+ sh_row[threadIdx.x + 1] = row[x]; \r
+ else \r
+ sh_row[threadIdx.x + 1] = row[width - 2];\r
\r
if (threadIdx.x == 0)\r
- sh_row[0] = x > 0 ? row[x - 1] : row[1];\r
+ sh_row[0] = row[max(x - 1, 1)];\r
\r
if (threadIdx.x == blockDim.x - 1)
- sh_row[blockDim.x + 1] = (x < width - 1) ? row[x + 1] : row[width - 2];
+ // Right halo element for the last thread of the block. Keep the true
+ // neighbour row[x + 1] while it lies inside the row and fall back to
+ // row[width - 2] only past the last pixel. A plain
+ // min(x + 1, width - 2) would map x == width - 2 to row[width - 2]
+ // (the pixel itself) instead of row[width - 1].
+ sh_row[blockDim.x + 1] = row[(x + 1 < width) ? x + 1 : width - 2];
\r
__syncthreads();\r
if (x < width)\r
{\r
- float dx = sqrtf(sh_row[threadIdx.x + 2]) - sqrtf(sh_row[threadIdx.x]);\r
+ float dx;\r
+\r
+ if (correct_gamma)\r
+ dx = sqrtf(sh_row[threadIdx.x + 2]) - sqrtf(sh_row[threadIdx.x]);\r
+ else\r
+ dx = sh_row[threadIdx.x + 2] - sh_row[threadIdx.x];\r
+\r
float dy = 0.f;\r
if (blockIdx.y > 0 && blockIdx.y < height - 1)\r
- dy = sqrtf(((const unsigned char*)img.ptr(blockIdx.y + 1))[x]) - \r
- sqrtf(((const unsigned char*)img.ptr(blockIdx.y - 1))[x]);\r
+ {\r
+ float a = ((const unsigned char*)img.ptr(blockIdx.y + 1))[x];\r
+ float b = ((const unsigned char*)img.ptr(blockIdx.y - 1))[x];\r
+ if (correct_gamma)\r
+ dy = sqrtf(a) - sqrtf(b);\r
+ else\r
+ dy = a - b;\r
+ }\r
float mag = sqrtf(dx * dx + dy * dy);\r
\r
float ang = (atan2f(dy, dx) + CV_PI_F) * angle_scale - 0.5f;\r
\r
\r
+// Host-side launcher for compute_gradients_8UC1_kernel.
+// Launches one 256-thread block per 256-pixel row segment
+// (grid = div_up(width, 256) x height) and picks the kernel
+// instantiation from correct_gamma (1 = sqrt-corrected, 0 = raw values).
+// nbins is accepted but not used by this function.
void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& img, 
- float angle_scale, DevMem2Df grad, DevMem2D qangle)
+ float angle_scale, DevMem2Df grad, DevMem2D qangle, bool correct_gamma)
{
const int nthreads = 256;

dim3 bdim(nthreads, 1);
dim3 gdim(div_up(width, bdim.x), div_up(height, bdim.y));

- compute_gradients_8UC1_kernel<nthreads><<<gdim, bdim>>>(height, width, img, angle_scale, 
- grad, qangle);
+ if (correct_gamma)
+ compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(
+ height, width, img, angle_scale, grad, qangle);
+ else
+ compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(
+ height, width, img, angle_scale, grad, qangle);
+
+ // A kernel launch returns no status itself; query it explicitly so a bad
+ // launch configuration is reported here.
+ cudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaThreadSynchronize());
}
\r
cv::gpu::DevMem2Df descriptors);\r
\r
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, \r
- float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);\r
+ float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle, bool correct_gamma);\r
void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::DevMem2D& img, \r
- float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);\r
+ float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle, bool correct_gamma);\r
\r
void resize_8UC1(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst);\r
void resize_8UC4(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst);\r
\r
CV_Assert(cell_size == Size(8, 8));\r
\r
- CV_Assert(gamma_correction == true);\r
-\r
Size cells_per_block = Size(block_size.width / cell_size.width, \r
block_size.height / cell_size.height);\r
CV_Assert(cells_per_block == Size(2, 2));\r
float angleScale = (float)(nbins / CV_PI);\r
switch (img.type()) {\r
case CV_8UC1:\r
- hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle);\r
+ hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);\r
break;\r
case CV_8UC4:\r
- hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, grad, qangle);\r
+ hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction);\r
break;\r
}\r
}\r
int win_width;\r
int win_stride_width;\r
int win_stride_height;\r
+ bool gamma_corr;\r
};\r
\r
\r
int gr_threshold;\r
int nlevels;\r
double hit_threshold;\r
+ bool gamma_corr;\r
\r
int64 hog_work_begin;\r
double hog_work_fps;\r
<< " [-win_width <int>] # width of the window (48 or 64)\n"\r
<< " [-win_stride_width <int>] # distance by OX axis between neighbour wins\n"\r
<< " [-win_stride_height <int>] # distance by OY axis between neighbour wins\n"\r
- << " [-gr_threshold <int>] # merging similar rects constant\n";\r
+ << " [-gr_threshold <int>] # merging similar rects constant\n"\r
+ << " [-gamma_corr <int>] # do gamma correction or not\n";\r
return 1;\r
}\r
App app(Settings::Read(argc, argv));\r
win_width = 48;\r
win_stride_width = 8;\r
win_stride_height = 8;\r
+ gamma_corr = true;\r
}\r
\r
\r
else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str());\r
else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str());\r
else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str());\r
+ else if (key == "-gamma_corr") settings.gamma_corr = atoi(val.c_str()) != 0;\r
else throw runtime_error((string("Unknown key: ") + key));\r
}\r
\r
gr_threshold = settings.gr_threshold;\r
nlevels = settings.nlevels;\r
hit_threshold = settings.hit_threshold;\r
+ gamma_corr = settings.gamma_corr;\r
\r
if (settings.win_width != 64 && settings.win_width != 48)\r
settings.win_width = 64;\r
cout << "Win width: " << settings.win_width << endl;\r
cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n";\r
cout << "Hit threshold: " << hit_threshold << endl;\r
+ cout << "Gamma correction: " << gamma_corr << endl;\r
cout << endl;\r
}\r
\r
detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96();\r
\r
// GPU's HOG classifier\r
- cv::gpu::HOGDescriptor gpu_hog(win_size);\r
+ cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, \r
+ cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, \r
+ cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);\r
gpu_hog.setSVMDetector(detector);\r
\r
// CPU's HOG classifier\r
- cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS);\r
+ cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, \r
+ HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);\r
cpu_hog.setSVMDetector(detector);\r
\r
// Make endless cycle from video (if src is video)\r
hit_threshold = max(0.0, hit_threshold - 0.25);\r
cout << "Hit threshold: " << hit_threshold << endl;\r
break;\r
+ case 'c':\r
+ case 'C':\r
+ gamma_corr = !gamma_corr;\r
+ cout << "Gamma correction: " << gamma_corr << endl;\r
+ break;\r
}\r
}\r
\r
//#define DUMP\r
\r
#define CHECK(pred, err) if (!(pred)) { \\r
- ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \\r
+ ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \\r
ts->set_failed_test_info(err); \\r
return; }\r
\r
{\r
cv::gpu::GpuMat d_img(img);\r
\r
+ gamma_correction = false;\r
setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());\r
//cpu detector may be updated soon\r
//hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());\r
#endif\r
\r
// Test detect on smaller image\r
- cv::gpu::GpuMat d_img2;\r
- cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols / 2, d_img.rows / 2)); \r
- detect(d_img2, locations, 0);\r
+ cv::Mat img2;\r
+ cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2)); \r
+ detect(cv::gpu::GpuMat(img2), locations, 0);\r
\r
#ifdef DUMP\r
dump(block_hists, locations);\r
#endif\r
\r
// Test detect on greater image\r
- cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols * 2, d_img.rows * 2)); \r
- detect(d_img2, locations, 0);\r
+ cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2)); \r
+ detect(cv::gpu::GpuMat(img2), locations, 0);\r
\r
#ifdef DUMP\r
dump(block_hists, locations);\r