From: Alexey Spizhevoy Date: Fri, 3 Dec 2010 11:11:44 +0000 (+0000) Subject: added support of gamma_correction parameter into gpu::HOGDescriptor, updated tests X-Git-Tag: accepted/2.0/20130307.220821~3954 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=df48ad76b0ffc8d89af3be68e5886b49d3fba6ce;p=profile%2Fivi%2Fopencv.git added support of gamma_correction parameter into gpu::HOGDescriptor, updated tests --- diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index fca5f66..6f941f3 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1076,6 +1076,7 @@ namespace cv double win_sigma; double threshold_L2hys; int nlevels; + bool gamma_correction; protected: void computeBlockHistograms(const GpuMat& img); @@ -1084,8 +1085,6 @@ namespace cv static int numPartsWithin(int size, int part_size, int stride); static Size numPartsWithin(Size size, Size part_size, Size stride); - bool gamma_correction; - // Coefficients of the separating plane float free_coef; GpuMat detector; diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index d79e076..ab9df03 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -522,7 +522,7 @@ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, i // Gradients computation -template <int nthreads> +template <int nthreads, int correct_gamma> __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img, float angle_scale, PtrElemStepf grad, PtrElemStep qangle) { @@ -533,11 +533,10 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl __shared__ float sh_row[(nthreads + 2) * 3]; uchar4 val; - if (x < width) - val = row[x]; - else if (x == width) - val = row[x - 2]; - // Othrewise we do not read variable 'val' at all + if (x < width) + val = row[x]; + else + val = row[width - 2]; sh_row[threadIdx.x + 1] = val.x; sh_row[threadIdx.x + 1 + (nthreads + 2)] = val.y; @@ -545,7 
+544,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl if (threadIdx.x == 0) { - val = x > 0 ? row[x - 1] : row[1]; + val = row[max(x - 1, 1)]; sh_row[0] = val.x; sh_row[(nthreads + 2)] = val.y; sh_row[2 * (nthreads + 2)] = val.z; @@ -553,7 +552,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl if (threadIdx.x == blockDim.x - 1) { - val = (x < width - 1) ? row[x + 1] : row[width - 2]; + val = row[min(x + 1, width - 2)]; sh_row[blockDim.x + 1] = val.x; sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y; sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z; @@ -571,9 +570,12 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl a.y = sh_row[threadIdx.x + (nthreads + 2)]; a.z = sh_row[threadIdx.x + 2 * (nthreads + 2)]; - float3 dx = make_float3(sqrtf(b.x) - sqrtf(a.x), - sqrtf(b.y) - sqrtf(a.y), - sqrtf(b.z) - sqrtf(a.z)); + float3 dx; + if (correct_gamma) + dx = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z)); + else + dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z); + float3 dy = make_float3(0.f, 0.f, 0.f); if (blockIdx.y > 0 && blockIdx.y < height - 1) @@ -584,9 +586,10 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl val = ((const uchar4*)img.ptr(blockIdx.y + 1))[x]; b = make_float3(val.x, val.y, val.z); - dy = make_float3(sqrtf(b.x) - sqrtf(a.x), - sqrtf(b.y) - sqrtf(a.y), - sqrtf(b.z) - sqrtf(a.z)); + if (correct_gamma) + dy = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z)); + else + dy = make_float3(b.x - a.x, b.y - a.y, b.z - a.z); } float best_dx = dx.x; @@ -623,20 +626,25 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& img, - float angle_scale, DevMem2Df grad, DevMem2D qangle) + float angle_scale, DevMem2Df grad, DevMem2D qangle, bool 
correct_gamma) { const int nthreads = 256; dim3 bdim(nthreads, 1); dim3 gdim(div_up(width, bdim.x), div_up(height, bdim.y)); - compute_gradients_8UC4_kernel<nthreads><<<gdim, bdim>>>(height, width, img, angle_scale, - grad, qangle); + if (correct_gamma) + compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>( + height, width, img, angle_scale, grad, qangle); + else + compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>( + height, width, img, angle_scale, grad, qangle); + cudaSafeCall(cudaThreadSynchronize()); } -template <int nthreads> +template <int nthreads, int correct_gamma> __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img, float angle_scale, PtrElemStepf grad, PtrElemStep qangle) { @@ -647,24 +655,36 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl __shared__ float sh_row[nthreads + 2]; if (x < width) - sh_row[threadIdx.x + 1] = row[x]; - else if (x == width) - sh_row[threadIdx.x + 1] = row[x - 2]; + sh_row[threadIdx.x + 1] = row[x]; + else + sh_row[threadIdx.x + 1] = row[width - 2]; if (threadIdx.x == 0) - sh_row[0] = x > 0 ? row[x - 1] : row[1]; + sh_row[0] = row[max(x - 1, 1)]; if (threadIdx.x == blockDim.x - 1) - sh_row[blockDim.x + 1] = (x < width - 1) ? 
row[x + 1] : row[width - 2]; + sh_row[blockDim.x + 1] = row[min(x + 1, width - 2)]; __syncthreads(); if (x < width) { - float dx = sqrtf(sh_row[threadIdx.x + 2]) - sqrtf(sh_row[threadIdx.x]); + float dx; + + if (correct_gamma) + dx = sqrtf(sh_row[threadIdx.x + 2]) - sqrtf(sh_row[threadIdx.x]); + else + dx = sh_row[threadIdx.x + 2] - sh_row[threadIdx.x]; + float dy = 0.f; if (blockIdx.y > 0 && blockIdx.y < height - 1) - dy = sqrtf(((const unsigned char*)img.ptr(blockIdx.y + 1))[x]) - - sqrtf(((const unsigned char*)img.ptr(blockIdx.y - 1))[x]); + { + float a = ((const unsigned char*)img.ptr(blockIdx.y + 1))[x]; + float b = ((const unsigned char*)img.ptr(blockIdx.y - 1))[x]; + if (correct_gamma) + dy = sqrtf(a) - sqrtf(b); + else + dy = a - b; + } float mag = sqrtf(dx * dx + dy * dy); float ang = (atan2f(dy, dx) + CV_PI_F) * angle_scale - 0.5f; @@ -679,15 +699,20 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& img, - float angle_scale, DevMem2Df grad, DevMem2D qangle) + float angle_scale, DevMem2Df grad, DevMem2D qangle, bool correct_gamma) { const int nthreads = 256; dim3 bdim(nthreads, 1); dim3 gdim(div_up(width, bdim.x), div_up(height, bdim.y)); - compute_gradients_8UC1_kernel<nthreads><<<gdim, bdim>>>(height, width, img, angle_scale, - grad, qangle); + if (correct_gamma) + compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>( + height, width, img, angle_scale, grad, qangle); + else + compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>( + height, width, img, angle_scale, grad, qangle); + cudaSafeCall(cudaThreadSynchronize()); } diff --git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp index 8883645..9a77ad1 100644 --- a/modules/gpu/src/hog.cpp +++ b/modules/gpu/src/hog.cpp @@ -85,9 +85,9 @@ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, i cv::gpu::DevMem2Df descriptors); void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, - float 
angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle); + float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle, bool correct_gamma); void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::DevMem2D& img, - float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle); + float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle, bool correct_gamma); void resize_8UC1(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst); void resize_8UC4(const cv::gpu::DevMem2D& src, cv::gpu::DevMem2D dst); @@ -118,8 +118,6 @@ cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block CV_Assert(cell_size == Size(8, 8)); - CV_Assert(gamma_correction == true); - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); CV_Assert(cells_per_block == Size(2, 2)); @@ -194,10 +192,10 @@ void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& grad, Gp float angleScale = (float)(nbins / CV_PI); switch (img.type()) { case CV_8UC1: - hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle); + hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); break; case CV_8UC4: - hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, grad, qangle); + hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); break; } } diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp index 34e7c58..de2b9ca 100644 --- a/samples/gpu/hog.cpp +++ b/samples/gpu/hog.cpp @@ -34,6 +34,7 @@ public: int win_width; int win_stride_width; int win_stride_height; + bool gamma_corr; }; @@ -72,6 +73,7 @@ private: int gr_threshold; int nlevels; double hit_threshold; + bool gamma_corr; int64 hog_work_begin; double hog_work_fps; @@ -99,7 +101,8 @@ int main(int argc, char** argv) << " [-win_width ] # width of the window (48 or 64)\n" << " 
[-win_stride_width ] # distance by OX axis between neighbour wins\n" << " [-win_stride_height ] # distance by OY axis between neighbour wins\n" - << " [-gr_threshold ] # merging similar rects constant\n"; + << " [-gr_threshold ] # merging similar rects constant\n" + << " [-gamma_corr ] # do gamma correction or not\n"; return 1; } App app(Settings::Read(argc, argv)); @@ -125,6 +128,7 @@ Settings::Settings() win_width = 48; win_stride_width = 8; win_stride_height = 8; + gamma_corr = true; } @@ -149,6 +153,7 @@ Settings Settings::Read(int argc, char** argv) else if (key == "-win_stride_width") settings.win_stride_width = atoi(val.c_str()); else if (key == "-win_stride_height") settings.win_stride_height = atoi(val.c_str()); else if (key == "-gr_threshold") settings.gr_threshold = atoi(val.c_str()); + else if (key == "-gamma_corr") settings.gamma_corr = atoi(val.c_str()) != 0; else throw runtime_error((string("Unknown key: ") + key)); } @@ -176,6 +181,7 @@ App::App(const Settings &s) gr_threshold = settings.gr_threshold; nlevels = settings.nlevels; hit_threshold = settings.hit_threshold; + gamma_corr = settings.gamma_corr; if (settings.win_width != 64 && settings.win_width != 48) settings.win_width = 64; @@ -186,6 +192,7 @@ App::App(const Settings &s) cout << "Win width: " << settings.win_width << endl; cout << "Win stride: (" << settings.win_stride_width << ", " << settings.win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; + cout << "Gamma correction: " << gamma_corr << endl; cout << endl; } @@ -205,11 +212,14 @@ void App::RunOpencvGui() detector = cv::gpu::HOGDescriptor::getPeopleDetector_48x96(); // GPU's HOG classifier - cv::gpu::HOGDescriptor gpu_hog(win_size); + cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, + cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + cv::gpu::HOGDescriptor::DEFAULT_NLEVELS); gpu_hog.setSVMDetector(detector); // CPU's HOG classifier - cv::HOGDescriptor 
cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, HOGDescriptor::L2Hys, 0.2, true, HOGDescriptor::DEFAULT_NLEVELS); + cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, + HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); cpu_hog.setSVMDetector(detector); // Make endless cycle from video (if src is video) @@ -347,6 +357,11 @@ void App::HandleKey(char key) hit_threshold = max(0.0, hit_threshold - 0.25); cout << "Hit threshold: " << hit_threshold << endl; break; + case 'c': + case 'C': + gamma_corr = !gamma_corr; + cout << "Gamma correction: " << gamma_corr << endl; + break; } } diff --git a/tests/gpu/src/hog.cpp b/tests/gpu/src/hog.cpp index 2477eb6..474f3bc 100644 --- a/tests/gpu/src/hog.cpp +++ b/tests/gpu/src/hog.cpp @@ -47,7 +47,7 @@ using namespace std; //#define DUMP #define CHECK(pred, err) if (!(pred)) { \ - ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \ + ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \ ts->set_failed_test_info(err); \ return; } @@ -141,6 +141,7 @@ struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor { cv::gpu::GpuMat d_img(img); + gamma_correction = false; setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector()); //cpu detector may be updated soon //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector()); @@ -157,9 +158,9 @@ struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor #endif // Test detect on smaller image - cv::gpu::GpuMat d_img2; - cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols / 2, d_img.rows / 2)); - detect(d_img2, locations, 0); + cv::Mat img2; + cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2)); + detect(cv::gpu::GpuMat(img2), locations, 0); #ifdef DUMP dump(block_hists, locations); @@ -168,8 +169,8 @@ struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor #endif // Test detect on greater image - 
cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols * 2, d_img.rows * 2)); - detect(d_img2, locations, 0); + cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2)); + detect(cv::gpu::GpuMat(img2), locations, 0); #ifdef DUMP dump(block_hists, locations);