From: Alexey Spizhevoy Date: Thu, 18 Nov 2010 09:22:23 +0000 (+0000) Subject: added getDescriptors support into gpu HOG, also added commented test for this feature X-Git-Tag: accepted/2.0/20130307.220821~4193 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=11c0c5bf857e42a27b82f3d7e26acb865ad04319;p=profile%2Fivi%2Fopencv.git added getDescriptors support into gpu HOG, also added commented test for this feature --- diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index eeb3bc6..cc0b438 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1001,14 +1001,13 @@ namespace cv void setSVMDetector(const vector& detector); bool checkDetectorSize() const; + void computeBlockHistograms(const GpuMat& img); void detect(const GpuMat& img, vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size()); void detectMultiScale(const GpuMat& img, vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, int group_threshold=2); - - ////TODO: test it - //void getDescriptors(const GpuMat& img, Size win_stride, vector& descriptors) + void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors); Size win_size; Size block_size; @@ -1035,7 +1034,6 @@ namespace cv static int numPartsWithin(int size, int part_size, int stride); static Size numPartsWithin(Size size, Size part_size, Size stride); - void computeBlockHistograms(const GpuMat& img); void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle); GpuMat grad, qangle; diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index 741aa06..3dc7147 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -397,11 +397,9 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const } -// We only support win_stride_x == block_stride_x, win_stride_y == block_stride_y 
void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, - int height, int width, float* block_hists, float* coefs, - float free_coef, float threshold, unsigned char* labels) + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + float* coefs, float free_coef, float threshold, unsigned char* labels) { const int nthreads = 256; const int nblocks = 1; @@ -425,8 +423,54 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block cudaSafeCall(cudaThreadSynchronize()); } +//---------------------------------------------------------------------------- +// Extract descriptors + + +template +__global__ void extract_descriptors_kernel(const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + const float* block_hists, PtrElemStepf descriptors) +{ + // Get left top corner of the window in src + const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + + blockIdx.x * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + float* descriptor = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x); + + // Copy elements from src to dst + for (int i = threadIdx.x; i < cdescr_size; i += nthreads) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + } +} + -//------------------------------------------------------------ +void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + DevMem2Df descriptors) +{ + const int nthreads = 256; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / 
win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + dim3 threads(nthreads, 1); + dim3 grid(img_win_width, img_win_height); + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; + extract_descriptors_kernel<<>>( + img_win_width, img_block_width, win_block_stride_x, win_block_stride_y, + block_hists, descriptors); + cudaSafeCall(cudaThreadSynchronize()); +} + +//---------------------------------------------------------------------------- // Gradients computation @@ -481,7 +525,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl float3 dx = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), - sqrtf(b.z) - sqrtf(a.z)); + sqrtf(b.z) - sqrtf(a.z)); float3 dy = make_float3(0.f, 0.f, 0.f); if (blockIdx.y > 0 && blockIdx.y < height - 1) diff --git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp index 218be45..7f0b3ea 100644 --- a/modules/gpu/src/hog.cpp +++ b/modules/gpu/src/hog.cpp @@ -51,8 +51,10 @@ double cv::gpu::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; } bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; } void cv::gpu::HOGDescriptor::setSVMDetector(const vector&) { throw_nogpu(); } void cv::gpu::HOGDescriptor::computeGradient(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_nogpu(); } void cv::gpu::HOGDescriptor::detect(const GpuMat&, vector&, double, Size, Size) { throw_nogpu(); } void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, vector&, double, Size, Size, double, int) { throw_nogpu(); } +void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&) { throw_nogpu(); } std::vector cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector(); } std::vector cv::gpu::HOGDescriptor::getPeopleDetector_48x96() { throw_nogpu(); return std::vector(); } 
std::vector cv::gpu::HOGDescriptor::getPeopleDetector_64x128() { throw_nogpu(); return std::vector(); } @@ -76,6 +78,10 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int width, float* block_hists, float* coefs, float free_coef, float threshold, unsigned char* labels); +void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + cv::gpu::DevMem2Df descriptors); + void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle); void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::DevMem2D& img, @@ -212,39 +218,23 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img) } -////TODO: test it -//void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, -// vector& descriptors) -//{ -// CV_Assert(win_stride.width % block_stride.width == 0 && -// win_stride.height % block_stride.height == 0); -// -// computeBlockHistograms(img); -// -// Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); -// GpuMat hists_reshaped = block_hists.reshape(0, blocks_per_img.height); -// -// const int block_hist_size = getBlockHistogramSize(); -// Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); -// Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); -// -// descriptors.resize(wins_per_img.area()); -// for (int i = 0; i < wins_per_img.height; ++i) -// { -// for (int j = 0; j < wins_per_img.width; ++j) -// { -// Range rows; -// rows.start = i * (blocks_per_win.height + 1); -// rows.end = rows.start + blocks_per_win.height; -// -// Range cols; -// cols.start = j * (blocks_per_win.width + 1) * block_hist_size; -// cols.end = cols.start + blocks_per_win.width * block_hist_size; -// -// descriptors[i * wins_per_img.width + j] = 
hists_reshaped(rows, cols); -// } -// } -//} +void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors) +{ + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); + + computeBlockHistograms(img); + + const int block_hist_size = getBlockHistogramSize(); + Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); + Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); + + descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F); + + hog::extract_descriptors(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr(), + descriptors); +} void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector& hits, double hit_threshold, diff --git a/samples/gpu/gpu_hog.cpp b/samples/gpu/gpu_hog.cpp index 53370ba..b4de08c 100644 --- a/samples/gpu/gpu_hog.cpp +++ b/samples/gpu/gpu_hog.cpp @@ -225,7 +225,11 @@ void App::RunOpencvGui() vc >> frame; } else + { frame = imread(settings.src); + if (frame.empty()) + throw exception(string("Can't open image file: " + settings.src).c_str()); + } Mat img_aux, img, img_to_show; gpu::GpuMat gpu_img; diff --git a/tests/gpu/src/hog.cpp b/tests/gpu/src/hog.cpp index cc3e903..b4366b5 100644 --- a/tests/gpu/src/hog.cpp +++ b/tests/gpu/src/hog.cpp @@ -51,9 +51,43 @@ using namespace std; ts->set_failed_test_info(err); \ return; } -struct CV_GpuHogTest : public CvTest +struct CV_GpuHogDetectionTest: public CvTest { - CV_GpuHogTest() : CvTest( "GPU-HOG", "HOGDescriptor" ) {} + CV_GpuHogDetectionTest(): CvTest( "GPU-HOG-detect", "HOGDescriptorDetection" ) {} + + void run(int) + { + try + { + cv::Mat img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/road.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + +#ifdef DUMP + f.open((std::string(ts->get_data_path()) + 
"hog/expected_output.bin").c_str(), std::ios_base::binary); + CHECK(f.is_open(), CvTS::FAIL_GENERIC); +#else + f.open((std::string(ts->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); + CHECK(f.is_open(), CvTS::FAIL_MISSING_TEST_DATA); +#endif + + // Test on color image + cv::Mat img; + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + test(img); + + // Test on gray image + cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + test(img); + + f.close(); + } + catch (const cv::Exception& e) + { + f.close(); + if (!check_and_treat_gpu_exception(e, ts)) throw; + return; + } + } #ifdef DUMP void dump(const cv::Mat& block_hists, const std::vector& locations) @@ -168,45 +202,115 @@ struct CV_GpuHogTest : public CvTest #endif } +#ifdef DUMP + std::ofstream f; +#else + std::ifstream f; +#endif - void run(int) - { +} gpu_hog_detection_test; + + +struct CV_GpuHogGetDescriptorsTest: public CvTest +{ + CV_GpuHogGetDescriptorsTest(): CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors") {} + + void run(int) + { try { - cv::Mat img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/road.png"); + // Load image (e.g. 
train data, composed from windows) + cv::Mat img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/train_data.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); -#ifdef DUMP - f.open((std::string(ts->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); - CHECK(f.is_open(), CvTS::FAIL_GENERIC); -#else - f.open((std::string(ts->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); - CHECK(f.is_open(), CvTS::FAIL_MISSING_TEST_DATA); -#endif - - // Test on color image + // Convert to C4 cv::Mat img; cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - test(img); + cv::gpu::GpuMat d_img(img); - // Test on gray image - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); - test(img); + cv::Size win_size(64, 128); + cv::gpu::HOGDescriptor hog(win_size); - f.close(); + // Convert train images into feature vectors (train table) + cv::gpu::GpuMat descriptors; + hog.getDescriptors(d_img, win_size, descriptors); + + // Check size of the result train table + wins_per_img_x = 3; + wins_per_img_y = 2; + blocks_per_win_x = 7; + blocks_per_win_y = 15; + block_hist_size = 36; + cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size, + wins_per_img_x * wins_per_img_y); + CHECK(descriptors.size() == descr_size_expected, CvTS::FAIL_INVALID_OUTPUT); + + /* Now we want to extract the same feature vectors, but from single images. NOTE: results will + be different, due to border values interpolation. Using many small images is slower, however we + won't call getDescriptors and will use computeBlockHistograms instead. 
computeBlockHistograms + works correctly, it can be checked in the gpu_hog sample */ + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive1.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + // Everything is fine with interpolation for left top subimage + CHECK(cv::norm(hog.block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT); + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive2.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(hog.block_hists, descriptors.rowRange(1, 2)); + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative1.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(hog.block_hists, descriptors.rowRange(2, 3)); + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative2.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(hog.block_hists, descriptors.rowRange(3, 4)); + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive3.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(hog.block_hists, descriptors.rowRange(4, 5)); + + img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative3.png"); + CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(hog.block_hists, descriptors.rowRange(5, 6)); } catch 
(const cv::Exception& e) { - f.close(); if (!check_and_treat_gpu_exception(e, ts)) throw; return; } } -#ifdef DUMP - std::ofstream f; -#else - std::ifstream f; -#endif + // Does not compare border value, as interpolation leads to delta + void compare_inner_parts(cv::Mat d1, cv::Mat d2) + { + for (int i = 1; i < blocks_per_win_y - 1; ++i) + for (int j = 1; j < blocks_per_win_x - 1; ++j) + for (int k = 0; k < block_hist_size; ++k) + { + float a = d1.at(0, (i * blocks_per_win_x + j) * block_hist_size); + float b = d2.at(0, (i * blocks_per_win_x + j) * block_hist_size); + CHECK(a == b, CvTS::FAIL_INVALID_OUTPUT) + } + } + + int wins_per_img_x; + int wins_per_img_y; + int blocks_per_win_x; + int blocks_per_win_y; + int block_hist_size; +} gpu_hog_get_descriptors_test; -} gpu_hog_test;