From ab04a03621695cdcaef9b531e2cc43db9f7ded21 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Thu, 2 Dec 2010 09:07:13 +0000 Subject: [PATCH] added support of BORDER_REFLECT into gpu::cornerHarris and etc, added support of border extr. int linear filters --- modules/gpu/include/opencv2/gpu/gpu.hpp | 6 +- modules/gpu/src/cuda/border_interpolate.hpp | 108 ++++++++++++ modules/gpu/src/cuda/cuda_shared.hpp | 8 + modules/gpu/src/cuda/hog.cu | 3 +- modules/gpu/src/cuda/imgproc.cu | 112 ++++++++---- modules/gpu/src/cuda/linear_filters_beta.cu | 264 ++++++++++++++++++++++++++++ modules/gpu/src/imgproc_gpu.cpp | 120 ++++++++++--- tests/gpu/src/imgproc_gpu.cpp | 34 ++-- 8 files changed, 577 insertions(+), 78 deletions(-) create mode 100644 modules/gpu/src/cuda/border_interpolate.hpp create mode 100644 modules/gpu/src/cuda/linear_filters_beta.cu diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 50dfc2a..fca5f66 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -628,13 +628,11 @@ namespace cv CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3); //! computes Harris cornerness criteria at each image pixel - // (does BORDER_CONSTANT interpolation with 0 as the fill value) - CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k); + CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101); //! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria - // (does BORDER_CONSTANT interpolation with 0 as the fill value) - CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize=3); + CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101); //////////////////////////////// Filter Engine //////////////////////////////// diff --git a/modules/gpu/src/cuda/border_interpolate.hpp b/modules/gpu/src/cuda/border_interpolate.hpp new file mode 100644 index 0000000..26b19cb --- /dev/null +++ b/modules/gpu/src/cuda/border_interpolate.hpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +namespace cv { namespace gpu { + + struct BrdReflect101 + { + BrdReflect101(int len) : last(len - 1) {} + + __device__ int idx_low(int i) const + { + return abs(i); + } + + __device__ int idx_high(int i) const + { + return last - abs(i - last); + } + + __device__ int idx(int i) const + { + return i <= last ? idx_low(i) : idx_high(i); + } + + bool is_range_safe(int mini, int maxi) const + { + return -last <= mini && maxi <= 2 * last; + } + + int last; + }; + + + template + struct BrdRowReflect101: BrdReflect101 + { + BrdRowReflect101(int len) : BrdReflect101(len) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i)]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i)]; + } + }; + + + template + struct BrdColReflect101: BrdReflect101 + { + BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {} + + __device__ float at_low(int i, const T* data) const + { + return data[idx_low(i) * step]; + } + + __device__ float at_high(int i, const T* data) const + { + return data[idx_high(i) * step]; + } + + int step; + }; + +}} \ No newline at end of file diff --git a/modules/gpu/src/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp index ca82059..ce0a704 100644 --- a/modules/gpu/src/cuda/cuda_shared.hpp +++ b/modules/gpu/src/cuda/cuda_shared.hpp @@ -94,6 +94,14 @@ namespace cv cudaSafeCall( cudaGetTextureReference(&tex, name) ); cudaSafeCall( cudaUnbindTexture(tex) ); } + + // Available GPU border interpolation modes (named as CPU + // border interpolation modes) + enum + { + BORDER_REFLECT101 = 0, + }; + } } diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index b1fee4c..8c8144e 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -322,7 +322,8 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y, normalize_hists_kernel_many_blocks<256, nblocks><<>>(block_hist_size, img_block_width, block_hists, threshold); else if (nthreads == 512) normalize_hists_kernel_many_blocks<512, nblocks><<>>(block_hist_size, img_block_width, block_hists, threshold); - // We don't support bigger sizes of the block histograms + else + cv::gpu::error("normalize_hists: histogram's size is too big, try to decrease number of bins", __FILE__, __LINE__); cudaSafeCall(cudaThreadSynchronize()); } diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index ab57cf9..0f4fe41 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -41,6 +41,7 @@ //M*/ #include "cuda_shared.hpp" +#include "border_interpolate.hpp" using namespace cv::gpu; @@ -464,10 +465,40 @@ namespace cv { namespace gpu { namespace imgproc reprojectImageTo3D_caller(disp, xyzw, q, stream); } +//////////////////////////////////////// Extract Cov Data //////////////////////////////////////////////// + + __global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx, + const PtrStepf Dy, PtrStepf dst) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < cols && y < rows) + { + float dx = Dx.ptr(y)[x]; + float dy = Dy.ptr(y)[x]; + + dst.ptr(y)[x] = dx * dx; + dst.ptr(y + rows)[x] = dx * dy; + dst.ptr(y + (rows << 1))[x] = dy * dy; + } + } + + void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst) + { + dim3 threads(32, 8); + dim3 grid(divUp(Dx.cols, threads.x), divUp(Dx.rows, threads.y)); + + extractCovData_kernel<<>>(Dx.cols, Dx.rows, Dx, Dy, dst); + cudaSafeCall(cudaThreadSynchronize()); + } + /////////////////////////////////////////// Corner Harris ///////////////////////////////////////////////// + template __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, - const PtrStep Dx, const PtrStep Dy, PtrStep dst) + const PtrStep Dx, const PtrStep Dy, PtrStep dst, B border_row, + B border_col) { const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -478,22 +509,21 @@ namespace cv { namespace gpu { namespace imgproc float b = 0.f; float c = 0.f; - int offset1 = -(block_size / 2); - int offset2 = offset1 + block_size; - - unsigned int j_begin = max(x + offset1, 0); - unsigned int i_begin = max(y + offset1, 0); - unsigned int j_end = min(x + offset2, cols); - unsigned int i_end = min(y + offset2, rows); + const int ibegin = y - (block_size / 2); + const int jbegin = x - (block_size / 2); + const int iend = ibegin + block_size; + const int jend = jbegin + block_size; - for (unsigned int i = i_begin; i < i_end; ++i) + for (int i = ibegin; i < iend; ++i) { - const float* dx_row = (const float*)Dx.ptr(i); - const float* dy_row = (const float*)Dy.ptr(i); - for (unsigned int j = j_begin; j < j_end; ++j) + int y = border_col.idx(i); + const float* dx_row = (const float*)Dx.ptr(y); + const float* dy_row = (const float*)Dy.ptr(y); + for (int j = jbegin; j < jend; ++j) { - float dx = dx_row[j]; - float dy = dy_row[j]; + int x = border_row.idx(j); + float dx = dx_row[x]; + float dy = dy_row[x]; a += dx * dx; b += dx * dy; c += dy * dy; @@ -504,7 +534,8 @@ namespace cv { namespace gpu { namespace imgproc } } - void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst) + void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, + int border_type) { const int rows = Dx.rows; const int cols = Dx.cols; @@ -512,14 +543,22 @@ namespace cv { namespace gpu { namespace imgproc dim3 threads(32, 8); dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - cornerHarris_kernel<<>>(cols, rows, block_size, k, Dx, Dy, dst); + switch (border_type) + { + case BORDER_REFLECT101: + cornerHarris_kernel<<>>( + cols, rows, block_size, k, Dx, Dy, dst, + BrdReflect101(cols), BrdReflect101(rows)); + break; + } cudaSafeCall(cudaThreadSynchronize()); } /////////////////////////////////////////// Corner Min Eigen Val ///////////////////////////////////////////////// - __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, - const PtrStep Dx, const PtrStep Dy, PtrStep dst) + template + __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, const PtrStep Dx, + const PtrStep Dy, PtrStep dst, B border_row, B border_col) { const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -530,22 +569,21 @@ namespace cv { namespace gpu { namespace imgproc float b = 0.f; float c = 0.f; - int offset1 = -(block_size / 2); - int offset2 = offset1 + block_size; - - unsigned int j_begin = max(x + offset1, 0); - unsigned int i_begin = max(y + offset1, 0); - unsigned int j_end = min(x + offset2, cols); - unsigned int i_end = min(y + offset2, rows); + const int ibegin = y - (block_size / 2); + const int jbegin = x - (block_size / 2); + const int iend = ibegin + block_size; + const int jend = jbegin + block_size; - for (unsigned int i = i_begin; i < i_end; ++i) + for (int i = ibegin; i < iend; ++i) { - const float* dx_row = (const float*)Dx.ptr(i); - const float* dy_row = (const float*)Dy.ptr(i); - for (unsigned int j = j_begin; j < j_end; ++j) + int y = border_col.idx(i); + const float* dx_row = (const float*)Dx.ptr(y); + const float* dy_row = (const float*)Dy.ptr(y); + for (int j = jbegin; j < jend; ++j) { - float dx = dx_row[j]; - float dy = dy_row[j]; + int x = border_row.idx(j); + float dx = dx_row[x]; + float dy = dy_row[x]; a += dx * dx; b += dx * dy; c += dy * dy; @@ -558,7 +596,8 @@ namespace cv { namespace gpu { namespace imgproc } } - void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst) + void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, + int border_type) { const int rows = Dx.rows; const int cols = Dx.cols; @@ -566,7 +605,14 @@ namespace cv { namespace gpu { namespace imgproc dim3 threads(32, 8); dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - cornerMinEigenVal_kernel<<>>(cols, rows, block_size, Dx, Dy, dst); + switch (border_type) + { + case BORDER_REFLECT101: + cornerMinEigenVal_kernel<<>>( + cols, rows, block_size, Dx, Dy, dst, + BrdReflect101(cols), BrdReflect101(rows)); + break; + } cudaSafeCall(cudaThreadSynchronize()); } }}} diff --git a/modules/gpu/src/cuda/linear_filters_beta.cu b/modules/gpu/src/cuda/linear_filters_beta.cu new file mode 100644 index 0000000..c951daf --- /dev/null +++ b/modules/gpu/src/cuda/linear_filters_beta.cu @@ -0,0 +1,264 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/gpu/devmem2d.hpp" +#include "safe_call.hpp" +#include "cuda_shared.hpp" +#include "border_interpolate.hpp" + +#define BLOCK_DIM_X 16 +#define BLOCK_DIM_Y 16 +#define MAX_KERNEL_SIZE 16 + +using namespace cv::gpu; + + +namespace cv { namespace gpu { namespace linear_filters { + + +// Global linear kernel data storage +__constant__ float ckernel[MAX_KERNEL_SIZE]; + + +void loadKernel(const float* kernel, int ksize) +{ + cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float))); +} + + +template +__global__ void rowFilterKernel(const DevMem2D_ src, PtrStepf dst, + int anchor, B border) +{ + __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3]; + + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + float* srow = smem + threadIdx.y * blockDim.x * 3; + + if (y < src.rows) + { + const T* src_row = src.ptr(y); + + srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row); + + srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row); + + srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row); + + __syncthreads(); + + if (x < src.cols) + { + srow += threadIdx.x + blockDim.x - anchor; + + float sum = 0.f; + for (int i = 0; i < ksize; ++i) + sum += srow[i] * ckernel[i]; + + dst.ptr(y)[x] = sum; + } + } +} + + +template +void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor) +{ + dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); + dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y)); + + B border(src.cols); + + if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1)) + cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, " + "try bigger image or another border extrapolation mode", __FILE__, __LINE__); + + rowFilterKernel<<>>(src, dst, anchor, border); + cudaSafeCall(cudaThreadSynchronize()); +} + + +template +void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, + const float* kernel, int ksize) +{ + typedef void (*Caller)(const DevMem2D_, PtrStepf, int); + + static const Caller callers[] = + { + 0, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller, + rowFilterCaller, rowFilterCaller + }; + + loadKernel(kernel, ksize); + callers[ksize](src, dst, anchor); +} + + +template +void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, + const float* kernel, int ksize, int brd_interp) +{ + typedef void (*Caller)(const DevMem2D_, PtrStepf, int, const float*, int); + + static const Caller callers[] = + { + rowFilterCaller > + }; + + callers[brd_interp](src, dst, anchor, kernel, ksize); +} + + +template void rowFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); +template void rowFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); + + +template +__global__ void colFilterKernel(const DevMem2D_ src, PtrStepf dst, int anchor, B border) +{ + __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3]; + + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + const int smem_step = blockDim.x; + + float* scol = smem + threadIdx.x; + + if (x < src.cols) + { + const T* src_col = src.data + x; + + scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col); + + scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col); + + scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col); + + __syncthreads(); + + if (y < src.rows) + { + scol += (threadIdx.y + blockDim.y - anchor)* smem_step; + + float sum = 0.f; + for(int i = 0; i < ksize; ++i) + sum += scol[i * smem_step] * ckernel[i]; + + dst.ptr(y)[x] = sum; + } + } +} + + +template +void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor) +{ + dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); + dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y)); + + B border(src.rows, src.step / src.elem_size); + + if (src.step - border.step * src.elem_size != 0) + cv::gpu::error("colFilterCaller: src step must be multiple of its element size", + __FILE__, __LINE__); + + if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1)) + cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, " + "try bigger image or another border extrapolation mode", __FILE__, __LINE__); + + colFilterKernel<<>>(src, dst, anchor, border); + cudaSafeCall(cudaThreadSynchronize()); +} + + +template +void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, + const float* kernel, int ksize) +{ + typedef void (*Caller)(const DevMem2D_, PtrStepf, int); + + static const Caller callers[] = + { + 0, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller, + colFilterCaller, colFilterCaller + }; + + loadKernel(kernel, ksize); + callers[ksize](src, dst, anchor); +} + + +template +void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, + const float* kernel, int ksize, int brd_interp) +{ + typedef void (*Caller)(const DevMem2D_, PtrStepf, int, const float*, int); + + static const Caller callers[] = + { + colFilterCaller > + }; + + callers[brd_interp](src, dst, anchor, kernel, ksize); +} + + +template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); +template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); + +}}} \ No newline at end of file diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 26a464e..9becef0 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -68,8 +68,8 @@ void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); } void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*) { throw_nogpu(); } void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu(); } -void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); } -void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); } +void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); } +void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -861,49 +861,119 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4 namespace cv { namespace gpu { namespace imgproc { - void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst); - void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst); + void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst); + void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type); + void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type); + +}}} + +namespace cv { namespace gpu { namespace linear_filters { + + template + void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, const float* kernel, + int ksize, int brd_interp); + + template + void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, const float* kernel, + int ksize, int brd_interp); }}} namespace { - void computeGradients(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize) - { - CV_Assert(src.type() == CV_32F); - + template + void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType) + { double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize; - if (ksize < 0) scale *= 2.; + if (ksize < 0) + scale *= 2.; + if (src.depth() == CV_8U) + scale *= 255.; scale = 1./scale; - if (ksize > 0) - { - Sobel(src, Dx, CV_32F, 1, 0, ksize, scale); - Sobel(src, Dy, CV_32F, 0, 1, ksize, scale); - } - else + GpuMat tmp_buf(src.size(), CV_32F); + Dx.create(src.size(), CV_32F); + Dy.create(src.size(), CV_32F); + Mat kx, ky; + + getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F); + kx = kx.reshape(1, 1) * scale; + ky = ky.reshape(1, 1); + + linear_filters::rowFilterCaller( + src, tmp_buf, kx.cols >> 1, kx.ptr(0), kx.cols, + gpuBorderType); + + linear_filters::colFilterCaller( + tmp_buf, Dx, ky.cols >> 1, ky.ptr(0), ky.cols, + gpuBorderType); + + getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F); + kx = kx.reshape(1, 1); + ky = ky.reshape(1, 1) * scale; + + linear_filters::rowFilterCaller( + src, tmp_buf, kx.cols >> 1, kx.ptr(0), kx.cols, + gpuBorderType); + + linear_filters::colFilterCaller( + tmp_buf, Dy, ky.cols >> 1, ky.ptr(0), ky.cols, + gpuBorderType); + } + + void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType) + { + switch (src.type()) { - Scharr(src, Dx, CV_32F, 1, 0, scale); - Scharr(src, Dy, CV_32F, 0, 1, scale); + case CV_8U: + extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + break; + case CV_32F: + extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + break; + default: + CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix"); } } -} -void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k) +} // Anonymous namespace + +void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType) { + int gpuBorderType; + switch (borderType) + { + case cv::BORDER_REFLECT101: + gpuBorderType = cv::gpu::BORDER_REFLECT101; + break; + default: + CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type"); + } + GpuMat Dx, Dy; - computeGradients(src, Dx, Dy, blockSize, ksize); + extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); dst.create(src.size(), CV_32F); - imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst); + imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType); } -void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize) -{ +void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType) +{ + int gpuBorderType; + switch (borderType) + { + case cv::BORDER_REFLECT101: + gpuBorderType = cv::gpu::BORDER_REFLECT101; + break; + default: + CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type"); + } + GpuMat Dx, Dy; - computeGradients(src, Dx, Dy, blockSize, ksize); + extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); dst.create(src.size(), CV_32F); - imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst); + imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType); } #endif /* !defined (HAVE_CUDA) */ + diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index f621782..ead568a 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -616,9 +616,11 @@ struct CV_GpuCornerHarrisTest: CvTest { for (int i = 0; i < 5; ++i) { - int rows = 10 + rand() % 300, cols = 10 + rand() % 300; + int rows = 25 + rand() % 300, cols = 25 + rand() % 300; if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return; if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return; + if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return; + if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return; } } catch (const Exception& e) @@ -634,22 +636,22 @@ struct CV_GpuCornerHarrisTest: CvTest cv::Mat src(rows, cols, depth); if (depth == CV_32F) rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1)); + else if (depth == CV_8U) + rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); double k = 0.1; - int borderType = BORDER_DEFAULT; + int borderType = BORDER_REFLECT101; cv::Mat dst_gold; cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType); cv::gpu::GpuMat dst; - cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k); - - int asize = apertureSize > 0 ? apertureSize : 3; + cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType); cv::Mat dsth = dst; - for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i) + for (int i = 0; i < dst.rows; ++i) { - for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j) + for (int j = 0; j < dst.cols; ++j) { float a = dst_gold.at(i, j); float b = dsth.at(i, j); @@ -678,9 +680,11 @@ struct CV_GpuCornerMinEigenValTest: CvTest { for (int i = 0; i < 3; ++i) { - int rows = 10 + rand() % 300, cols = 10 + rand() % 300; + int rows = 25 + rand() % 300, cols = 25 + rand() % 300; if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return; if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return; + if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return; + if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return; } } catch (const Exception& e) @@ -696,25 +700,25 @@ struct CV_GpuCornerMinEigenValTest: CvTest cv::Mat src(rows, cols, depth); if (depth == CV_32F) rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1)); + else if (depth == CV_8U) + rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256)); - int borderType = BORDER_DEFAULT; + int borderType = BORDER_REFLECT101; cv::Mat dst_gold; cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType); cv::gpu::GpuMat dst; - cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize); - - int asize = apertureSize > 0 ? apertureSize : 3; + cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType); cv::Mat dsth = dst; - for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i) + for (int i = 0; i < dst.rows; ++i) { - for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j) + for (int j = 0; j < dst.cols; ++j) { float a = dst_gold.at(i, j); float b = dsth.at(i, j); - if (fabs(a - b) > 1e-3f) + if (fabs(a - b) > 1e-2f) { ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize); ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); -- 2.7.4