+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#include "opencv2/gpu/devmem2d.hpp"\r
-#include "opencv2/gpu/device/border_interpolate.hpp"\r
-#include "safe_call.hpp"\r
-#include "internal_shared.hpp"\r
-\r
-#define BLOCK_DIM_X 16\r
-#define BLOCK_DIM_Y 16\r
-#define MAX_KERNEL_SIZE 16\r
-\r
-using namespace cv::gpu;\r
-using namespace cv::gpu::device;\r
-\r
-namespace cv { namespace gpu { namespace linear_filters {\r
-\r
-\r
-// Global linear kernel data storage\r
-__constant__ float ckernel[MAX_KERNEL_SIZE];\r
-\r
-\r
-void loadKernel(const float* kernel, int ksize) \r
-{\r
- cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float)));\r
-}\r
-\r
-\r
-template <typename T, typename B, int ksize>\r
-__global__ void rowFilterKernel(const DevMem2D_<T> src, PtrStepf dst, \r
- int anchor, B border)\r
-{\r
- __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];\r
-\r
- const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
- const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
-\r
- float* srow = smem + threadIdx.y * blockDim.x * 3;\r
-\r
- if (y < src.rows)\r
- {\r
- const T* src_row = src.ptr(y);\r
-\r
- srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row);\r
-\r
- srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row);\r
-\r
- srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row);\r
-\r
- __syncthreads();\r
-\r
- if (x < src.cols)\r
- {\r
- srow += threadIdx.x + blockDim.x - anchor;\r
-\r
- float sum = 0.f;\r
- for (int i = 0; i < ksize; ++i)\r
- sum += srow[i] * ckernel[i];\r
-\r
- dst.ptr(y)[x] = sum;\r
- }\r
- }\r
-}\r
-\r
-\r
-template <typename T, typename B, int ksize>\r
-void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)\r
-{\r
- dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
- dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));\r
-\r
- B border(src.cols);\r
-\r
- if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))\r
- cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, "\r
- "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
-\r
- rowFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);\r
- cudaSafeCall(cudaThreadSynchronize());\r
-}\r
-\r
-\r
-template <typename T, typename B>\r
-void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, \r
- const float* kernel, int ksize)\r
-{\r
- typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);\r
-\r
- static const Caller callers[] = \r
- { \r
- 0, rowFilterCaller<T, B, 1>, \r
- rowFilterCaller<T, B, 2>, rowFilterCaller<T, B, 3>, \r
- rowFilterCaller<T, B, 4>, rowFilterCaller<T, B, 5>, \r
- rowFilterCaller<T, B, 6>, rowFilterCaller<T, B, 7>, \r
- rowFilterCaller<T, B, 8>, rowFilterCaller<T, B, 9>, \r
- rowFilterCaller<T, B, 10>, rowFilterCaller<T, B, 11>, \r
- rowFilterCaller<T, B, 12>, rowFilterCaller<T, B, 13>, \r
- rowFilterCaller<T, B, 14>, rowFilterCaller<T, B, 15> \r
- };\r
-\r
- loadKernel(kernel, ksize);\r
- callers[ksize](src, dst, anchor);\r
-}\r
-\r
-\r
-template <typename T>\r
-void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, \r
- const float* kernel, int ksize, int brd_interp)\r
-{\r
- typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);\r
-\r
- static const Caller callers[] = \r
- { \r
- rowFilterCaller<T, BrdRowReflect101<T> >,\r
- rowFilterCaller<T, BrdRowReplicate<T> >\r
- };\r
-\r
- callers[brd_interp](src, dst, anchor, kernel, ksize);\r
-}\r
-\r
-\r
-template void rowFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);\r
-template void rowFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);\r
-\r
-\r
-template <typename T, typename B, int ksize>\r
-__global__ void colFilterKernel(const DevMem2D_<T> src, PtrStepf dst, int anchor, B border)\r
-{\r
- __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];\r
-\r
- const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
- const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
-\r
- const int smem_step = blockDim.x;\r
-\r
- float* scol = smem + threadIdx.x;\r
-\r
- if (x < src.cols)\r
- {\r
- const T* src_col = src.data + x;\r
-\r
- scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col);\r
-\r
- scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col);\r
-\r
- scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col);\r
-\r
- __syncthreads();\r
-\r
- if (y < src.rows)\r
- {\r
- scol += (threadIdx.y + blockDim.y - anchor)* smem_step;\r
-\r
- float sum = 0.f;\r
- for(int i = 0; i < ksize; ++i)\r
- sum += scol[i * smem_step] * ckernel[i];\r
-\r
- dst.ptr(y)[x] = sum;\r
- }\r
- }\r
-}\r
-\r
-\r
-template <typename T, typename B, int ksize>\r
-void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)\r
-{\r
- dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
- dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));\r
-\r
- B border(src.rows, src.step / src.elem_size);\r
-\r
- if (src.step - border.step * src.elem_size != 0)\r
- cv::gpu::error("colFilterCaller: src step must be multiple of its element size", \r
- __FILE__, __LINE__);\r
-\r
- if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))\r
- cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, "\r
- "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
-\r
- colFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);\r
- cudaSafeCall(cudaThreadSynchronize());\r
-}\r
-\r
-\r
-template <typename T, typename B>\r
-void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, \r
- const float* kernel, int ksize)\r
-{\r
- typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);\r
-\r
- static const Caller callers[] = \r
- { \r
- 0, colFilterCaller<T, B, 1>, \r
- colFilterCaller<T, B, 2>, colFilterCaller<T, B, 3>, \r
- colFilterCaller<T, B, 4>, colFilterCaller<T, B, 5>, \r
- colFilterCaller<T, B, 6>, colFilterCaller<T, B, 7>, \r
- colFilterCaller<T, B, 8>, colFilterCaller<T, B, 9>, \r
- colFilterCaller<T, B, 10>, colFilterCaller<T, B, 11>, \r
- colFilterCaller<T, B, 12>, colFilterCaller<T, B, 13>, \r
- colFilterCaller<T, B, 14>, colFilterCaller<T, B, 15> \r
- };\r
-\r
- loadKernel(kernel, ksize);\r
- callers[ksize](src, dst, anchor);\r
-}\r
-\r
-\r
-template <typename T>\r
-void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, \r
- const float* kernel, int ksize, int brd_interp)\r
-{\r
- typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);\r
-\r
- static const Caller callers[] = \r
- { \r
- colFilterCaller<T, BrdColReflect101<T> >,\r
- colFilterCaller<T, BrdColReplicate<T> >\r
- };\r
-\r
- callers[brd_interp](src, dst, anchor, kernel, ksize);\r
-}\r
-\r
-\r
-template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);\r
-template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);\r
-\r
-}}} \r
\r
}}}\r
\r
-namespace cv { namespace gpu { namespace linear_filters {\r
-\r
- template <typename T>\r
- void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel, \r
- int ksize, int brd_interp);\r
-\r
- template <typename T>\r
- void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel, \r
- int ksize, int brd_interp);\r
-\r
-}}}\r
-\r
namespace \r
{\r
template <typename T>\r
- void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)\r
+ void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
{ \r
double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;\r
if (ksize < 0) \r
GpuMat tmp_buf(src.size(), CV_32F);\r
Dx.create(src.size(), CV_32F);\r
Dy.create(src.size(), CV_32F);\r
- Mat kx, ky;\r
-\r
- getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F);\r
- kx = kx.reshape(1, 1) * scale;\r
- ky = ky.reshape(1, 1);\r
-\r
- linear_filters::rowFilterCaller<T>(\r
- src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,\r
- gpuBorderType);\r
\r
- linear_filters::colFilterCaller<float>(\r
- tmp_buf, Dx, ky.cols >> 1, ky.ptr<float>(0), ky.cols, \r
- gpuBorderType);\r
-\r
- getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F);\r
- kx = kx.reshape(1, 1);\r
- ky = ky.reshape(1, 1) * scale;\r
-\r
- linear_filters::rowFilterCaller<T>(\r
- src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols, \r
- gpuBorderType);\r
-\r
- linear_filters::colFilterCaller<float>(\r
- tmp_buf, Dy, ky.cols >> 1, ky.ptr<float>(0), ky.cols, \r
- gpuBorderType);\r
+ if (ksize > 0)\r
+ {\r
+ Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);\r
+ Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);\r
+ }\r
+ else\r
+ {\r
+ Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);\r
+ Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);\r
+ }\r
}\r
\r
- void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)\r
+ void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)\r
{\r
switch (src.type())\r
{\r
case CV_8U:\r
- extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, gpuBorderType);\r
+ extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);\r
break;\r
case CV_32F:\r
- extractCovData<float>(src, Dx, Dy, blockSize, ksize, gpuBorderType);\r
+ extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);\r
break;\r
default:\r
CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");\r
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
\r
GpuMat Dx, Dy;\r
- extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);\r
+ extractCovData(src, Dx, Dy, blockSize, ksize, borderType);\r
dst.create(src.size(), CV_32F);\r
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);\r
}\r
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
\r
GpuMat Dx, Dy;\r
- extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); \r
+ extractCovData(src, Dx, Dy, blockSize, ksize, borderType); \r
dst.create(src.size(), CV_32F);\r
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);\r
}\r