changed buffer type in linear filters to float.
added support for 1-channel images to linear filters.
added support for the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types to gpu linear filters.
minor fix in tests.
update comments in gpu.hpp.
CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);\r
\r
//! transposes the matrix\r
- //! supports only CV_8UC1 type\r
+ //! supports CV_8UC1, CV_8SC1, CV_8UC4, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32FC1 types\r
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);\r
\r
//! computes element-wise absolute difference of two arrays (c = abs(a - b))\r
};\r
\r
//! returns the non-separable filter engine with the specified filter\r
- CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D);\r
+ CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D, int srcType, int dstType);\r
\r
//! returns the separable filter engine with the specified filters\r
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter);\r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);\r
\r
//! returns horizontal 1D box filter\r
//! supports only CV_8UC1 source type and CV_32FC1 sum type\r
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, \r
const Point& anchor = Point(-1,-1));\r
\r
- //! returns the primitive row filter with the specified kernel\r
+ //! returns the primitive row filter with the specified kernel.\r
+ //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source types.\r
+ //! there are two versions of the algorithm: NPP and OpenCV.\r
+ //! the NPP version is called when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,\r
+ //! otherwise the OpenCV version is called.\r
+ //! NPP supports only the BORDER_CONSTANT border type.\r
+ //! the OpenCV version supports only CV_32F as buffer depth and\r
+ //! the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.\r
CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, \r
- int anchor = -1);\r
-\r
- //! returns the primitive column filter with the specified kernel\r
+ int anchor = -1, int borderType = BORDER_CONSTANT);\r
+\r
+ //! returns the primitive column filter with the specified kernel.\r
+ //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst types.\r
+ //! there are two versions of the algorithm: NPP and OpenCV.\r
+ //! the NPP version is called when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,\r
+ //! otherwise the OpenCV version is called.\r
+ //! NPP supports only the BORDER_CONSTANT border type.\r
+ //! the OpenCV version supports only CV_32F as buffer depth and\r
+ //! the BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.\r
CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, \r
- int anchor = -1);\r
+ int anchor = -1, int borderType = BORDER_CONSTANT);\r
\r
//! returns the separable linear filter engine\r
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, \r
- const Mat& columnKernel, const Point& anchor = Point(-1,-1));\r
+ const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,\r
+ int columnBorderType = -1);\r
\r
//! returns filter engine for the generalized Sobel operator\r
- CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);\r
+ CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, \r
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! returns the Gaussian filter engine\r
- CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0);\r
+ CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, \r
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! returns maximum filter\r
CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));\r
\r
//! applies separable 2D linear filter to the image\r
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, \r
- Point anchor = Point(-1,-1));\r
+ Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! applies generalized Sobel operator to the image\r
- CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);\r
+ CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, \r
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! applies the vertical or horizontal Scharr operator to the image\r
- CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1);\r
+ CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1, \r
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! smooths the image using Gaussian filter.\r
- CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0);\r
+ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0, \r
+ int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);\r
\r
//! applies Laplacian operator to the image\r
//! supports only ksize = 1 and ksize = 3\r
\r
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)\r
{\r
- CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 \r
+ CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8SC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 \r
|| src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
\r
dst.create( src.cols, src.rows, src.type() );\r
\r
- if (src.type() == CV_8UC1)\r
+ if (src.type() == CV_8UC1 || src.type() == CV_8SC1)\r
{\r
NppiSize sz;\r
sz.width = src.cols;\r
#include "opencv2/gpu/devmem2d.hpp"\r
#include "opencv2/gpu/device/saturate_cast.hpp"\r
#include "opencv2/gpu/device/vecmath.hpp"\r
+#include "opencv2/gpu/device/limits_gpu.hpp"\r
\r
#include "safe_call.hpp"\r
#include "internal_shared.hpp"\r
using namespace cv::gpu;\r
using namespace cv::gpu::device;\r
\r
-#ifndef FLT_MAX\r
-#define FLT_MAX 3.402823466e+30F\r
-#endif\r
+namespace cv \r
+{ \r
+ namespace gpu \r
+ {\r
+ namespace device\r
+ {\r
+ struct BrdReflect101 \r
+ {\r
+ explicit BrdReflect101(int len): last(len - 1) {}\r
+\r
+ __device__ int idx_low(int i) const\r
+ {\r
+ return abs(i);\r
+ }\r
+\r
+ __device__ int idx_high(int i) const \r
+ {\r
+ return last - abs(last - i);\r
+ }\r
+\r
+ __device__ int idx(int i) const\r
+ {\r
+ return abs(idx_high(i));\r
+ }\r
+\r
+ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return -last <= mini && maxi <= 2 * last;\r
+ }\r
+\r
+ int last;\r
+ };\r
+ template <typename D>\r
+ struct BrdRowReflect101: BrdReflect101\r
+ {\r
+ explicit BrdRowReflect101(int len): BrdReflect101(len) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_low(i)]);\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_high(i)]);\r
+ }\r
+ };\r
+ template <typename D>\r
+ struct BrdColReflect101: BrdReflect101\r
+ {\r
+ BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_low(i) * step]);\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_high(i) * step]);\r
+ }\r
+\r
+ int step;\r
+ };\r
+\r
+ struct BrdReplicate\r
+ {\r
+ explicit BrdReplicate(int len): last(len - 1) {}\r
+\r
+ __device__ int idx_low(int i) const\r
+ {\r
+ return max(i, 0);\r
+ }\r
+\r
+ __device__ int idx_high(int i) const \r
+ {\r
+ return min(i, last);\r
+ }\r
+\r
+ __device__ int idx(int i) const\r
+ {\r
+ return max(min(i, last), 0);\r
+ }\r
+\r
+ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return true;\r
+ }\r
+\r
+ int last;\r
+ };\r
+ template <typename D>\r
+ struct BrdRowReplicate: BrdReplicate\r
+ {\r
+ explicit BrdRowReplicate(int len): BrdReplicate(len) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_low(i)]);\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_high(i)]);\r
+ }\r
+ };\r
+ template <typename D>\r
+ struct BrdColReplicate: BrdReplicate\r
+ {\r
+ BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_low(i) * step]);\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return saturate_cast<D>(data[idx_high(i) * step]);\r
+ }\r
+ int step;\r
+ };\r
+\r
+ template <typename D>\r
+ struct BrdRowConstant\r
+ {\r
+ explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return i >= 0 ? saturate_cast<D>(data[i]) : val;\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return i < len ? saturate_cast<D>(data[i]) : val;\r
+ }\r
+\r
+ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return true;\r
+ }\r
+\r
+ int len;\r
+ D val;\r
+ };\r
+ template <typename D>\r
+ struct BrdColConstant\r
+ {\r
+ BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}\r
+\r
+ template <typename T>\r
+ __device__ D at_low(int i, const T* data) const \r
+ {\r
+ return i >= 0 ? saturate_cast<D>(data[i * step]) : val;\r
+ }\r
+\r
+ template <typename T>\r
+ __device__ D at_high(int i, const T* data) const \r
+ {\r
+ return i < len ? saturate_cast<D>(data[i * step]) : val;\r
+ }\r
+\r
+ bool is_range_safe(int mini, int maxi) const \r
+ {\r
+ return true;\r
+ }\r
+\r
+ int len;\r
+ int step;\r
+ D val;\r
+ };\r
+ }\r
+ }\r
+}\r
\r
/////////////////////////////////////////////////////////////////////////////////////////////////\r
// Linear filters\r
\r
#define MAX_KERNEL_SIZE 16\r
+#define BLOCK_DIM_X 16\r
+#define BLOCK_DIM_Y 16\r
\r
namespace filter_krnls\r
{\r
\r
namespace filter_krnls\r
{\r
- template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>\r
- __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)\r
+ template <typename T, size_t size> struct SmemType_\r
{\r
- __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];\r
- \r
- const int blockStartX = blockDim.x * blockIdx.x;\r
- const int blockStartY = blockDim.y * blockIdx.y;\r
+ typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t smem_t;\r
+ };\r
+ template <typename T> struct SmemType_<T, 4>\r
+ {\r
+ typedef T smem_t;\r
+ };\r
+ template <typename T> struct SmemType\r
+ {\r
+ typedef typename SmemType_<T, sizeof(T)>::smem_t smem_t;\r
+ };\r
\r
- const int threadX = blockStartX + threadIdx.x;\r
- const int prevThreadX = threadX - blockDim.x;\r
- const int nextThreadX = threadX + blockDim.x;\r
+ template <int ksize, typename T, typename D, typename B>\r
+ __global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)\r
+ {\r
+ typedef typename SmemType<T>::smem_t smem_t;\r
\r
- const int threadY = blockStartY + threadIdx.y;\r
+ __shared__ smem_t smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];\r
+ \r
+ const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;\r
+ const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;\r
\r
- T* sDataRow = smem + threadIdx.y * blockDim.x * 3;\r
+ smem_t* sDataRow = smem + threadIdx.y * BLOCK_DIM_X * 3;\r
\r
- if (threadY < height)\r
+ if (y < src.rows)\r
{\r
- const T* rowSrc = src + threadY * src_step;\r
-\r
- sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : VecTraits<T>::all(0);\r
+ const T* rowSrc = src.ptr(y);\r
\r
- sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : VecTraits<T>::all(0);\r
-\r
- sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : VecTraits<T>::all(0);\r
+ sDataRow[threadIdx.x ] = b.at_low(x - BLOCK_DIM_X, rowSrc);\r
+ sDataRow[threadIdx.x + BLOCK_DIM_X ] = b.at_high(x, rowSrc);\r
+ sDataRow[threadIdx.x + BLOCK_DIM_X * 2] = b.at_high(x + BLOCK_DIM_X, rowSrc);\r
\r
__syncthreads();\r
\r
- if (threadX < width)\r
+ if (x < src.cols)\r
{\r
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;\r
sum_t sum = VecTraits<sum_t>::all(0);\r
\r
- sDataRow += threadIdx.x + blockDim.x - anchor;\r
+ sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;\r
\r
#pragma unroll\r
- for(int i = 0; i < KERNEL_SIZE; ++i)\r
+ for(int i = 0; i < ksize; ++i)\r
sum = sum + sDataRow[i] * cLinearKernel[i];\r
\r
- dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);\r
+ dst.ptr(y)[x] = saturate_cast<D>(sum);\r
}\r
}\r
}\r
\r
namespace cv { namespace gpu { namespace filters\r
{\r
- template <int KERNEL_SIZE, typename T, typename D>\r
+ template <int ksize, typename T, typename D, template<typename> class B>\r
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)\r
{\r
- const int BLOCK_DIM_X = 16;\r
- const int BLOCK_DIM_Y = 16;\r
-\r
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
- dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
+ dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
\r
- filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.data, src.step/src.elemSize(), \r
- dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);\r
+ typedef typename filter_krnls::SmemType<T>::smem_t smem_t;\r
+ B<smem_t> b(src.cols);\r
+\r
+ if (!b.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))\r
+ {\r
+ cv::gpu::error("linearRowFilter: can't use specified border extrapolation, image is too small, "\r
+ "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
+ }\r
+\r
+ filter_krnls::linearRowFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);\r
\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
template <typename T, typename D>\r
- void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)\r
{\r
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);\r
- static const caller_t callers[] = \r
- {linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>, \r
- linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>, \r
- linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>, \r
- linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>, \r
- linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>, \r
- linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>, \r
- linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>, \r
- linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};\r
-\r
+ static const caller_t callers[3][17] = \r
+ {\r
+ {\r
+ 0, \r
+ linearRowFilter_caller<1 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<2 , T, D, BrdRowReflect101>,\r
+ linearRowFilter_caller<3 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<4 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<5 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<6 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<7 , T, D, BrdRowReflect101>,\r
+ linearRowFilter_caller<8 , T, D, BrdRowReflect101>,\r
+ linearRowFilter_caller<9 , T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<10, T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<11, T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<12, T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<13, T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<14, T, D, BrdRowReflect101>,\r
+ linearRowFilter_caller<15, T, D, BrdRowReflect101>, \r
+ linearRowFilter_caller<16, T, D, BrdRowReflect101>,\r
+ }, \r
+ {\r
+ 0, \r
+ linearRowFilter_caller<1 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<2 , T, D, BrdRowReplicate>,\r
+ linearRowFilter_caller<3 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<4 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<5 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<6 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<7 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<8 , T, D, BrdRowReplicate>,\r
+ linearRowFilter_caller<9 , T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<10, T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<11, T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<12, T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<13, T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<14, T, D, BrdRowReplicate>,\r
+ linearRowFilter_caller<15, T, D, BrdRowReplicate>, \r
+ linearRowFilter_caller<16, T, D, BrdRowReplicate>,\r
+ }, \r
+ {\r
+ 0, \r
+ linearRowFilter_caller<1 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<2 , T, D, BrdRowConstant>,\r
+ linearRowFilter_caller<3 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<4 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<5 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<6 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<7 , T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<8 , T, D, BrdRowConstant>,\r
+ linearRowFilter_caller<9 , T, D, BrdRowConstant>,\r
+ linearRowFilter_caller<10, T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<11, T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<12, T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<13, T, D, BrdRowConstant>,\r
+ linearRowFilter_caller<14, T, D, BrdRowConstant>,\r
+ linearRowFilter_caller<15, T, D, BrdRowConstant>, \r
+ linearRowFilter_caller<16, T, D, BrdRowConstant>,\r
+ }\r
+ };\r
+ \r
loadLinearKernel(kernel, ksize);\r
\r
- callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
+ callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
}\r
\r
- template void linearRowFilter_gpu<uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<uchar4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<char4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<char4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
-\r
- template void linearRowFilter_gpu<ushort2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<ushort2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<short2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<short2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
-\r
- template void linearRowFilter_gpu<int, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<int, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<float, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearRowFilter_gpu<float, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
+ template void linearRowFilter_gpu<uchar , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearRowFilter_gpu<uchar4, float4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearRowFilter_gpu<short , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearRowFilter_gpu<short2, float2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearRowFilter_gpu<int , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearRowFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
}}}\r
\r
namespace filter_krnls\r
{\r
- template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>\r
- __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)\r
+ template <int ksize, typename T, typename D, typename B>\r
+ __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep_<D> dst, int anchor, const B b)\r
{\r
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];\r
\r
- const int blockStartX = blockDim.x * blockIdx.x;\r
- const int blockStartY = blockDim.y * blockIdx.y;\r
-\r
- const int threadX = blockStartX + threadIdx.x;\r
-\r
- const int threadY = blockStartY + threadIdx.y;\r
- const int prevThreadY = threadY - blockDim.y;\r
- const int nextThreadY = threadY + blockDim.y;\r
-\r
- const int smem_step = blockDim.x;\r
+ const int x = BLOCK_DIM_X * blockIdx.x + threadIdx.x;\r
+ const int y = BLOCK_DIM_Y * blockIdx.y + threadIdx.y;\r
\r
T* sDataColumn = smem + threadIdx.x;\r
\r
- if (threadX < width)\r
+ if (x < src.cols)\r
{\r
- const T* colSrc = src + threadX;\r
+ const T* srcCol = src.ptr() + x;\r
\r
- sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : VecTraits<T>::all(0);\r
-\r
- sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : VecTraits<T>::all(0);\r
-\r
- sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : VecTraits<T>::all(0);\r
+ sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol);\r
+ sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol);\r
+ sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol);\r
\r
__syncthreads();\r
\r
- if (threadY < height)\r
+ if (y < src.rows)\r
{\r
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;\r
sum_t sum = VecTraits<sum_t>::all(0);\r
\r
- sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;\r
+ sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;\r
\r
#pragma unroll\r
- for(int i = 0; i < KERNEL_SIZE; ++i)\r
- sum = sum + sDataColumn[i * smem_step] * cLinearKernel[i];\r
+ for(int i = 0; i < ksize; ++i)\r
+ sum = sum + sDataColumn[i * BLOCK_DIM_X] * cLinearKernel[i];\r
\r
- dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);\r
+ dst.ptr(y)[x] = saturate_cast<D>(sum);\r
}\r
}\r
}\r
\r
namespace cv { namespace gpu { namespace filters\r
{\r
- template <int KERNEL_SIZE, typename T, typename D>\r
+ template <int ksize, typename T, typename D, template<typename> class B>\r
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)\r
{\r
- const int BLOCK_DIM_X = 16;\r
- const int BLOCK_DIM_Y = 16;\r
-\r
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
- dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
+ dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
+\r
+ B<T> b(src.rows, src.step / src.elemSize());\r
\r
- filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.data, src.step/src.elemSize(), \r
- dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows);\r
+ if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))\r
+ {\r
+ cv::gpu::error("linearColumnFilter: can't use specified border extrapolation, image is too small, "\r
+ "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
+ }\r
+\r
+ filter_krnls::linearColumnFilter<ksize, T, D><<<grid, threads>>>(src, dst, anchor, b);\r
\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
template <typename T, typename D>\r
- void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type)\r
{\r
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);\r
- static const caller_t callers[] = \r
- {linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>, \r
- linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>, \r
- linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>, \r
- linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>, \r
- linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>, \r
- linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>, \r
- linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>, \r
- linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};\r
-\r
+ static const caller_t callers[3][17] = \r
+ {\r
+ {\r
+ 0, \r
+ linearColumnFilter_caller<1 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<2 , T, D, BrdColReflect101>,\r
+ linearColumnFilter_caller<3 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<4 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<5 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<6 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<7 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<8 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<9 , T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<10, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<11, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<12, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<13, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<14, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<15, T, D, BrdColReflect101>, \r
+ linearColumnFilter_caller<16, T, D, BrdColReflect101>, \r
+ }, \r
+ {\r
+ 0, \r
+ linearColumnFilter_caller<1 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<2 , T, D, BrdColReplicate>,\r
+ linearColumnFilter_caller<3 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<4 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<5 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<6 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<7 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<8 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<9 , T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<10, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<11, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<12, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<13, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<14, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<15, T, D, BrdColReplicate>, \r
+ linearColumnFilter_caller<16, T, D, BrdColReplicate>, \r
+ }, \r
+ {\r
+ 0, \r
+ linearColumnFilter_caller<1 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<2 , T, D, BrdColConstant>,\r
+ linearColumnFilter_caller<3 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<4 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<5 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<6 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<7 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<8 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<9 , T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<10, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<11, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<12, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<13, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<14, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<15, T, D, BrdColConstant>, \r
+ linearColumnFilter_caller<16, T, D, BrdColConstant>, \r
+ }\r
+ };\r
+ \r
loadLinearKernel(kernel, ksize);\r
\r
- callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
+ callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
}\r
\r
- template void linearColumnFilter_gpu<uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<uchar4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<char4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<char4, char4>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
-\r
- template void linearColumnFilter_gpu<ushort2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<ushort2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<short2, ushort2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<short2, short2>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
-\r
- template void linearColumnFilter_gpu<int, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<int, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<float, int>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
- template void linearColumnFilter_gpu<float, float>(const DevMem2D&, const DevMem2D&, const float[], int , int);\r
+ template void linearColumnFilter_gpu<float , uchar >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearColumnFilter_gpu<float , short >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearColumnFilter_gpu<float2, short2>(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearColumnFilter_gpu<float , int >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
+ template void linearColumnFilter_gpu<float , float >(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
}}}\r
\r
/////////////////////////////////////////////////////////////////////////////////////////////////\r
}\r
}\r
\r
- float minimum = FLT_MAX;\r
+ float minimum = numeric_limits_gpu<float>::max();\r
int id = 0;\r
\r
if (cost[0] < minimum)\r
enum \r
{\r
BORDER_REFLECT101_GPU = 0,\r
- BORDER_REPLICATE_GPU\r
+ BORDER_REPLICATE_GPU,\r
+ BORDER_CONSTANT_GPU\r
};\r
\r
// Converts CPU border extrapolation mode into GPU internal analogue.\r
\r
#if !defined (HAVE_CUDA)\r
\r
-Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
\r
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }\r
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }\r
-void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point) { throw_nogpu(); }\r
-void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }\r
-void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double) { throw_nogpu(); }\r
-void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }\r
+void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_nogpu(); }\r
+void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_nogpu(); }\r
+void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { throw_nogpu(); }\r
+void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_nogpu(); }\r
void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }\r
\r
#else\r
class Filter2DEngine_GPU : public FilterEngine_GPU\r
{\r
public:\r
- Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}\r
+ Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int srcType_, int dstType_) : \r
+ filter2D(filter2D_), srcType(srcType_), dstType(dstType_)\r
+ {}\r
\r
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
{\r
+ // The engine is bound to a fixed source type at construction: reject anything else early.\r
+ CV_Assert(src.type() == srcType);\r
+\r
Size src_size = src.size();\r
\r
- dst.create(src_size, src.type());\r
+ dst.create(src_size, dstType);\r
dst = Scalar(0.0);\r
\r
normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);\r
}\r
\r
Ptr<BaseFilter_GPU> filter2D;\r
+ // Expected input type and produced output type, fixed at construction.\r
+ int srcType, dstType;\r
};\r
}\r
\r
-Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)\r
+// Wraps a non-separable GPU filter in an engine; srcType is validated and dst is\r
+// allocated as dstType on every apply().\r
+Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D, int srcType, int dstType)\r
{\r
- return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D));\r
+ return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D, srcType, dstType));\r
}\r
\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
{\r
public:\r
SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter_) :\r
- rowFilter(rowFilter_), columnFilter(columnFilter_)\r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter_, int srcType_, int bufType_, int dstType_) :\r
+ rowFilter(rowFilter_), columnFilter(columnFilter_), \r
+ srcType(srcType_), bufType(bufType_), dstType(dstType_)\r
{\r
ksize = Size(rowFilter->ksize, columnFilter->ksize);\r
anchor = Point(rowFilter->anchor, columnFilter->anchor);\r
\r
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
{\r
+ // Engine is bound to fixed src/buf/dst types supplied at construction.\r
+ CV_Assert(src.type() == srcType);\r
+\r
Size src_size = src.size();\r
- int src_type = src.type();\r
\r
- dst.create(src_size, src_type);\r
+ dst.create(src_size, dstType);\r
dst = Scalar(0.0);\r
- dstBuf.create(src_size, src_type);\r
+ // Intermediate image between the row and column passes uses bufType.\r
+ dstBuf.create(src_size, bufType);\r
dstBuf = Scalar(0.0);\r
\r
normalizeROI(roi, ksize, anchor, src_size);\r
\r
- srcROI = src(roi);\r
- dstROI = dst(roi);\r
- dstBufROI = dstBuf(roi);\r
+ // ROI headers are cheap views; build them per call rather than caching them\r
+ // as members (the corresponding members are removed below).\r
+ GpuMat srcROI = src(roi);\r
+ GpuMat dstROI = dst(roi);\r
+ GpuMat dstBufROI = dstBuf(roi);\r
\r
(*rowFilter)(srcROI, dstBufROI);\r
(*columnFilter)(dstBufROI, dstROI);\r
\r
Ptr<BaseRowFilter_GPU> rowFilter;\r
Ptr<BaseColumnFilter_GPU> columnFilter;\r
+ int srcType, bufType, dstType;\r
+\r
Size ksize;\r
Point anchor;\r
- GpuMat dstBuf;\r
- GpuMat srcROI;\r
- GpuMat dstROI;\r
- GpuMat dstBufROI;\r
+\r
+ GpuMat dstBuf; \r
};\r
}\r
\r
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter)\r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType)\r
{\r
- return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));\r
+ // bufType is the type of the intermediate image between the row and column passes.\r
+ return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType));\r
}\r
\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor)\r
{\r
Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor);\r
- return createFilter2D_GPU(boxFilter);\r
+ // Forward the types so the engine can validate inputs and allocate dst correctly.\r
+ return createFilter2D_GPU(boxFilter, srcType, dstType);\r
}\r
\r
void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)\r
class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU\r
{\r
public:\r
- MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) : \r
- Filter2DEngine_GPU(filter2D_), iters(iters_) {}\r
+ MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type, int iters_) : \r
+ Filter2DEngine_GPU(filter2D_, type, type), iters(iters_) {}\r
\r
virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
{\r
\r
Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);\r
\r
- return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));\r
+ return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, type, iterations));\r
}\r
\r
namespace\r
\r
Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor);\r
\r
- return createFilter2D_GPU(linearFilter);\r
+ return createFilter2D_GPU(linearFilter, srcType, dstType);\r
}\r
\r
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor)\r
namespace cv { namespace gpu { namespace filters\r
{\r
template <typename T, typename D>\r
- void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ // brd_type: one of the BORDER_*_GPU enum values (REFLECT101 / REPLICATE / CONSTANT).\r
+ void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
\r
template <typename T, typename D>\r
- void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
}}}\r
\r
namespace\r
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI, \r
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);\r
\r
- typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor, int brd_type);\r
\r
class NppLinearRowFilter : public BaseRowFilter_GPU\r
{\r
class GpuLinearRowFilter : public BaseRowFilter_GPU\r
{\r
public:\r
- GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) : \r
- BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
+ GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : \r
+ BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}\r
\r
virtual void operator()(const GpuMat& src, GpuMat& dst)\r
{\r
- func(src, dst, kernel.ptr<float>(), ksize, anchor);\r
+ func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);\r
}\r
\r
Mat kernel;\r
gpuFilter1D_t func;\r
+ // GPU border mode (BORDER_*_GPU) forwarded to the CUDA kernel launcher.\r
+ int brd_type;\r
};\r
}\r
\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor)\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)\r
{\r
- using namespace cv::gpu::filters;\r
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};\r
- static const gpuFilter1D_t gpuFilter1D_callers[6][6] =\r
- {\r
- {linearRowFilter_gpu<uchar4, uchar4>,linearRowFilter_gpu<uchar4, char4>,0,0,0,0},\r
- {linearRowFilter_gpu<char4, uchar4>,linearRowFilter_gpu<char4, char4>,0,0,0,0},\r
- {0,0,linearRowFilter_gpu<ushort2, ushort2>,linearRowFilter_gpu<ushort2, short2>,0,0},\r
- {0,0,linearRowFilter_gpu<short2, ushort2>,linearRowFilter_gpu<short2, short2>,0,0},\r
- {0,0,0,0,linearRowFilter_gpu<int, int>, linearRowFilter_gpu<int, float>},\r
- {0,0,0,0,linearRowFilter_gpu<float, int>, linearRowFilter_gpu<float, float>}\r
- };\r
\r
if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))\r
{\r
+ // NPP path: the NPP row filter supports only the constant border mode.\r
+ CV_Assert(borderType == BORDER_CONSTANT);\r
+\r
GpuMat gpu_row_krnl;\r
int nDivisor;\r
normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);\r
return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,\r
nppFilter1D_callers[CV_MAT_CN(srcType)]));\r
}\r
+ \r
+ CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+ int gpuBorderType;\r
+ // NOTE(review): the conversion is invoked inside CV_Assert for its side effect\r
+ // (it writes gpuBorderType); this relies on CV_Assert always evaluating its\r
+ // argument — consider hoisting the call out of the assert.\r
+ CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
\r
- CV_Assert(srcType == CV_8UC4 || srcType == CV_8SC4 || srcType == CV_16UC2 || srcType == CV_16SC2 || srcType == CV_32SC1 || srcType == CV_32FC1);\r
- CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1);\r
+ CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC1 || srcType == CV_16SC2 \r
+ || srcType == CV_32SC1 || srcType == CV_32FC1);\r
+\r
+ // OpenCV (non-NPP) path always filters into a 32F buffer of matching channel count.\r
+ CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));\r
\r
Mat temp(rowKernel.size(), CV_32FC1);\r
rowKernel.convertTo(temp, CV_32FC1);\r
\r
int ksize = cont_krnl.cols;\r
\r
- CV_Assert(ksize < 16);\r
+ CV_Assert(ksize > 0 && ksize <= 16);\r
\r
normalizeAnchor(anchor, ksize);\r
\r
- return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl, \r
- gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)]));\r
+ gpuFilter1D_t func = 0;\r
+\r
+ switch (srcType)\r
+ {\r
+ case CV_8UC1:\r
+ func = filters::linearRowFilter_gpu<uchar, float>;\r
+ break;\r
+ case CV_8UC4:\r
+ func = filters::linearRowFilter_gpu<uchar4, float4>;\r
+ break;\r
+ case CV_16SC1:\r
+ func = filters::linearRowFilter_gpu<short, float>;\r
+ break;\r
+ case CV_16SC2:\r
+ func = filters::linearRowFilter_gpu<short2, float2>;\r
+ break;\r
+ case CV_32SC1:\r
+ func = filters::linearRowFilter_gpu<int, float>;\r
+ break;\r
+ case CV_32FC1:\r
+ func = filters::linearRowFilter_gpu<float, float>;\r
+ break;\r
+ }\r
+\r
+ // func is guaranteed non-zero here: the srcType assert above covers every switch case.\r
+ return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl, func, gpuBorderType));\r
}\r
\r
namespace\r
class GpuLinearColumnFilter : public BaseColumnFilter_GPU\r
{\r
public:\r
- GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) : \r
- BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
+ GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : \r
+ BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}\r
\r
virtual void operator()(const GpuMat& src, GpuMat& dst)\r
{\r
- func(src, dst, kernel.ptr<float>(), ksize, anchor);\r
+ func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type);\r
}\r
\r
Mat kernel;\r
gpuFilter1D_t func;\r
+ // GPU border mode (BORDER_*_GPU) forwarded to the CUDA kernel launcher.\r
+ int brd_type;\r
};\r
}\r
\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor)\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)\r
{\r
- using namespace cv::gpu::filters;\r
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};\r
- static const gpuFilter1D_t gpuFilter1D_callers[6][6] =\r
- {\r
- {linearColumnFilter_gpu<uchar4, uchar4>,linearColumnFilter_gpu<uchar4, char4>,0,0,0,0},\r
- {linearColumnFilter_gpu<char4, uchar4>,linearColumnFilter_gpu<char4, char4>,0,0,0,0},\r
- {0,0,linearColumnFilter_gpu<ushort2, ushort2>,linearColumnFilter_gpu<ushort2, short2>,0,0},\r
- {0,0,linearColumnFilter_gpu<short2, ushort2>,linearColumnFilter_gpu<short2, short2>,0,0},\r
- {0,0,0,0,linearColumnFilter_gpu<int, int>, linearColumnFilter_gpu<int, float>},\r
- {0,0,0,0,linearColumnFilter_gpu<float, int>, linearColumnFilter_gpu<float, float>}\r
- };\r
\r
if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))\r
{\r
+ // NPP path: the NPP column filter supports only the constant border mode.\r
+ CV_Assert(borderType == BORDER_CONSTANT);\r
+\r
GpuMat gpu_col_krnl;\r
int nDivisor;\r
normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);\r
return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor, \r
nppFilter1D_callers[CV_MAT_CN(bufType)]));\r
}\r
+ \r
+ CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+ int gpuBorderType;\r
+ // NOTE(review): the conversion is invoked inside CV_Assert for its side effect\r
+ // (it writes gpuBorderType); consider hoisting the call out of the assert.\r
+ CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
+ \r
+ CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC1 || dstType == CV_16SC2 \r
+ || dstType == CV_32SC1 || dstType == CV_32FC1);\r
\r
- CV_Assert(dstType == CV_8UC4 || dstType == CV_8SC4 || dstType == CV_16UC2 || dstType == CV_16SC2 || dstType == CV_32SC1 || dstType == CV_32FC1);\r
- CV_Assert(bufType == CV_8UC4 || bufType == CV_8SC4 || bufType == CV_16UC2 || bufType == CV_16SC2 || bufType == CV_32SC1 || bufType == CV_32FC1);\r
+ // OpenCV (non-NPP) path reads from a 32F buffer of matching channel count.\r
+ CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));\r
\r
Mat temp(columnKernel.size(), CV_32FC1);\r
columnKernel.convertTo(temp, CV_32FC1);\r
\r
int ksize = cont_krnl.cols;\r
\r
- CV_Assert(ksize < 16);\r
+ CV_Assert(ksize > 0 && ksize <= 16);\r
\r
normalizeAnchor(anchor, ksize);\r
\r
- return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, \r
- gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)]));\r
+ gpuFilter1D_t func = 0;\r
+\r
+ switch (dstType)\r
+ {\r
+ case CV_8UC1:\r
+ func = filters::linearColumnFilter_gpu<float, uchar>;\r
+ break;\r
+ case CV_8UC4:\r
+ func = filters::linearColumnFilter_gpu<float4, uchar4>;\r
+ break;\r
+ case CV_16SC1:\r
+ func = filters::linearColumnFilter_gpu<float, short>;\r
+ break;\r
+ case CV_16SC2:\r
+ func = filters::linearColumnFilter_gpu<float2, short2>;\r
+ break;\r
+ case CV_32SC1:\r
+ func = filters::linearColumnFilter_gpu<float, int>;\r
+ break;\r
+ case CV_32FC1:\r
+ func = filters::linearColumnFilter_gpu<float, float>;\r
+ break;\r
+ }\r
+\r
+ // func is guaranteed non-zero here: the dstType assert above covers every switch case.\r
+ return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, func, gpuBorderType));\r
}\r
\r
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, \r
- const Point& anchor)\r
+ const Point& anchor, int rowBorderType, int columnBorderType)\r
{\r
+ // A negative columnBorderType means "use the same mode as the row pass".\r
+ if (columnBorderType < 0)\r
+ columnBorderType = rowBorderType;\r
+\r
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);\r
int cn = CV_MAT_CN(srcType);\r
- int bdepth = std::max(sdepth, ddepth);\r
+ // Intermediate buffer depth is now always 32F (matches the asserts in the\r
+ // row/column filter factories). NOTE(review): sdepth/ddepth are now unused.\r
+ int bdepth = CV_32F;\r
int bufType = CV_MAKETYPE(bdepth, cn);\r
\r
- Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x);\r
- Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y);\r
+ Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType);\r
+ Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType);\r
\r
- return createSeparableFilter_GPU(rowFilter, columnFilter);\r
+ return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType);\r
}\r
\r
-void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)\r
+void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, int rowBorderType, int columnBorderType)\r
{\r
if( ddepth < 0 )\r
ddepth = src.depth();\r
\r
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
\r
- Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);\r
- f->apply(src, dst);\r
+ Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowBorderType, columnBorderType);\r
+ // Process the whole image explicitly instead of the engine's default ROI.\r
+ f->apply(src, dst, Rect(0, 0, src.cols, src.rows));\r
}\r
\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
// Deriv Filter\r
\r
-Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize)\r
+// Builds a separable derivative filter engine; border modes are forwarded per axis.\r
+Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType, int columnBorderType)\r
{\r
Mat kx, ky;\r
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
- return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1));\r
+ return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);\r
}\r
\r
-void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)\r
+void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType)\r
{\r
Mat kx, ky;\r
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
ky *= scale;\r
}\r
\r
- sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));\r
+ sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);\r
}\r
\r
-void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale)\r
+void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType)\r
{\r
Mat kx, ky;\r
getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);\r
ky *= scale;\r
}\r
\r
- sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));\r
+ sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);\r
}\r
\r
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
// Gaussian Filter\r
\r
-Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2)\r
+Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)\r
{ \r
int depth = CV_MAT_DEPTH(type);\r
\r
else\r
ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );\r
\r
- return createSeparableLinearFilter_GPU(type, type, kx, ky);\r
+ return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1,-1), rowBorderType, columnBorderType);\r
}\r
\r
-void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)\r
+void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)\r
{\r
if (ksize.width == 1 && ksize.height == 1)\r
{\r
\r
dst.create(src.size(), src.type());\r
\r
- Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2);\r
- f->apply(src, dst);\r
+ Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType);\r
+ f->apply(src, dst, Rect(0, 0, src.cols, src.rows));\r
}\r
\r
////////////////////////////////////////////////////////////////////////////////////////////////////\r
gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
return true;\r
}\r
+ \r
+ if (cpuBorderType == cv::BORDER_CONSTANT)\r
+ {\r
+ gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
+ return true;\r
+ }\r
\r
return false;\r
}\r
{ \r
typedef uchar elem_t; \r
enum {cn=1};\r
- static __device__ uchar all(uchar v) {return v;}\r
+ static __device__ __host__ uchar all(uchar v) {return v;}\r
+ static __device__ __host__ uchar make(uchar x) {return x;}\r
};\r
template<> struct VecTraits<uchar1> \r
{ \r
typedef uchar elem_t; \r
enum {cn=1};\r
- static __device__ uchar1 all(uchar v) {return make_uchar1(v);}\r
+ static __device__ __host__ uchar1 all(uchar v) {return make_uchar1(v);}\r
+ static __device__ __host__ uchar1 make(uchar x) {return make_uchar1(x);}\r
};\r
template<> struct VecTraits<uchar2> \r
{ \r
typedef uchar elem_t; \r
enum {cn=2}; \r
- static __device__ uchar2 all(uchar v) {return make_uchar2(v, v);}\r
+ static __device__ __host__ uchar2 all(uchar v) {return make_uchar2(v, v);}\r
+ static __device__ __host__ uchar2 make(uchar x, uchar y) {return make_uchar2(x, y);}\r
};\r
template<> struct VecTraits<uchar3> \r
{ \r
typedef uchar elem_t; \r
enum {cn=3}; \r
- static __device__ uchar3 all(uchar v) {return make_uchar3(v, v, v);}\r
+ static __device__ __host__ uchar3 all(uchar v) {return make_uchar3(v, v, v);}\r
+ static __device__ __host__ uchar3 make(uchar x, uchar y, uchar z) {return make_uchar3(x, y, z);}\r
};\r
template<> struct VecTraits<uchar4> \r
{ \r
typedef uchar elem_t; \r
enum {cn=4}; \r
- static __device__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);}\r
+ static __device__ __host__ uchar4 all(uchar v) {return make_uchar4(v, v, v, v);}\r
+ static __device__ __host__ uchar4 make(uchar x, uchar y, uchar z, uchar w) {return make_uchar4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<char> \r
{ \r
typedef char elem_t; \r
enum {cn=1}; \r
- static __device__ char all(char v) {return v;}\r
+ static __device__ __host__ char all(char v) {return v;}\r
+ static __device__ __host__ char make(char x) {return x;}\r
};\r
template<> struct VecTraits<char1> \r
{ \r
typedef char elem_t; \r
enum {cn=1}; \r
- static __device__ char1 all(char v) {return make_char1(v);}\r
+ static __device__ __host__ char1 all(char v) {return make_char1(v);}\r
+ static __device__ __host__ char1 make(char x) {return make_char1(x);}\r
};\r
template<> struct VecTraits<char2> \r
{ \r
typedef char elem_t; \r
enum {cn=2}; \r
- static __device__ char2 all(char v) {return make_char2(v, v);}\r
+ static __device__ __host__ char2 all(char v) {return make_char2(v, v);}\r
+ static __device__ __host__ char2 make(char x, char y) {return make_char2(x, y);}\r
};\r
template<> struct VecTraits<char3> \r
{ \r
typedef char elem_t; \r
enum {cn=3}; \r
- static __device__ char3 all(char v) {return make_char3(v, v, v);}\r
+ static __device__ __host__ char3 all(char v) {return make_char3(v, v, v);}\r
+ static __device__ __host__ char3 make(char x, char y, char z) {return make_char3(x, y, z);}\r
};\r
template<> struct VecTraits<char4> \r
{ \r
typedef char elem_t; \r
enum {cn=4}; \r
- static __device__ char4 all(char v) {return make_char4(v, v, v, v);}\r
+ static __device__ __host__ char4 all(char v) {return make_char4(v, v, v, v);}\r
+ static __device__ __host__ char4 make(char x, char y, char z, char w) {return make_char4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<ushort> \r
{ \r
typedef ushort elem_t; \r
enum {cn=1}; \r
- static __device__ ushort all(ushort v) {return v;}\r
+ static __device__ __host__ ushort all(ushort v) {return v;}\r
+ static __device__ __host__ ushort make(ushort x) {return x;}\r
};\r
template<> struct VecTraits<ushort1> \r
{ \r
typedef ushort elem_t; \r
enum {cn=1}; \r
- static __device__ ushort1 all(ushort v) {return make_ushort1(v);}\r
+ static __device__ __host__ ushort1 all(ushort v) {return make_ushort1(v);}\r
+ static __device__ __host__ ushort1 make(ushort x) {return make_ushort1(x);}\r
};\r
template<> struct VecTraits<ushort2> \r
{ \r
typedef ushort elem_t; \r
enum {cn=2}; \r
- static __device__ ushort2 all(ushort v) {return make_ushort2(v, v);}\r
+ static __device__ __host__ ushort2 all(ushort v) {return make_ushort2(v, v);}\r
+ static __device__ __host__ ushort2 make(ushort x, ushort y) {return make_ushort2(x, y);}\r
};\r
template<> struct VecTraits<ushort3> \r
{ \r
typedef ushort elem_t; \r
enum {cn=3}; \r
- static __device__ ushort3 all(ushort v) {return make_ushort3(v, v, v);}\r
+ static __device__ __host__ ushort3 all(ushort v) {return make_ushort3(v, v, v);}\r
+ static __device__ __host__ ushort3 make(ushort x, ushort y, ushort z) {return make_ushort3(x, y, z);}\r
};\r
template<> struct VecTraits<ushort4> \r
{ \r
typedef ushort elem_t; \r
enum {cn=4}; \r
- static __device__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);}\r
+ static __device__ __host__ ushort4 all(ushort v) {return make_ushort4(v, v, v, v);}\r
+ static __device__ __host__ ushort4 make(ushort x, ushort y, ushort z, ushort w) {return make_ushort4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<short> \r
{ \r
typedef short elem_t; \r
enum {cn=1}; \r
- static __device__ short all(short v) {return v;}\r
+ static __device__ __host__ short all(short v) {return v;}\r
+ static __device__ __host__ short make(short x) {return x;}\r
};\r
template<> struct VecTraits<short1> \r
{ \r
typedef short elem_t; \r
enum {cn=1}; \r
- static __device__ short1 all(short v) {return make_short1(v);}\r
+ static __device__ __host__ short1 all(short v) {return make_short1(v);}\r
+ static __device__ __host__ short1 make(short x) {return make_short1(x);}\r
};\r
template<> struct VecTraits<short2> \r
{ \r
typedef short elem_t; \r
enum {cn=2}; \r
- static __device__ short2 all(short v) {return make_short2(v, v);}\r
+ static __device__ __host__ short2 all(short v) {return make_short2(v, v);}\r
+ static __device__ __host__ short2 make(short x, short y) {return make_short2(x, y);}\r
};\r
template<> struct VecTraits<short3> \r
{ \r
typedef short elem_t; \r
enum {cn=3}; \r
- static __device__ short3 all(short v) {return make_short3(v, v, v);}\r
+ static __device__ __host__ short3 all(short v) {return make_short3(v, v, v);}\r
+ static __device__ __host__ short3 make(short x, short y, short z) {return make_short3(x, y, z);}\r
};\r
template<> struct VecTraits<short4> \r
{ \r
typedef short elem_t; \r
enum {cn=4}; \r
- static __device__ short4 all(short v) {return make_short4(v, v, v, v);}\r
+ static __device__ __host__ short4 all(short v) {return make_short4(v, v, v, v);}\r
+ static __device__ __host__ short4 make(short x, short y, short z, short w) {return make_short4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<uint> \r
{ \r
typedef uint elem_t; \r
enum {cn=1}; \r
- static __device__ uint all(uint v) {return v;}\r
+ static __device__ __host__ uint all(uint v) {return v;}\r
+ static __device__ __host__ uint make(uint x) {return x;}\r
};\r
template<> struct VecTraits<uint1> \r
{ \r
typedef uint elem_t; \r
enum {cn=1}; \r
- static __device__ uint1 all(uint v) {return make_uint1(v);}\r
+ static __device__ __host__ uint1 all(uint v) {return make_uint1(v);}\r
+ static __device__ __host__ uint1 make(uint x) {return make_uint1(x);}\r
};\r
template<> struct VecTraits<uint2> \r
{ \r
typedef uint elem_t; \r
enum {cn=2}; \r
- static __device__ uint2 all(uint v) {return make_uint2(v, v);}\r
+ static __device__ __host__ uint2 all(uint v) {return make_uint2(v, v);}\r
+ static __device__ __host__ uint2 make(uint x, uint y) {return make_uint2(x, y);}\r
};\r
template<> struct VecTraits<uint3> \r
{ \r
typedef uint elem_t; \r
enum {cn=3}; \r
- static __device__ uint3 all(uint v) {return make_uint3(v, v, v);}\r
+ static __device__ __host__ uint3 all(uint v) {return make_uint3(v, v, v);}\r
+ static __device__ __host__ uint3 make(uint x, uint y, uint z) {return make_uint3(x, y, z);}\r
};\r
template<> struct VecTraits<uint4> \r
{ \r
typedef uint elem_t; \r
enum {cn=4}; \r
- static __device__ uint4 all(uint v) {return make_uint4(v, v, v, v);}\r
+ static __device__ __host__ uint4 all(uint v) {return make_uint4(v, v, v, v);}\r
+ static __device__ __host__ uint4 make(uint x, uint y, uint z, uint w) {return make_uint4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<int> \r
{ \r
typedef int elem_t; \r
enum {cn=1}; \r
- static __device__ int all(int v) {return v;}\r
+ static __device__ __host__ int all(int v) {return v;}\r
+ static __device__ __host__ int make(int x) {return x;}\r
};\r
template<> struct VecTraits<int1> \r
{ \r
typedef int elem_t; \r
enum {cn=1}; \r
- static __device__ int1 all(int v) {return make_int1(v);}\r
+ static __device__ __host__ int1 all(int v) {return make_int1(v);}\r
+ static __device__ __host__ int1 make(int x) {return make_int1(x);}\r
};\r
template<> struct VecTraits<int2> \r
{ \r
typedef int elem_t; \r
enum {cn=2}; \r
- static __device__ int2 all(int v) {return make_int2(v, v);}\r
+ static __device__ __host__ int2 all(int v) {return make_int2(v, v);}\r
+ static __device__ __host__ int2 make(int x, int y) {return make_int2(x, y);}\r
};\r
template<> struct VecTraits<int3> \r
{ \r
typedef int elem_t; \r
enum {cn=3}; \r
- static __device__ int3 all(int v) {return make_int3(v, v, v);}\r
+ static __device__ __host__ int3 all(int v) {return make_int3(v, v, v);}\r
+ static __device__ __host__ int3 make(int x, int y, int z) {return make_int3(x, y, z);}\r
};\r
template<> struct VecTraits<int4> \r
{ \r
typedef int elem_t; \r
enum {cn=4}; \r
- static __device__ int4 all(int v) {return make_int4(v, v, v, v);}\r
+ static __device__ __host__ int4 all(int v) {return make_int4(v, v, v, v);}\r
+ static __device__ __host__ int4 make(int x, int y, int z, int w) {return make_int4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<float> \r
{ \r
typedef float elem_t; \r
enum {cn=1}; \r
- static __device__ float all(float v) {return v;}\r
+ static __device__ __host__ float all(float v) {return v;}\r
+ static __device__ __host__ float make(float x) {return x;}\r
};\r
template<> struct VecTraits<float1> \r
{ \r
typedef float elem_t; \r
enum {cn=1}; \r
- static __device__ float1 all(float v) {return make_float1(v);}\r
+ static __device__ __host__ float1 all(float v) {return make_float1(v);}\r
+ static __device__ __host__ float1 make(float x) {return make_float1(x);}\r
};\r
template<> struct VecTraits<float2> \r
{ \r
typedef float elem_t; \r
enum {cn=2}; \r
- static __device__ float2 all(float v) {return make_float2(v, v);}\r
+ static __device__ __host__ float2 all(float v) {return make_float2(v, v);}\r
+ static __device__ __host__ float2 make(float x, float y) {return make_float2(x, y);}\r
};\r
template<> struct VecTraits<float3> \r
{ \r
typedef float elem_t; \r
enum {cn=3}; \r
- static __device__ float3 all(float v) {return make_float3(v, v, v);}\r
+ static __device__ __host__ float3 all(float v) {return make_float3(v, v, v);}\r
+ static __device__ __host__ float3 make(float x, float y, float z) {return make_float3(x, y, z);}\r
};\r
template<> struct VecTraits<float4> \r
{ \r
typedef float elem_t;\r
enum {cn=4}; \r
- static __device__ float4 all(float v) {return make_float4(v, v, v, v);}\r
+ static __device__ __host__ float4 all(float v) {return make_float4(v, v, v, v);}\r
+ static __device__ __host__ float4 make(float x, float y, float z, float w) {return make_float4(x, y, z, w);}\r
};\r
\r
template <int cn, typename VecD> struct SatCast;\r
template <typename VecD> struct SatCast<1, VecD>\r
{\r
template <typename VecS>\r
- __device__ VecD operator()(const VecS& v)\r
+ static __device__ VecD cast(const VecS& v)\r
{\r
- VecD res; \r
- res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);\r
- return res;\r
+ typedef typename VecTraits<VecD>::elem_t D;\r
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x));\r
}\r
};\r
template <typename VecD> struct SatCast<2, VecD>\r
{\r
template <typename VecS>\r
- __device__ VecD operator()(const VecS& v)\r
+ static __device__ VecD cast(const VecS& v)\r
{\r
- VecD res; \r
- res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);\r
- res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);\r
- return res;\r
+ typedef typename VecTraits<VecD>::elem_t D;\r
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));\r
}\r
};\r
template <typename VecD> struct SatCast<3, VecD>\r
{\r
template <typename VecS>\r
- __device__ VecD operator()(const VecS& v)\r
+ static __device__ VecD cast(const VecS& v)\r
{\r
- VecD res; \r
- res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);\r
- res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);\r
- res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z);\r
- return res;\r
+ typedef typename VecTraits<VecD>::elem_t D;\r
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));\r
}\r
};\r
template <typename VecD> struct SatCast<4, VecD>\r
{\r
template <typename VecS>\r
- __device__ VecD operator()(const VecS& v)\r
+ static __device__ VecD cast(const VecS& v)\r
{\r
- VecD res; \r
- res.x = saturate_cast< VecTraits<VecD>::elem_t >(v.x);\r
- res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.y);\r
- res.y = saturate_cast< VecTraits<VecD>::elem_t >(v.z);\r
- res.w = saturate_cast< VecTraits<VecD>::elem_t >(v.w);\r
- return res;\r
+ typedef typename VecTraits<VecD>::elem_t D;\r
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));\r
}\r
};\r
\r
template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v)\r
{\r
- SatCast<\r
- \r
- VecTraits<VecD>::cn, \r
- \r
- VecD\r
- > \r
- \r
- cast;\r
- return cast(v);\r
+ return SatCast<VecTraits<VecD>::cn, VecD>::cast(v);\r
}\r
\r
template<typename _Tp> static __device__ _Tp saturate_cast(const uchar1& v) {return saturate_cast_caller<_Tp>(v);}\r
\r
if (!compareMatches(matchesCPU, matchesGPU))\r
{\r
- ts->printf(CvTS::LOG, "Match FAIL");\r
+ ts->printf(CvTS::LOG, "Match FAIL\n");\r
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);\r
return;\r
}\r
\r
if (!compareMatches(knnMatchesCPU, knnMatchesGPU))\r
{\r
- ts->printf(CvTS::LOG, "KNN Match FAIL");\r
+ ts->printf(CvTS::LOG, "KNN Match FAIL\n");\r
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);\r
return;\r
}\r
\r
if (!compareMatches(radiusMatchesCPU, radiusMatchesGPU))\r
{\r
- ts->printf(CvTS::LOG, "Radius Match FAIL");\r
+ ts->printf(CvTS::LOG, "Radius Match FAIL\n");\r
ts->set_failed_test_info(CvTS::FAIL_MISMATCH);\r
return;\r
}\r
\r
double res = norm(m1ROI, m2ROI, NORM_INF);\r
\r
- if (res <= 1)\r
+ // Max difference (2.0) in GaussianBlur\r
+ if (res <= 2)\r
return CvTS::OK;\r
\r
ts->printf(CvTS::LOG, "Norm: %f\n", res);\r
\r
int test(const Mat& img)\r
{\r
- if (img.type() != CV_8UC1)\r
- return CvTS::OK;\r
int ksizes[] = {3, 5, 7};\r
int ksizes_num = sizeof(ksizes) / sizeof(int);\r
\r
cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]);\r
\r
GpuMat gpu1(img);\r
- gpu1.convertTo(gpu1, CV_32S);\r
GpuMat gpudst;\r
cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]);\r
- gpudst.convertTo(gpudst, CV_8U);\r
\r
if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != CvTS::OK)\r
test_res = CvTS::FAIL_GENERIC;\r
\r
int test(const Mat& img)\r
{\r
- if (img.type() != CV_8UC1)\r
- return CvTS::OK;\r
-\r
int dx = 1, dy = 0;\r
\r
Mat cpudst;\r
cv::Scharr(img, cpudst, -1, dx, dy);\r
\r
GpuMat gpu1(img);\r
- gpu1.convertTo(gpu1, CV_32S);\r
GpuMat gpudst;\r
cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy);\r
- gpudst.convertTo(gpudst, CV_8U);\r
- \r
+ \r
return CheckNorm(cpudst, gpudst, Size(3, 3));\r
}\r
};\r
{\r
cv::Size ksize(ksizes[i], ksizes[j]);\r
\r
- ts->printf(CvTS::LOG, "ksize = (%dx%d)\t", ksizes[i], ksizes[j]);\r
+ ts->printf(CvTS::LOG, "ksize = (%dx%d)\t\n", ksizes[i], ksizes[j]);\r
\r
Mat cpudst;\r
cv::GaussianBlur(img, cpudst, ksize, sigma1);\r