refactored gpu module: kernel code moved into per-file *_krnls namespaces, DevMem2D_ now carries an element step next to the byte step, the nDivisor/rowFilterFirst parameters are dropped from the filter API, and the bilateral filter kernels join the new separable linear row/column filters in a single filters source file.
// Simple lightweight structure that encapsulates an image pointer on the device, its pitch and its sizes.\r
// It is intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.\r
\r
- template<typename T = unsigned char>\r
+ template <typename T>\r
struct DevMem2D_\r
{\r
typedef T elem_t;\r
enum { elem_size = sizeof(elem_t) };\r
int cols;\r
int rows;\r
T* ptr;\r
size_t step;\r
+ size_t elem_step;\r
\r
- DevMem2D_() : cols(0), rows(0), ptr(0), step(0) {}\r
+ DevMem2D_() : cols(0), rows(0), ptr(0), step(0), elem_step(0) {}\r
\r
DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_)\r
- : cols(cols_), rows(rows_), ptr(ptr_), step(step_) {}\r
+ : cols(cols_), rows(rows_), ptr(ptr_), step(step_), elem_step(step_ / sizeof(T)) {}\r
+ \r
+ template <typename U>\r
+ explicit DevMem2D_(const DevMem2D_<U>& d)\r
+ : cols(d.cols), rows(d.rows), ptr((T*)d.ptr), step(d.step), elem_step(d.step / sizeof(T)) {}\r
\r
size_t elemSize() const { return elem_size; }\r
};\r
\r
- typedef DevMem2D_<> DevMem2D;\r
+ typedef DevMem2D_<unsigned char> DevMem2D;\r
typedef DevMem2D_<float> DevMem2Df;\r
typedef DevMem2D_<int> DevMem2Di;\r
}\r
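
For reference, a minimal host-side sketch of how the new elem_step field and the converting constructor are meant to be used (illustrative only, not part of the patch; the pitch and image size below are made up):

#include "opencv2/gpu/devmem2d.hpp"

int main()
{
    float* d_ptr = 0;             // would come from cudaMallocPitch in real code
    size_t pitch_bytes = 2048;    // hypothetical pitch for a 640-column float image

    cv::gpu::DevMem2Df img(480, 640, d_ptr, pitch_bytes);
    // step is measured in bytes, elem_step in elements of the stored type:
    //   img.step      == 2048
    //   img.elem_step == 2048 / sizeof(float) == 512

    // The new converting constructor reinterprets the same buffer, here as raw bytes;
    // for DevMem2D (unsigned char) elem_step equals step.
    cv::gpu::DevMem2D bytes(img);
    (void)bytes;
    return 0;
}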
\r
//! returns the separable filter engine with the specified filters\r
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter, bool rowFilterFirst = true);\r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter);\r
\r
//! returns horizontal 1D box filter\r
//! supports only CV_8UC1 source type and CV_32FC1 sum type\r
//! only MORPH_ERODE and MORPH_DILATE are supported\r
//! supports CV_8UC1 and CV_8UC4 types\r
//! kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height\r
- CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const GpuMat& kernel, const Size& ksize, \r
+ CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, \r
Point anchor=Point(-1,-1));\r
\r
//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.\r
\r
//! returns 2D filter with the specified kernel\r
//! supports CV_8UC1 and CV_8UC4 types\r
- //! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height\r
- CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const GpuMat& kernel, const Size& ksize, \r
- Point anchor = Point(-1, -1), int nDivisor = 1);\r
+ CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, \r
+ Point anchor = Point(-1, -1));\r
\r
//! returns the non-separable linear filter engine\r
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, \r
const Point& anchor = Point(-1,-1));\r
\r
//! returns the primitive row filter with the specified kernel\r
- CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const GpuMat& rowKernel, \r
- int anchor = -1, int nDivisor = 1);\r
+ CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, \r
+ int anchor = -1);\r
\r
//! returns the primitive column filter with the specified kernel\r
- CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const GpuMat& columnKernel, \r
- int anchor = -1, int nDivisor = 1);\r
+ CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, \r
+ int anchor = -1);\r
\r
//! returns the separable linear filter engine\r
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, \r
- const Mat& columnKernel, const Point& anchor = Point(-1,-1), bool rowFilterFirst = true);\r
+ const Mat& columnKernel, const Point& anchor = Point(-1,-1));\r
\r
//! returns filter engine for the generalized Sobel operator\r
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);\r
\r
//! applies separable 2D linear filter to the image\r
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, \r
- Point anchor = Point(-1,-1), bool rowFilterFirst = true);\r
+ Point anchor = Point(-1,-1));\r
\r
//! applies generalized Sobel operator to the image\r
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);\r
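
A usage sketch of the reworked filter API (illustrative only, not part of the patch): the kernels are now plain cv::Mat and the rowFilterFirst/nDivisor parameters are gone. The input file name and type choices below are assumptions.

#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat host = cv::imread("lena.png", 0);    // hypothetical 8-bit grayscale input
    host.convertTo(host, CV_32F);

    cv::gpu::GpuMat src(host), smoothed, grad;

    // Separable Gaussian smoothing: row and column kernels are host-side cv::Mat.
    cv::Mat kx = cv::getGaussianKernel(5, -1, CV_32F);
    cv::Mat ky = cv::getGaussianKernel(5, -1, CV_32F);
    cv::gpu::sepFilter2D(src, smoothed, CV_32F, kx, ky);

    // Generalized Sobel (first x-derivative) built on the same separable engine.
    cv::gpu::Sobel(src, grad, CV_32F, 1, 0, 3);

    cv::Mat result;
    grad.download(result);                        // bring the result back to the host
    return 0;
}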
////////////////////////////////////////////////////////////////////////\r
// compare\r
\r
-namespace cv { namespace gpu { namespace matrix_operations\r
+namespace cv { namespace gpu { namespace mathfunc\r
{\r
- void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);\r
+ void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);\r
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);\r
}}}\r
\r
}\r
else\r
{\r
- matrix_operations::compare_ne_8u(src1, src2, dst);\r
+ mathfunc::compare_ne_8uc4(src1, src2, dst);\r
}\r
}\r
else\r
}\r
else\r
{\r
- matrix_operations::compare_ne_32f(src1, src2, dst);\r
+ mathfunc::compare_ne_32f(src1, src2, dst);\r
}\r
}\r
}\r
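
For context (illustrative only, not part of the patch): the dispatcher above routes the not-equal case of cv::gpu::compare to the new mathfunc kernels, while the other comparison operators keep their NPP path.

// CMP_NE on CV_8UC4 or CV_32FC1 inputs takes the mathfunc path shown above.
cv::gpu::GpuMat a(480, 640, CV_8UC4), b(480, 640, CV_8UC4), mask;
cv::gpu::compare(a, b, mask, cv::CMP_NE);    // mask becomes CV_8UC1, 255 where pixels differ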
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#include "opencv2/gpu/devmem2d.hpp"\r
-#include "saturate_cast.hpp"\r
-#include "safe_call.hpp"\r
-\r
-using namespace cv::gpu;\r
-\r
-#ifndef FLT_MAX\r
-#define FLT_MAX 3.402823466e+30F\r
-#endif\r
-\r
-namespace bf_krnls\r
-{\r
- __constant__ float* ctable_color;\r
- __constant__ float* ctable_space;\r
- __constant__ size_t ctable_space_step;\r
-\r
- __constant__ int cndisp;\r
- __constant__ int cradius;\r
-\r
- __constant__ short cedge_disc;\r
- __constant__ short cmax_disc;\r
-}\r
-\r
-namespace cv { namespace gpu { namespace bf \r
-{\r
- void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)\r
- {\r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );\r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.ptr, sizeof(table_space.ptr)) );\r
- size_t table_space_step = table_space.step / sizeof(float);\r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );\r
- \r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );\r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );\r
- \r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );\r
- cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );\r
- }\r
-}}}\r
-\r
-namespace bf_krnls\r
-{\r
- template <int channels>\r
- struct DistRgbMax\r
- {\r
- static __device__ uchar calc(const uchar* a, const uchar* b)\r
- {\r
- uchar x = abs(a[0] - b[0]);\r
- uchar y = abs(a[1] - b[1]);\r
- uchar z = abs(a[2] - b[2]);\r
- return (max(max(x, y), z));\r
- }\r
- };\r
-\r
- template <>\r
- struct DistRgbMax<1>\r
- {\r
- static __device__ uchar calc(const uchar* a, const uchar* b)\r
- {\r
- return abs(a[0] - b[0]);\r
- }\r
- };\r
-\r
- template <int channels, typename T>\r
- __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)\r
- {\r
- const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
- const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);\r
-\r
- T dp[5];\r
-\r
- if (y > 0 && y < h - 1 && x > 0 && x < w - 1)\r
- {\r
- dp[0] = *(disp + (y ) * disp_step + x + 0);\r
- dp[1] = *(disp + (y-1) * disp_step + x + 0);\r
- dp[2] = *(disp + (y ) * disp_step + x - 1);\r
- dp[3] = *(disp + (y+1) * disp_step + x + 0);\r
- dp[4] = *(disp + (y ) * disp_step + x + 1);\r
-\r
- if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc) \r
- {\r
- const int ymin = max(0, y - cradius);\r
- const int xmin = max(0, x - cradius);\r
- const int ymax = min(h - 1, y + cradius);\r
- const int xmax = min(w - 1, x + cradius);\r
-\r
- float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};\r
-\r
- const uchar* ic = img + y * img_step + channels * x;\r
-\r
- for(int yi = ymin; yi <= ymax; yi++)\r
- {\r
- const T* disp_y = disp + yi * disp_step;\r
-\r
- for(int xi = xmin; xi <= xmax; xi++)\r
- {\r
- const uchar* in = img + yi * img_step + channels * xi;\r
-\r
- uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);\r
-\r
- const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];\r
-\r
- const T disp_reg = disp_y[xi];\r
-\r
- cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;\r
- cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;\r
- cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;\r
- cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;\r
- cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;\r
- }\r
- }\r
-\r
- float minimum = FLT_MAX;\r
- int id = 0;\r
-\r
- if (cost[0] < minimum)\r
- {\r
- minimum = cost[0];\r
- id = 0;\r
- }\r
- if (cost[1] < minimum)\r
- {\r
- minimum = cost[1];\r
- id = 1;\r
- }\r
- if (cost[2] < minimum)\r
- {\r
- minimum = cost[2];\r
- id = 2;\r
- }\r
- if (cost[3] < minimum)\r
- {\r
- minimum = cost[3];\r
- id = 3;\r
- }\r
- if (cost[4] < minimum)\r
- {\r
- minimum = cost[4];\r
- id = 4;\r
- }\r
-\r
- *(disp + y * disp_step + x) = dp[id];\r
- }\r
- }\r
- }\r
-}\r
-\r
-namespace cv { namespace gpu { namespace bf \r
-{\r
- template <typename T> \r
- void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
- {\r
- dim3 threads(32, 8, 1);\r
- dim3 grid(1, 1, 1);\r
- grid.x = divUp(disp.cols, threads.x << 1);\r
- grid.y = divUp(disp.rows, threads.y);\r
-\r
- switch (channels)\r
- {\r
- case 1:\r
- for (int i = 0; i < iters; ++i)\r
- {\r
- bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
- bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
- }\r
- break;\r
- case 3:\r
- for (int i = 0; i < iters; ++i)\r
- {\r
- bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
- bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
- }\r
- break;\r
- default:\r
- cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
- } \r
-\r
- if (stream != 0)\r
- cudaSafeCall( cudaThreadSynchronize() );\r
- }\r
-\r
- void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
- {\r
- bilateral_filter_caller(disp, img, channels, iters, stream);\r
- }\r
-\r
- void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
- {\r
- bilateral_filter_caller(disp, img, channels, iters, stream);\r
- }\r
-}}}\r
\r
#include "cuda_shared.hpp"\r
#include "saturate_cast.hpp"\r
+#include "vecmath.hpp"\r
\r
using namespace cv::gpu;\r
\r
#define FLT_EPSILON 1.192092896e-07F\r
#endif\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
- template<typename T, int N> struct TypeVec {};\r
- template<> struct TypeVec<uchar, 3> { typedef uchar3 vec_t; };\r
- template<> struct TypeVec<uchar, 4> { typedef uchar4 vec_t; };\r
- template<> struct TypeVec<ushort, 3> { typedef ushort3 vec_t; };\r
- template<> struct TypeVec<ushort, 4> { typedef ushort4 vec_t; };\r
- template<> struct TypeVec<float, 3> { typedef float3 vec_t; };\r
- template<> struct TypeVec<float, 4> { typedef float4 vec_t; };\r
-\r
template<typename T> struct ColorChannel {};\r
template<> struct ColorChannel<uchar>\r
{\r
\r
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
template <int SRCCN, int DSTCN, typename T>\r
__global__ void RGB2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN, int DSTCN>\r
void RGB2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
\r
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
template <int GREEN_BITS, int DSTCN> struct RGB5x52RGBConverter {}; \r
template <int DSTCN> struct RGB5x52RGBConverter<5, DSTCN>\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <int GREEN_BITS, int DSTCN>\r
void RGB5x52RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB5x52RGB<GREEN_BITS, DSTCN><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2RGB5x5<SRCCN, GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
\r
///////////////////////////////// Grayscale to Color ////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
template <int DSTCN, typename T>\r
__global__ void Gray2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int DSTCN>\r
void Gray2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::Gray2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
\r
///////////////////////////////// Color to Grayscale ////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
#undef R2Y\r
#undef G2Y\r
} \r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN>\r
void RGB2Gray_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2Gray<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
\r
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
__constant__ float cYCrCbCoeffs_f[5];\r
__constant__ int cYCrCbCoeffs_i[5];\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN, int DSTCN>\r
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
\r
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
{RGB2YCrCb_caller<ushort, 4, 3>, RGB2YCrCb_caller<ushort, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );\r
\r
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );\r
\r
RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
\r
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
{YCrCb2RGB_caller<ushort, 4, 3>, YCrCb2RGB_caller<ushort, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );\r
\r
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );\r
\r
YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);\r
}\r
\r
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
__constant__ float cXYZ_D65f[9];\r
__constant__ int cXYZ_D65i[9];\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN, int DSTCN>\r
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
\r
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
{RGB2XYZ_caller<ushort, 4, 3>, RGB2XYZ_caller<ushort, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
\r
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
\r
RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols);\r
\r
if (stream == 0)\r
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
\r
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
{XYZ2RGB_caller<ushort, 4, 3>, XYZ2RGB_caller<ushort, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65i, coeffs, 9 * sizeof(int)) );\r
\r
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}\r
};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cXYZ_D65f, coeffs, 9 * sizeof(float)) );\r
\r
XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);\r
}\r
\r
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
__constant__ int cHsvDivTable[256];\r
\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN, int DSTCN>\r
void RGB2HSV_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)\r
grid.y = divUp(src.rows, threads.y);\r
\r
if (hrange == 180)\r
- imgproc::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2HSV<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
else\r
- imgproc::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2HSV<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
4352, 4334, 4316, 4298, 4281, 4263, 4246, 4229,\r
4212, 4195, 4178, 4161, 4145, 4128, 4112, 4096\r
};\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvDivTable, div_table, sizeof(div_table)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvDivTable, div_table, sizeof(div_table)) );\r
\r
RGB2HSV_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);\r
}\r
grid.y = divUp(src.rows, threads.y);\r
\r
if (hrange == 180)\r
- imgproc::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::HSV2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
else\r
- imgproc::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::HSV2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
static const int sector_data[][3] =\r
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );\r
\r
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);\r
}\r
static const int sector_data[][3] =\r
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHsvSectorData, sector_data, sizeof(sector_data)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHsvSectorData, sector_data, sizeof(sector_data)) );\r
\r
HSV2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);\r
}\r
\r
/////////////////////////////////////// RGB <-> HLS ////////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
template<typename T, int HR> struct RGB2HLSConvertor;\r
template<int HR> struct RGB2HLSConvertor<float, HR>\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc\r
+namespace cv { namespace gpu { namespace imgproc\r
{\r
template <typename T, int SRCCN, int DSTCN>\r
void RGB2HLS_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, int hrange, cudaStream_t stream)\r
grid.y = divUp(src.rows, threads.y);\r
\r
if (hrange == 180)\r
- imgproc::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2HLS<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
else\r
- imgproc::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::RGB2HLS<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
grid.y = divUp(src.rows, threads.y);\r
\r
if (hrange == 180)\r
- imgproc::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::HLS2RGB<SRCCN, DSTCN, 180, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
else\r
- imgproc::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
+ imgproc_krnls::HLS2RGB<SRCCN, DSTCN, 255, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, \r
dst.ptr, dst.step, src.rows, src.cols, bidx);\r
\r
if (stream == 0)\r
static const int sector_data[][3]=\r
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );\r
\r
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);\r
}\r
static const int sector_data[][3]=\r
{{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cHlsSectorData, sector_data, sizeof(sector_data)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cHlsSectorData, sector_data, sizeof(sector_data)) );\r
\r
HLS2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, hrange, stream);\r
}\r
#define SHRT_MAX 32767\r
#endif\r
\r
-template <typename T>\r
-struct TypeLimits {};\r
-\r
-template <>\r
-struct TypeLimits<short>\r
-{\r
- static __device__ short max() {return SHRT_MAX;}\r
-};\r
-\r
-template <>\r
-struct TypeLimits<float>\r
+namespace csbp_krnls\r
{\r
- static __device__ float max() {return FLT_MAX;}\r
-};\r
+ template <typename T> struct TypeLimits;\r
+ template <> struct TypeLimits<short>\r
+ {\r
+ static __device__ short max() {return SHRT_MAX;}\r
+ };\r
+ template <> struct TypeLimits<float>\r
+ {\r
+ static __device__ float max() {return FLT_MAX;}\r
+ };\r
+}\r
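
A small sketch of why TypeLimits is useful in the templated CSBP kernels (illustrative only, not part of the patch; the function name is hypothetical): it lets device code seed a running minimum without branching on the element type.

template <typename T>
__device__ T findMinimum(const T* vals, int n)
{
    T best = csbp_krnls::TypeLimits<T>::max();   // SHRT_MAX for short, FLT_MAX for float
    for (int i = 0; i < n; ++i)
        if (vals[i] < best)
            best = vals[i];
    return best;
}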
\r
///////////////////////////////////////////////////////////////\r
/////////////////////// load constants ////////////////////////\r
\r
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }\r
\r
- namespace matrix_operations\r
- { \r
- extern "C" void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
-\r
- extern "C" void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);\r
- extern "C" void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
-\r
- extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);\r
- }\r
-\r
template<class T> \r
- inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }\r
-\r
+ static inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }\r
}\r
}\r
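
A minimal sketch of how the uploadConstant helper is meant to be called from a .cu file (illustrative only, not part of the patch; the symbol and function names below are hypothetical, and the helper is assumed to live in cv::gpu as declared in cuda_shared.hpp):

__constant__ int cNdisp;    // hypothetical __constant__ symbol used by some kernel

void setNdisp(int ndisp)
{
    // Wraps cudaMemcpyToSymbol("cNdisp", &ndisp, sizeof(int)) in cudaSafeCall.
    cv::gpu::uploadConstant("cNdisp", ndisp);
}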
\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include "opencv2/gpu/devmem2d.hpp"\r
+#include "saturate_cast.hpp"\r
+#include "safe_call.hpp"\r
+#include "cuda_shared.hpp"\r
+\r
+using namespace cv::gpu;\r
+\r
+#ifndef FLT_MAX\r
+#define FLT_MAX 3.402823466e+38F\r
+#endif\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Linear filters\r
+\r
+#define MAX_KERNEL_SIZE 16\r
+\r
+namespace filter_krnls\r
+{\r
+ __constant__ float cLinearKernel[MAX_KERNEL_SIZE];\r
+}\r
+\r
+namespace cv { namespace gpu { namespace filters\r
+{\r
+ void loadLinearKernel(const float kernel[], int ksize)\r
+ {\r
+ cudaSafeCall( cudaMemcpyToSymbol(filter_krnls::cLinearKernel, kernel, ksize * sizeof(float)) );\r
+ }\r
+}}}\r
+\r
+namespace filter_krnls\r
+{\r
+ template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>\r
+ __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)\r
+ {\r
+ __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];\r
+ \r
+ const int blockStartX = blockDim.x * blockIdx.x;\r
+ const int blockStartY = blockDim.y * blockIdx.y;\r
+\r
+ const int threadX = blockStartX + threadIdx.x;\r
+ const int prevThreadX = threadX - blockDim.x;\r
+ const int nextThreadX = threadX + blockDim.x;\r
+\r
+ const int threadY = blockStartY + threadIdx.y;\r
+\r
+ T* sDataRow = smem + threadIdx.y * blockDim.x * 3;\r
+\r
+ if (threadY < height)\r
+ {\r
+ const T* rowSrc = src + threadY * src_step;\r
+\r
+ sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : 0;\r
+\r
+ sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : 0;\r
+\r
+ sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : 0;\r
+\r
+ __syncthreads();\r
+\r
+ if (threadX < width)\r
+ {\r
+ float sum = 0;\r
+\r
+ sDataRow += threadIdx.x + blockDim.x - anchor;\r
+\r
+ #pragma unroll\r
+ for(int i = 0; i < KERNEL_SIZE; ++i)\r
+ sum += cLinearKernel[i] * sDataRow[i];\r
+\r
+ dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace filters\r
+{\r
+ template <int KERNEL_SIZE, typename T, typename D>\r
+ void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)\r
+ {\r
+ const int BLOCK_DIM_X = 16;\r
+ const int BLOCK_DIM_Y = 16;\r
+\r
+ dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
+ dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
+\r
+ filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step, \r
+ dst.ptr, dst.elem_step, anchor, src.cols, src.rows);\r
+\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+\r
+ template <typename T, typename D>\r
+ inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);\r
+ static const caller_t callers[] = \r
+ {linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>, \r
+ linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>, \r
+ linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>, \r
+ linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>, \r
+ linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>, \r
+ linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>, \r
+ linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>, \r
+ linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};\r
+\r
+ loadLinearKernel(kernel, ksize);\r
+ callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
+ }\r
+\r
+ void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearRowFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearRowFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearRowFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);\r
+ }\r
+}}}\r
+\r
+namespace filter_krnls\r
+{\r
+ template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>\r
+ __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)\r
+ {\r
+ __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];\r
+ \r
+ const int blockStartX = blockDim.x * blockIdx.x;\r
+ const int blockStartY = blockDim.y * blockIdx.y;\r
+\r
+ const int threadX = blockStartX + threadIdx.x;\r
+\r
+ const int threadY = blockStartY + threadIdx.y;\r
+ const int prevThreadY = threadY - blockDim.y;\r
+ const int nextThreadY = threadY + blockDim.y;\r
+\r
+ const int smem_step = blockDim.x;\r
+\r
+ T* sDataColumn = smem + threadIdx.x;\r
+\r
+ if (threadX < width)\r
+ {\r
+ const T* colSrc = src + threadX;\r
+\r
+ sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : 0;\r
+\r
+ sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : 0;\r
+\r
+ sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : 0;\r
+\r
+ __syncthreads();\r
+\r
+ if (threadY < height)\r
+ {\r
+ float sum = 0;\r
+\r
+ sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;\r
+\r
+ #pragma unroll\r
+ for(int i = 0; i < KERNEL_SIZE; ++i)\r
+ sum += cLinearKernel[i] * sDataColumn[i * smem_step];\r
+\r
+ dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace filters\r
+{\r
+ template <int KERNEL_SIZE, typename T, typename D>\r
+ void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)\r
+ {\r
+ const int BLOCK_DIM_X = 16;\r
+ const int BLOCK_DIM_Y = 16;\r
+\r
+ dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);\r
+ dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));\r
+\r
+ filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step, \r
+ dst.ptr, dst.elem_step, anchor, src.cols, src.rows);\r
+\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+\r
+ template <typename T, typename D>\r
+ inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);\r
+ static const caller_t callers[] = \r
+ {linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>, \r
+ linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>, \r
+ linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>, \r
+ linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>, \r
+ linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>, \r
+ linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>, \r
+ linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>, \r
+ linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};\r
+\r
+ loadLinearKernel(kernel, ksize);\r
+ callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);\r
+ }\r
+\r
+ void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);\r
+ }\r
+ void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)\r
+ {\r
+ linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);\r
+ }\r
+}}}\r
+\r
+/////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Bilateral filters\r
+\r
+namespace bf_krnls\r
+{\r
+ __constant__ float* ctable_color;\r
+ __constant__ float* ctable_space;\r
+ __constant__ size_t ctable_space_step;\r
+\r
+ __constant__ int cndisp;\r
+ __constant__ int cradius;\r
+\r
+ __constant__ short cedge_disc;\r
+ __constant__ short cmax_disc;\r
+}\r
+\r
+namespace cv { namespace gpu { namespace bf \r
+{\r
+ void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)\r
+ {\r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.ptr, sizeof(table_space.ptr)) );\r
+ size_t table_space_step = table_space.step / sizeof(float);\r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );\r
+ \r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );\r
+ \r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );\r
+ }\r
+}}}\r
+\r
+namespace bf_krnls\r
+{\r
+ template <int channels>\r
+ struct DistRgbMax\r
+ {\r
+ static __device__ uchar calc(const uchar* a, const uchar* b)\r
+ {\r
+ uchar x = abs(a[0] - b[0]);\r
+ uchar y = abs(a[1] - b[1]);\r
+ uchar z = abs(a[2] - b[2]);\r
+ return (max(max(x, y), z));\r
+ }\r
+ };\r
+\r
+ template <>\r
+ struct DistRgbMax<1>\r
+ {\r
+ static __device__ uchar calc(const uchar* a, const uchar* b)\r
+ {\r
+ return abs(a[0] - b[0]);\r
+ }\r
+ };\r
+\r
+ template <int channels, typename T>\r
+ __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)\r
+ {\r
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
+ const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);\r
+\r
+ T dp[5];\r
+\r
+ if (y > 0 && y < h - 1 && x > 0 && x < w - 1)\r
+ {\r
+ dp[0] = *(disp + (y ) * disp_step + x + 0);\r
+ dp[1] = *(disp + (y-1) * disp_step + x + 0);\r
+ dp[2] = *(disp + (y ) * disp_step + x - 1);\r
+ dp[3] = *(disp + (y+1) * disp_step + x + 0);\r
+ dp[4] = *(disp + (y ) * disp_step + x + 1);\r
+\r
+ if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc) \r
+ {\r
+ const int ymin = max(0, y - cradius);\r
+ const int xmin = max(0, x - cradius);\r
+ const int ymax = min(h - 1, y + cradius);\r
+ const int xmax = min(w - 1, x + cradius);\r
+\r
+ float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};\r
+\r
+ const uchar* ic = img + y * img_step + channels * x;\r
+\r
+ for(int yi = ymin; yi <= ymax; yi++)\r
+ {\r
+ const T* disp_y = disp + yi * disp_step;\r
+\r
+ for(int xi = xmin; xi <= xmax; xi++)\r
+ {\r
+ const uchar* in = img + yi * img_step + channels * xi;\r
+\r
+ uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);\r
+\r
+ const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];\r
+\r
+ const T disp_reg = disp_y[xi];\r
+\r
+ cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;\r
+ cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;\r
+ cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;\r
+ cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;\r
+ cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;\r
+ }\r
+ }\r
+\r
+ float minimum = FLT_MAX;\r
+ int id = 0;\r
+\r
+ if (cost[0] < minimum)\r
+ {\r
+ minimum = cost[0];\r
+ id = 0;\r
+ }\r
+ if (cost[1] < minimum)\r
+ {\r
+ minimum = cost[1];\r
+ id = 1;\r
+ }\r
+ if (cost[2] < minimum)\r
+ {\r
+ minimum = cost[2];\r
+ id = 2;\r
+ }\r
+ if (cost[3] < minimum)\r
+ {\r
+ minimum = cost[3];\r
+ id = 3;\r
+ }\r
+ if (cost[4] < minimum)\r
+ {\r
+ minimum = cost[4];\r
+ id = 4;\r
+ }\r
+\r
+ *(disp + y * disp_step + x) = dp[id];\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+namespace cv { namespace gpu { namespace bf \r
+{\r
+ template <typename T> \r
+ void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
+ {\r
+ dim3 threads(32, 8, 1);\r
+ dim3 grid(1, 1, 1);\r
+ grid.x = divUp(disp.cols, threads.x << 1);\r
+ grid.y = divUp(disp.rows, threads.y);\r
+\r
+ switch (channels)\r
+ {\r
+ case 1:\r
+ for (int i = 0; i < iters; ++i)\r
+ {\r
+ bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
+ bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
+ }\r
+ break;\r
+ case 3:\r
+ for (int i = 0; i < iters; ++i)\r
+ {\r
+ bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
+ bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.ptr, disp.step/sizeof(T), img.ptr, img.step, disp.rows, disp.cols);\r
+ }\r
+ break;\r
+ default:\r
+ cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);\r
+ } \r
+\r
+ if (stream != 0)\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+\r
+ void bilateral_filter_gpu(const DevMem2D& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
+ {\r
+ bilateral_filter_caller(disp, img, channels, iters, stream);\r
+ }\r
+\r
+ void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2D& img, int channels, int iters, cudaStream_t stream)\r
+ {\r
+ bilateral_filter_caller(disp, img, channels, iters, stream);\r
+ }\r
+}}}\r
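
A short worked example of the checkerboard indexing used by bf_krnls::bilateral_filter (illustrative only, not part of the patch): each pass (t = 0, then t = 1) visits every second column, offset by one on alternating rows, which is why grid.x is computed as divUp(cols, threads.x << 1).

// For a thread with threadX = 3 working on row y = 5:
int threadX = 3, y = 5;
int x_pass0 = (threadX << 1) + ((y + 0) & 1);   // 7 -> this pass touches the odd columns of the row
int x_pass1 = (threadX << 1) + ((y + 1) & 1);   // 6 -> the second pass touches the even columns
// Together the two passes cover every interior pixel once per iteration.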
using namespace cv::gpu;\r
\r
/////////////////////////////////// Remap ///////////////////////////////////////////////\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;\r
\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc \r
+namespace cv { namespace gpu { namespace imgproc \r
{\r
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)\r
{\r
grid.x = divUp(dst.cols, threads.x);\r
grid.y = divUp(dst.rows, threads.y);\r
\r
- imgproc::tex_remap.filterMode = cudaFilterModeLinear; \r
- imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;\r
+ imgproc_krnls::tex_remap.filterMode = cudaFilterModeLinear; \r
+ imgproc_krnls::tex_remap.addressMode[0] = imgproc_krnls::tex_remap.addressMode[1] = cudaAddressModeWrap;\r
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();\r
- cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );\r
+ cudaSafeCall( cudaBindTexture2D(0, imgproc_krnls::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );\r
\r
- imgproc::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
+ imgproc_krnls::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
\r
cudaSafeCall( cudaThreadSynchronize() ); \r
- cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );\r
+ cudaSafeCall( cudaUnbindTexture(imgproc_krnls::tex_remap) );\r
}\r
\r
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)\r
grid.x = divUp(dst.cols, threads.x);\r
grid.y = divUp(dst.rows, threads.y);\r
\r
- imgproc::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
+ imgproc_krnls::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
\r
cudaSafeCall( cudaThreadSynchronize() ); \r
}\r
\r
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
texture<uchar4, 2> tex_meanshift;\r
\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc \r
+namespace cv { namespace gpu { namespace imgproc \r
{\r
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)\r
{ \r
grid.y = divUp(src.rows, threads.y);\r
\r
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();\r
- cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
+ cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
\r
- imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );\r
+ imgproc_krnls::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );\r
cudaSafeCall( cudaThreadSynchronize() );\r
- cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) ); \r
+ cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) ); \r
}\r
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps) \r
{\r
grid.y = divUp(src.rows, threads.y);\r
\r
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();\r
- cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
+ cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
\r
- imgproc::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );\r
+ imgproc_krnls::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );\r
cudaSafeCall( cudaThreadSynchronize() );\r
- cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) ); \r
+ cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) ); \r
}\r
}}}\r
\r
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
template <typename T>\r
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc \r
+namespace cv { namespace gpu { namespace imgproc \r
{\r
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)\r
{\r
grid.x = divUp(src.cols, threads.x << 2);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
+ imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() ); \r
grid.x = divUp(src.cols, threads.x << 1);\r
grid.y = divUp(src.rows, threads.y);\r
\r
- imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
+ imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
\r
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////\r
\r
-namespace imgproc\r
+namespace imgproc_krnls\r
{\r
__constant__ float cq[16];\r
\r
}\r
}\r
\r
-namespace cv { namespace gpu { namespace improc \r
+namespace cv { namespace gpu { namespace imgproc \r
{\r
template <typename T>\r
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)\r
grid.x = divUp(disp.cols, threads.x);\r
grid.y = divUp(disp.rows, threads.y);\r
\r
- cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) );\r
+ cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cq, q, 16 * sizeof(float)) );\r
\r
- imgproc::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);\r
+ imgproc_krnls::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
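
All of the *_caller wrappers above share the same launch-configuration idiom; a minimal standalone sketch (illustrative only, not part of the patch), assuming the 32x8 thread block that several callers use:

// divUp (from cuda_shared.hpp) rounds the grid up so every pixel is covered.
static inline int divUp(int a, int b) { return (a % b == 0) ? a / b : a / b + 1; }

void launchShape(int cols, int rows)
{
    dim3 threads(32, 8, 1);
    dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y), 1);
    // e.g. cols = 1000 -> grid.x = divUp(1000, 32) = 32, covering 1024 columns;
    // the kernels typically guard with "if (x < cols && y < rows)" to skip the overhang.
    (void)grid;
}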
//M*/\r
\r
#include "cuda_shared.hpp"\r
+#include "saturate_cast.hpp"\r
+#include "transform.hpp"\r
+#include "vecmath.hpp"\r
\r
using namespace cv::gpu;\r
\r
#define CV_PI 3.1415926535897932384626433832795f\r
#endif\r
\r
+//////////////////////////////////////////////////////////////////////////////////////\r
+// Cart <-> Polar\r
+\r
namespace mathfunc_krnls \r
{\r
struct Nothing\r
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;\r
\r
mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(\r
- x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float), \r
- mag.ptr, mag.step / sizeof(float), angle.ptr, angle.step / sizeof(float), scale, x.cols, x.rows);\r
+ x.ptr, x.elem_step, y.ptr, y.elem_step, \r
+ mag.ptr, mag.elem_step, angle.ptr, angle.elem_step, scale, x.cols, x.rows);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
\r
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;\r
\r
- mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.step / sizeof(float), \r
- angle.ptr, angle.step / sizeof(float), scale, x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float), mag.cols, mag.rows);\r
+ mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.elem_step, \r
+ angle.ptr, angle.elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step, mag.cols, mag.rows);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);\r
}\r
}}}\r
+\r
+//////////////////////////////////////////////////////////////////////////////////////\r
+// Compare\r
+\r
+namespace mathfunc_krnls \r
+{\r
+ template <typename T1, typename T2>\r
+ struct NotEqual\r
+ {\r
+ __device__ uchar operator()(const T1& src1, const T2& src2, int, int)\r
+ {\r
+ return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);\r
+ }\r
+ };\r
+}\r
+\r
+namespace cv { namespace gpu { namespace mathfunc \r
+{\r
+ template <typename T1, typename T2>\r
+ inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)\r
+ {\r
+ mathfunc_krnls::NotEqual<T1, T2> op;\r
+ transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);\r
+ }\r
+\r
+ void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)\r
+ {\r
+ compare_ne<uint, uint>(src1, src2, dst);\r
+ }\r
+ void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)\r
+ {\r
+ compare_ne<float, float>(src1, src2, dst);\r
+ }\r
+}}}\r
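A note on the 8UC4 case above: the four 8-bit channels of a pixel are reinterpreted as one 32-bit word, so a single integer comparison covers the whole pixel and produces a single-channel 0/255 mask. A minimal host-side sketch of the same per-pixel rule (illustrative only, not part of the patch):

#include <cstdint>
#include <cstdio>

// One 8UC4 pixel packed into a 32-bit word: any differing channel makes the words differ.
static unsigned char compare_ne_pixel(std::uint32_t lhs, std::uint32_t rhs)
{
    return static_cast<unsigned char>(static_cast<int>(lhs != rhs) * 255);
}

int main()
{
    std::printf("%d %d\n", compare_ne_pixel(0x01020304u, 0x01020304u),   // 0
                           compare_ne_pixel(0x01020304u, 0x01020305u));  // 255
    return 0;
}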
#include "saturate_cast.hpp"
using namespace cv::gpu;
-using namespace cv::gpu::matrix_operations;
-
-namespace mat_operators
+namespace matop_krnls
{
- __constant__ double scalar_d[4];
-
-
- template <typename T>
- class shift_and_sizeof;
-
- template <>
- class shift_and_sizeof<char>
+ template <typename T> struct shift_and_sizeof;
+ template <> struct shift_and_sizeof<char> { enum { shift = 0 }; };
+ template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
+ template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
+ template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
+ template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
+ template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
+ template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
+
+ template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
+ struct ReadWriteTraits
{
- public:
- enum { shift = 0 };
- };
+ enum {shift=1};
- template <>
- class shift_and_sizeof<unsigned char>
- {
- public:
- enum { shift = 0 };
+ typedef T read_type;
+ typedef DT write_type;
};
-
- template <>
- class shift_and_sizeof<short>
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 1, 1>
{
- public:
- enum { shift = 1 };
- };
+ enum {shift=4};
- template <>
- class shift_and_sizeof<unsigned short>
- {
- public:
- enum { shift = 1 };
+ typedef char4 read_type;
+ typedef char4 write_type;
};
-
- template <>
- class shift_and_sizeof<int>
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 2, 1>
{
- public:
- enum { shift = 2 };
- };
+ enum {shift=4};
- template <>
- class shift_and_sizeof<float>
+ typedef short4 read_type;
+ typedef char4 write_type;
+ };
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 4, 1>
{
- public:
- enum { shift = 2 };
+ enum {shift=4};
+
+ typedef int4 read_type;
+ typedef char4 write_type;
};
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 1, 2>
+ {
+ enum {shift=2};
- template <>
- class shift_and_sizeof<double>
+ typedef char2 read_type;
+ typedef short2 write_type;
+ };
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 2, 2>
{
- public:
- enum { shift = 3 };
+ enum {shift=2};
+
+ typedef short2 read_type;
+ typedef short2 write_type;
};
+ template <typename T, typename DT>
+ struct ReadWriteTraits<T, DT, 4, 2>
+ {
+ enum {shift=2};
+ typedef int2 read_type;
+ typedef short2 write_type;
+ };
+}
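The two trait families above let the kernels work in element units and in vector units: shift_and_sizeof<T>::shift is log2(sizeof(T)), so a byte offset or step can be turned into an element index with a right shift instead of a division, and ReadWriteTraits picks a 2- or 4-wide CUDA vector type (its shift enum) so each thread converts several elements per load/store. A minimal device-side sketch of the byte-to-element conversion, assuming the definitions above:

// Illustrative helper only: byte offset -> element index without a division.
template <typename T>
__device__ __forceinline__ int byte_to_elem_offset(int byte_offset)
{
    return byte_offset >> matop_krnls::shift_and_sizeof<T>::shift;  // == byte_offset / sizeof(T)
}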
- ///////////////////////////////////////////////////////////////////////////
- ////////////////////////////////// CopyTo /////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+////////////////////////////////// CopyTo /////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+namespace matop_krnls
+{
template<typename T>
- __global__ void kernel_copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
+ __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, int step_mat, int step_mask, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
mat_dst[idx] = mat_src[idx];
}
}
+}
+
+namespace cv { namespace gpu { namespace matrix_operations
+{
+ typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
+
+ template<typename T>
+ void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+ {
+ dim3 threadsPerBlock(16,16, 1);
+ dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
+ if (stream == 0)
+ {
+ ::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
+ ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+ cudaSafeCall ( cudaThreadSynchronize() );
+ }
+ else
+ {
+ ::matop_krnls::copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
+ ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+ }
+ }
+
+ void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
+ {
+ static CopyToFunc tab[8] =
+ {
+ copy_to_with_mask_run<unsigned char>,
+ copy_to_with_mask_run<char>,
+ copy_to_with_mask_run<unsigned short>,
+ copy_to_with_mask_run<short>,
+ copy_to_with_mask_run<int>,
+ copy_to_with_mask_run<float>,
+ copy_to_with_mask_run<double>,
+ 0
+ };
+
+ CopyToFunc func = tab[depth];
+
+ if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);
+
+ func(mat_src, mat_dst, mask, channels, stream);
+ }
+}}}
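The table above is indexed by the OpenCV depth code (CV_8U = 0 ... CV_64F = 6); slot 7 is left null so an unsupported depth falls through to cv::gpu::error. The same dispatch pattern as a tiny standalone sketch (names here are illustrative, not from the patch):

#include <cstdio>

typedef void (*Func)();
static void run_8u()  { std::puts("run_8u");  }
static void run_32f() { std::puts("run_32f"); }

int main()
{
    // Index by depth code; a null slot means the depth is unsupported.
    Func tab[8] = { run_8u, 0, 0, 0, 0, run_32f, 0, 0 };
    const int depth = 5;   // CV_32F in OpenCV's numbering
    if (tab[depth] == 0) { std::puts("unsupported"); return 1; }
    tab[depth]();
    return 0;
}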
+
+///////////////////////////////////////////////////////////////////////////
+////////////////////////////////// SetTo //////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
- ////////////////////////////////// SetTo //////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
+namespace matop_krnls
+{
+ __constant__ double scalar_d[4];
template<typename T>
- __global__ void kernel_set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
+ __global__ void set_to_without_mask(T * mat, int cols, int rows, int step, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
}
template<typename T>
- __global__ void kernel_set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
+ __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, int step, int channels, int step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
mat[idx] = scalar_d[ x % channels ];
}
}
+}
+namespace cv { namespace gpu { namespace matrix_operations
+{
+ typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
+ typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
- ///////////////////////////////////////////////////////////////////////////
- //////////////////////////////// ConvertTo ////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
-
- template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
- struct ReadWriteTraits
+ template <typename T>
+ void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
{
- enum {shift=1};
+ dim3 threadsPerBlock(32, 8, 1);
+ dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
- typedef T read_type;
- typedef DT write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 1, 1>
- {
- enum {shift=4};
+ if (stream == 0)
+ {
+ ::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+ cudaSafeCall ( cudaThreadSynchronize() );
+ }
+ else
+ {
+ ::matop_krnls::set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+ }
- typedef char4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 2, 1>
- {
- enum {shift=4};
+ }
- typedef short4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 4, 1>
+ template <typename T>
+ void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
{
- enum {shift=4};
+ dim3 threadsPerBlock(32, 8, 1);
+ dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
- typedef int4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 1, 2>
- {
- enum {shift=2};
+ if (stream == 0)
+ {
+ matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+ cudaSafeCall ( cudaThreadSynchronize() );
+ }
+ else
+ {
+ matop_krnls::set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+ }
+ }
- typedef char2 read_type;
- typedef short2 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 2, 2>
+ void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
{
- enum {shift=2};
+ cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));
- typedef short2 read_type;
- typedef short2 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 4, 2>
+ static SetToFunc_without_mask tab[8] =
+ {
+ set_to_without_mask_run<unsigned char>,
+ set_to_without_mask_run<char>,
+ set_to_without_mask_run<unsigned short>,
+ set_to_without_mask_run<short>,
+ set_to_without_mask_run<int>,
+ set_to_without_mask_run<float>,
+ set_to_without_mask_run<double>,
+ 0
+ };
+
+ SetToFunc_without_mask func = tab[depth];
+
+ if (func == 0)
+ cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);
+
+ func(mat, channels, stream);
+ }
+
+ void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
{
- enum {shift=2};
+ cudaSafeCall( cudaMemcpyToSymbol(matop_krnls::scalar_d, scalar, sizeof(double) * 4));
- typedef int2 read_type;
- typedef short2 write_type;
- };
+ static SetToFunc_with_mask tab[8] =
+ {
+ set_to_with_mask_run<unsigned char>,
+ set_to_with_mask_run<char>,
+ set_to_with_mask_run<unsigned short>,
+ set_to_with_mask_run<short>,
+ set_to_with_mask_run<int>,
+ set_to_with_mask_run<float>,
+ set_to_with_mask_run<double>,
+ 0
+ };
+
+ SetToFunc_with_mask func = tab[depth];
+
+ if (func == 0)
+ cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);
+
+ func(mat, mask, channels, stream);
+ }
+}}}
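Both setTo paths first copy the (up to four) Scalar components into the __constant__ array scalar_d; the kernels then pick the component for each interleaved column with scalar_d[x % channels]. A small host-side illustration of that indexing (not part of the patch):

#include <cstdio>

int main()
{
    const double scalar[4] = { 10.0, 20.0, 30.0, 0.0 };  // e.g. a 3-channel Scalar
    const int channels = 3;
    for (int x = 0; x < 6; ++x)                          // six interleaved samples: c0 c1 c2 c0 c1 c2
        std::printf("column %d gets %.0f\n", x, scalar[x % channels]);
    return 0;
}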
+
+///////////////////////////////////////////////////////////////////////////
+//////////////////////////////// ConvertTo ////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+namespace matop_krnls
+{
template <typename T, typename DT>
- __global__ static void kernel_convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
+ __global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
{
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
}
}
- }
+ }
+}
- ///////////////////////////////////////////////////////////////////////////
- /////////////////////////////// compare_ne ////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
+namespace cv { namespace gpu { namespace matrix_operations
+{
+ typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
- template <typename T>
- __global__ void kernel_compare_ne(uchar* src1, size_t src1_step, uchar* src2, size_t src2_step, uchar* dst, size_t dst_step, int cols, int rows)
+ template<typename T, typename DT>
+ void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
{
- const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
- const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
+ const int shift = ::matop_krnls::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
- if (x < cols && y < rows)
+ dim3 block(32, 8);
+ dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
+
+ if (stream == 0)
+ {
+ matop_krnls::convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
+ cudaSafeCall( cudaThreadSynchronize() );
+ }
+ else
{
- T src1_pix = ((T*)(src1 + y * src1_step))[x];
- T src2_pix = ((T*)(src2 + y * src2_step))[x];
- uchar res = (uchar)(src1_pix != src2_pix) * 255;
- ((dst + y * dst_step))[x] = res;
+ matop_krnls::convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
}
}
-} // namespace mat_operators
-namespace cv
-{
- namespace gpu
+ void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
{
- namespace matrix_operations
+ static CvtFunc tab[8][8] =
{
+ {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
+ cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
- ///////////////////////////////////////////////////////////////////////////
- ////////////////////////////////// CopyTo /////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
-
- typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
-
- template<typename T>
- void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
- {
- dim3 threadsPerBlock(16,16, 1);
- dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
- if (stream == 0)
- {
- ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
- ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
- cudaSafeCall ( cudaThreadSynchronize() );
- }
- else
- {
- ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
- ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
- }
- }
-
- extern "C" void copy_to_with_mask(const DevMem2D& mat_src, DevMem2D mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
- {
- static CopyToFunc tab[8] =
- {
- copy_to_with_mask_run<unsigned char>,
- copy_to_with_mask_run<char>,
- copy_to_with_mask_run<unsigned short>,
- copy_to_with_mask_run<short>,
- copy_to_with_mask_run<int>,
- copy_to_with_mask_run<float>,
- copy_to_with_mask_run<double>,
- 0
- };
-
- CopyToFunc func = tab[depth];
-
- if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);
-
- func(mat_src, mat_dst, mask, channels, stream);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////
- ////////////////////////////////// SetTo //////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
-
- typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
- typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
-
- template <typename T>
- void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
- {
- dim3 threadsPerBlock(32, 8, 1);
- dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-
- if (stream == 0)
- {
- ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
- cudaSafeCall ( cudaThreadSynchronize() );
- }
- else
- {
- ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
- }
-
- }
-
- template <typename T>
- void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
- {
- dim3 threadsPerBlock(32, 8, 1);
- dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-
- if (stream == 0)
- {
- mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
- cudaSafeCall ( cudaThreadSynchronize() );
- }
- else
- {
- mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
- }
- }
-
- extern "C" void set_to_without_mask(DevMem2D mat, int depth, const double *scalar, int channels, const cudaStream_t & stream)
- {
- cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));
-
- static SetToFunc_without_mask tab[8] =
- {
- set_to_without_mask_run<unsigned char>,
- set_to_without_mask_run<char>,
- set_to_without_mask_run<unsigned short>,
- set_to_without_mask_run<short>,
- set_to_without_mask_run<int>,
- set_to_without_mask_run<float>,
- set_to_without_mask_run<double>,
- 0
- };
-
- SetToFunc_without_mask func = tab[depth];
-
- if (func == 0)
- cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);
-
- func(mat, channels, stream);
- }
-
-
- extern "C" void set_to_with_mask(DevMem2D mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
- {
- cudaSafeCall( cudaMemcpyToSymbol(mat_operators::scalar_d, scalar, sizeof(double) * 4));
-
- static SetToFunc_with_mask tab[8] =
- {
- set_to_with_mask_run<unsigned char>,
- set_to_with_mask_run<char>,
- set_to_with_mask_run<unsigned short>,
- set_to_with_mask_run<short>,
- set_to_with_mask_run<int>,
- set_to_with_mask_run<float>,
- set_to_with_mask_run<double>,
- 0
- };
-
- SetToFunc_with_mask func = tab[depth];
-
- if (func == 0)
- cv::gpu::error("Unsupported setTo operation", __FILE__, __LINE__);
-
- func(mat, mask, channels, stream);
- }
-
-
- ///////////////////////////////////////////////////////////////////////////
- //////////////////////////////// ConvertTo ////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
-
- typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
-
- template<typename T, typename DT>
- void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
- {
- const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
-
- dim3 block(32, 8);
- dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
-
- if (stream == 0)
- {
- mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
- cudaSafeCall( cudaThreadSynchronize() );
- }
- else
- {
- mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
- }
- }
-
- extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
- {
- static CvtFunc tab[8][8] =
- {
- {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
- cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
-
- {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
- cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
-
- {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
- cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
-
- {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
- cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
-
- {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
- cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
-
- {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
- cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
-
- {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
- cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
+ {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
+ cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
- {0,0,0,0,0,0,0,0}
- };
+ {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
+ cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
- CvtFunc func = tab[sdepth][ddepth];
- if (func == 0)
- cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
- func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
- }
+ {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
+ cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
- ///////////////////////////////////////////////////////////////////////////
- /////////////////////////////// compare_ne ////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
+ {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
+ cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
- void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));
+ {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
+ cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
- mat_operators::kernel_compare_ne<uint><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
- cudaSafeCall( cudaThreadSynchronize() );
- }
+ {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
+ cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
- void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y));
+ {0,0,0,0,0,0,0,0}
+ };
- mat_operators::kernel_compare_ne<float><<<grid, block>>>(src1.ptr, src1.step, src2.ptr, src2.step, dst.ptr, dst.step, src1.cols, src1.rows);
- cudaSafeCall( cudaThreadSynchronize() );
- }
- } // namespace matrix_operations
- } // namespace gpu
-} // namespace cv
+ CvtFunc func = tab[sdepth][ddepth];
+ if (func == 0)
+ cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
+ func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
+ }
+}}}
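In cvt_ above the grid is sized with divUp(width, block.x * shift) because each thread converts shift consecutive elements through the vectorized read_type/write_type chosen by ReadWriteTraits. The launch arithmetic with concrete (illustrative) numbers:

#include <cstdio>

static int divUp(int total, int grain) { return (total + grain - 1) / grain; }

int main()
{
    const int width = 1000, block_x = 32;
    const int shift = 4;   // e.g. uchar -> uchar: char4 reads and char4 writes
    std::printf("grid.x = %d\n", divUp(width, block_x * shift));   // 1000 / 128 rounded up = 8
    return 0;
}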
{\r
namespace gpu\r
{\r
- // To fix link error: this func already defined in other obj file\r
- namespace \r
+ template<typename _Tp> static __device__ _Tp saturate_cast(uchar v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(schar v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(ushort v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(short v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(uint v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(int v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(float v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(double v) { return _Tp(v); }\r
+\r
+ template<> static __device__ uchar saturate_cast<uchar>(schar v)\r
+ { return (uchar)max((int)v, 0); }\r
+ template<> static __device__ uchar saturate_cast<uchar>(ushort v)\r
+ { return (uchar)min((uint)v, (uint)UCHAR_MAX); }\r
+ template<> static __device__ uchar saturate_cast<uchar>(int v)\r
+ { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }\r
+ template<> static __device__ uchar saturate_cast<uchar>(uint v)\r
+ { return (uchar)min(v, (uint)UCHAR_MAX); }\r
+ template<> static __device__ uchar saturate_cast<uchar>(short v)\r
+ { return saturate_cast<uchar>((uint)v); }\r
+\r
+ template<> static __device__ uchar saturate_cast<uchar>(float v)\r
+ { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }\r
+ template<> static __device__ uchar saturate_cast<uchar>(double v)\r
+ {\r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
+ int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);\r
+ #else\r
+ return saturate_cast<uchar>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<> static __device__ schar saturate_cast<schar>(uchar v)\r
+ { return (schar)min((int)v, SCHAR_MAX); }\r
+ template<> static __device__ schar saturate_cast<schar>(ushort v)\r
+ { return (schar)min((uint)v, (uint)SCHAR_MAX); }\r
+ template<> static __device__ schar saturate_cast<schar>(int v)\r
+ {\r
+ return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?\r
+ v : v > 0 ? SCHAR_MAX : SCHAR_MIN);\r
+ }\r
+ template<> static __device__ schar saturate_cast<schar>(short v)\r
+ { return saturate_cast<schar>((int)v); }\r
+ template<> static __device__ schar saturate_cast<schar>(uint v)\r
+ { return (schar)min(v, (uint)SCHAR_MAX); }\r
+\r
+ template<> static __device__ schar saturate_cast<schar>(float v)\r
+ { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }\r
+ template<> static __device__ schar saturate_cast<schar>(double v)\r
+ { \r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
+ int iv = __double2int_rn(v); return saturate_cast<schar>(iv);\r
+ #else\r
+ return saturate_cast<schar>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<> static __device__ ushort saturate_cast<ushort>(schar v)\r
+ { return (ushort)max((int)v, 0); }\r
+ template<> static __device__ ushort saturate_cast<ushort>(short v)\r
+ { return (ushort)max((int)v, 0); }\r
+ template<> static __device__ ushort saturate_cast<ushort>(int v)\r
+ { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }\r
+ template<> static __device__ ushort saturate_cast<ushort>(uint v)\r
+ { return (ushort)min(v, (uint)USHRT_MAX); }\r
+ template<> static __device__ ushort saturate_cast<ushort>(float v)\r
+ { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }\r
+ template<> static __device__ ushort saturate_cast<ushort>(double v)\r
+ { \r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
+ int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);\r
+ #else\r
+ return saturate_cast<ushort>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<> static __device__ short saturate_cast<short>(ushort v)\r
+ { return (short)min((int)v, SHRT_MAX); }\r
+ template<> static __device__ short saturate_cast<short>(int v)\r
{\r
- template<typename _Tp> __device__ _Tp saturate_cast(uchar v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(schar v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(ushort v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(short v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(uint v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(int v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(float v) { return _Tp(v); }\r
- template<typename _Tp> __device__ _Tp saturate_cast(double v) { return _Tp(v); }\r
-\r
- template<> __device__ uchar saturate_cast<uchar>(schar v)\r
- { return (uchar)max((int)v, 0); }\r
- template<> __device__ uchar saturate_cast<uchar>(ushort v)\r
- { return (uchar)min((uint)v, (uint)UCHAR_MAX); }\r
- template<> __device__ uchar saturate_cast<uchar>(int v)\r
- { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }\r
- template<> __device__ uchar saturate_cast<uchar>(uint v)\r
- { return (uchar)min(v, (uint)UCHAR_MAX); }\r
- template<> __device__ uchar saturate_cast<uchar>(short v)\r
- { return saturate_cast<uchar>((uint)v); }\r
-\r
- template<> __device__ uchar saturate_cast<uchar>(float v)\r
- { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }\r
- template<> __device__ uchar saturate_cast<uchar>(double v)\r
- {\r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
- int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);\r
- #else\r
- return saturate_cast<uchar>((float)v);\r
- #endif\r
- }\r
-\r
- template<> __device__ schar saturate_cast<schar>(uchar v)\r
- { return (schar)min((int)v, SCHAR_MAX); }\r
- template<> __device__ schar saturate_cast<schar>(ushort v)\r
- { return (schar)min((uint)v, (uint)SCHAR_MAX); }\r
- template<> __device__ schar saturate_cast<schar>(int v)\r
- {\r
- return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?\r
- v : v > 0 ? SCHAR_MAX : SCHAR_MIN);\r
- }\r
- template<> __device__ schar saturate_cast<schar>(short v)\r
- { return saturate_cast<schar>((int)v); }\r
- template<> __device__ schar saturate_cast<schar>(uint v)\r
- { return (schar)min(v, (uint)SCHAR_MAX); }\r
-\r
- template<> __device__ schar saturate_cast<schar>(float v)\r
- { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }\r
- template<> __device__ schar saturate_cast<schar>(double v)\r
- { \r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
- int iv = __double2int_rn(v); return saturate_cast<schar>(iv);\r
- #else\r
- return saturate_cast<schar>((float)v);\r
- #endif\r
- }\r
-\r
- template<> __device__ ushort saturate_cast<ushort>(schar v)\r
- { return (ushort)max((int)v, 0); }\r
- template<> __device__ ushort saturate_cast<ushort>(short v)\r
- { return (ushort)max((int)v, 0); }\r
- template<> __device__ ushort saturate_cast<ushort>(int v)\r
- { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }\r
- template<> __device__ ushort saturate_cast<ushort>(uint v)\r
- { return (ushort)min(v, (uint)USHRT_MAX); }\r
- template<> __device__ ushort saturate_cast<ushort>(float v)\r
- { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }\r
- template<> __device__ ushort saturate_cast<ushort>(double v)\r
- { \r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
- int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);\r
- #else\r
- return saturate_cast<ushort>((float)v);\r
- #endif\r
- }\r
-\r
- template<> __device__ short saturate_cast<short>(ushort v)\r
- { return (short)min((int)v, SHRT_MAX); }\r
- template<> __device__ short saturate_cast<short>(int v)\r
- {\r
- return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?\r
- v : v > 0 ? SHRT_MAX : SHRT_MIN);\r
- }\r
- template<> __device__ short saturate_cast<short>(uint v)\r
- { return (short)min(v, (uint)SHRT_MAX); }\r
- template<> __device__ short saturate_cast<short>(float v)\r
- { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }\r
- template<> __device__ short saturate_cast<short>(double v)\r
- { \r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
- int iv = __double2int_rn(v); return saturate_cast<short>(iv);\r
- #else\r
- return saturate_cast<short>((float)v);\r
- #endif\r
- }\r
-\r
- template<> __device__ int saturate_cast<int>(float v) { return __float2int_rn(v); }\r
- template<> __device__ int saturate_cast<int>(double v) \r
- {\r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 \r
- return __double2int_rn(v);\r
- #else\r
- return saturate_cast<int>((float)v);\r
- #endif\r
- }\r
-\r
- template<> __device__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }\r
- template<> __device__ uint saturate_cast<uint>(double v) \r
- { \r
- #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
- return __double2uint_rn(v);\r
- #else\r
- return saturate_cast<uint>((float)v);\r
- #endif\r
- }\r
+ return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?\r
+ v : v > 0 ? SHRT_MAX : SHRT_MIN);\r
}\r
+ template<> static __device__ short saturate_cast<short>(uint v)\r
+ { return (short)min(v, (uint)SHRT_MAX); }\r
+ template<> static __device__ short saturate_cast<short>(float v)\r
+ { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }\r
+ template<> static __device__ short saturate_cast<short>(double v)\r
+ { \r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
+ int iv = __double2int_rn(v); return saturate_cast<short>(iv);\r
+ #else\r
+ return saturate_cast<short>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<> static __device__ int saturate_cast<int>(float v) { return __float2int_rn(v); }\r
+ template<> static __device__ int saturate_cast<int>(double v) \r
+ {\r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130 \r
+ return __double2int_rn(v);\r
+ #else\r
+ return saturate_cast<int>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<> static __device__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }\r
+ template<> static __device__ uint saturate_cast<uint>(double v) \r
+ { \r
+ #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130\r
+ return __double2uint_rn(v);\r
+ #else\r
+ return saturate_cast<uint>((float)v);\r
+ #endif\r
+ }\r
+\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(uchar4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(char4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(ushort4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(short4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(uint4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(int4 v) { return _Tp(v); }\r
+ template<typename _Tp> static __device__ _Tp saturate_cast(float4 v) { return _Tp(v); }\r
+\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(char4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(ushort4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(short4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(uint4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(int4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+ template<> static __device__ uchar4 saturate_cast<uchar4>(float4 v)\r
+ { return make_uchar4(saturate_cast<uchar>(v.x), saturate_cast<uchar>(v.y), saturate_cast<uchar>(v.z), saturate_cast<uchar>(v.w)); }\r
+\r
+ template<> static __device__ char4 saturate_cast<char4>(uchar4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+ template<> static __device__ char4 saturate_cast<char4>(ushort4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+ template<> static __device__ char4 saturate_cast<char4>(short4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+ template<> static __device__ char4 saturate_cast<char4>(uint4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+ template<> static __device__ char4 saturate_cast<char4>(int4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+ template<> static __device__ char4 saturate_cast<char4>(float4 v)\r
+ { return make_char4(saturate_cast<char>(v.x), saturate_cast<char>(v.y), saturate_cast<char>(v.z), saturate_cast<char>(v.w)); }\r
+\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(uchar4 v)\r
+ { return make_ushort4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(char4 v)\r
+ { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(short4 v)\r
+ { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(uint4 v)\r
+ { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(int4 v)\r
+ { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }\r
+ template<> static __device__ ushort4 saturate_cast<ushort4>(float4 v)\r
+ { return make_ushort4(saturate_cast<ushort>(v.x), saturate_cast<ushort>(v.y), saturate_cast<ushort>(v.z), saturate_cast<ushort>(v.w)); }\r
+\r
+ template<> static __device__ short4 saturate_cast<short4>(uchar4 v)\r
+ { return make_short4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ short4 saturate_cast<short4>(char4 v)\r
+ { return make_short4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ short4 saturate_cast<short4>(ushort4 v)\r
+ { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }\r
+ template<> static __device__ short4 saturate_cast<short4>(uint4 v)\r
+ { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }\r
+ template<> static __device__ short4 saturate_cast<short4>(int4 v)\r
+ { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }\r
+ template<> static __device__ short4 saturate_cast<short4>(float4 v)\r
+ { return make_short4(saturate_cast<short>(v.x), saturate_cast<short>(v.y), saturate_cast<short>(v.z), saturate_cast<short>(v.w)); }\r
+ \r
+ template<> static __device__ uint4 saturate_cast<uint4>(uchar4 v)\r
+ { return make_uint4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ uint4 saturate_cast<uint4>(char4 v)\r
+ { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }\r
+ template<> static __device__ uint4 saturate_cast<uint4>(ushort4 v)\r
+ { return make_uint4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ uint4 saturate_cast<uint4>(short4 v)\r
+ { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }\r
+ template<> static __device__ uint4 saturate_cast<uint4>(int4 v)\r
+ { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }\r
+ template<> static __device__ uint4 saturate_cast<uint4>(float4 v)\r
+ { return make_uint4(saturate_cast<uint>(v.x), saturate_cast<uint>(v.y), saturate_cast<uint>(v.z), saturate_cast<uint>(v.w)); }\r
+ \r
+ template<> static __device__ int4 saturate_cast<int4>(uchar4 v)\r
+ { return make_int4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ int4 saturate_cast<int4>(char4 v)\r
+ { return make_int4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ int4 saturate_cast<int4>(ushort4 v)\r
+ { return make_int4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ int4 saturate_cast<int4>(short4 v)\r
+ { return make_int4(v.x, v.y, v.z, v.w); }\r
+ template<> static __device__ int4 saturate_cast<int4>(uint4 v)\r
+ { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); }\r
+ template<> static __device__ int4 saturate_cast<int4>(float4 v)\r
+ { return make_int4(saturate_cast<int>(v.x), saturate_cast<int>(v.y), saturate_cast<int>(v.z), saturate_cast<int>(v.w)); }\r
}\r
}\r
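For orientation, the specializations above clamp to the destination range instead of wrapping. A host-side analogue with a few concrete values (illustrative only, not part of the patch):

#include <algorithm>
#include <cstdio>

static unsigned char saturate_to_uchar(int v)
{
    return static_cast<unsigned char>(std::min(std::max(v, 0), 255));
}

int main()
{
    // Mirrors the device behaviour: 300 clamps to 255, -5 clamps to 0, in-range values pass through.
    std::printf("%d %d %d\n", saturate_to_uchar(300), saturate_to_uchar(-5), saturate_to_uchar(128));
    return 0;
}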
\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_TRANSFORM_HPP__\r
+#define __OPENCV_GPU_TRANSFORM_HPP__\r
+\r
+#include "cuda_shared.hpp"\r
+#include "saturate_cast.hpp"\r
+#include "vecmath.hpp"\r
+\r
+namespace cv { namespace gpu { namespace algo_krnls\r
+{\r
+ template <typename T, typename D, typename UnOp>\r
+ static __global__ void transform(const T* src, size_t src_step, \r
+ D* dst, size_t dst_step, int width, int height, UnOp op)\r
+ {\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+ if (x < width && y < height)\r
+ {\r
+ T src_data = src[y * src_step + x];\r
+ dst[y * dst_step + x] = op(src_data, x, y);\r
+ }\r
+ }\r
+ template <typename T1, typename T2, typename D, typename BinOp>\r
+ static __global__ void transform(const T1* src1, size_t src1_step, const T2* src2, size_t src2_step, \r
+ D* dst, size_t dst_step, int width, int height, BinOp op)\r
+ {\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+ if (x < width && y < height)\r
+ {\r
+ T1 src1_data = src1[y * src1_step + x];\r
+ T2 src2_data = src2[y * src2_step + x];\r
+ dst[y * dst_step + x] = op(src1_data, src2_data, x, y);\r
+ }\r
+ }\r
+}}}\r
+\r
+namespace cv \r
+{ \r
+ namespace gpu \r
+ {\r
+ template <typename T, typename D, typename UnOp>\r
+ static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)\r
+ {\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+\r
+ grid.x = divUp(src.cols, threads.x);\r
+ grid.y = divUp(src.rows, threads.y); \r
+\r
+ algo_krnls::transform<<<grid, threads, 0, stream>>>(src.ptr, src.elem_step, \r
+ dst.ptr, dst.elem_step, src.cols, src.rows, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+ template <typename T1, typename T2, typename D, typename BinOp>\r
+ static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)\r
+ {\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+\r
+ grid.x = divUp(src1.cols, threads.x);\r
+ grid.y = divUp(src1.rows, threads.y); \r
+\r
+ algo_krnls::transform<<<grid, threads, 0, stream>>>(src1.ptr, src1.elem_step, \r
+ src2.ptr, src2.elem_step, dst.ptr, dst.elem_step, src1.cols, src1.rows, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+ }\r
+}\r
+\r
+#endif // __OPENCV_GPU_TRANSFORM_HPP__\r
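A hedged usage sketch for the unary transform helper above; the functor shape follows the op(src_data, x, y) call in the kernel, and the buffer names are assumptions rather than part of the patch:

// Functor that halves every float pixel; the x/y indices are unused here.
struct Scale
{
    float s;
    __device__ float operator()(float v, int, int) const { return v * s; }
};

// src and dst are assumed to be pre-allocated DevMem2Df views of device memory
// with matching sizes; 0 selects the default stream.
//   Scale op = { 0.5f };
//   cv::gpu::transform(src, dst, op, 0);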
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_VECMATH_HPP__\r
+#define __OPENCV_GPU_VECMATH_HPP__\r
+\r
+#include "cuda_shared.hpp"\r
+\r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ template<typename T, int N> struct TypeVec;\r
+ template<typename T> struct TypeVec<T, 1> { typedef T vec_t; };\r
+ template<> struct TypeVec<unsigned char, 2> { typedef uchar2 vec_t; };\r
+ template<> struct TypeVec<uchar2, 2> { typedef uchar2 vec_t; };\r
+        template<> struct TypeVec<unsigned char, 3> { typedef uchar3 vec_t; };\r
+ template<> struct TypeVec<uchar3, 3> { typedef uchar3 vec_t; };\r
+        template<> struct TypeVec<unsigned char, 4> { typedef uchar4 vec_t; };\r
+ template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; };\r
+ template<> struct TypeVec<char, 2> { typedef char2 vec_t; };\r
+ template<> struct TypeVec<char2, 2> { typedef char2 vec_t; };\r
+ template<> struct TypeVec<char, 3> { typedef char3 vec_t; };\r
+ template<> struct TypeVec<char3, 3> { typedef char3 vec_t; };\r
+ template<> struct TypeVec<char, 4> { typedef char4 vec_t; };\r
+ template<> struct TypeVec<char4, 4> { typedef char4 vec_t; };\r
+ template<> struct TypeVec<unsigned short, 2> { typedef ushort2 vec_t; };\r
+ template<> struct TypeVec<ushort2, 2> { typedef ushort2 vec_t; };\r
+ template<> struct TypeVec<unsigned short, 3> { typedef ushort3 vec_t; };\r
+ template<> struct TypeVec<ushort3, 3> { typedef ushort3 vec_t; };\r
+ template<> struct TypeVec<unsigned short, 4> { typedef ushort4 vec_t; };\r
+ template<> struct TypeVec<ushort4, 4> { typedef ushort4 vec_t; };\r
+ template<> struct TypeVec<short, 2> { typedef short2 vec_t; };\r
+ template<> struct TypeVec<short2, 2> { typedef short2 vec_t; };\r
+ template<> struct TypeVec<short, 3> { typedef short3 vec_t; };\r
+ template<> struct TypeVec<short3, 3> { typedef short3 vec_t; };\r
+ template<> struct TypeVec<short, 4> { typedef short4 vec_t; };\r
+ template<> struct TypeVec<short4, 4> { typedef short4 vec_t; };\r
+ template<> struct TypeVec<unsigned int, 2> { typedef uint2 vec_t; };\r
+ template<> struct TypeVec<uint2, 2> { typedef uint2 vec_t; };\r
+ template<> struct TypeVec<unsigned int, 3> { typedef uint3 vec_t; };\r
+ template<> struct TypeVec<uint3, 3> { typedef uint3 vec_t; };\r
+ template<> struct TypeVec<unsigned int, 4> { typedef uint4 vec_t; };\r
+ template<> struct TypeVec<uint4, 4> { typedef uint4 vec_t; };\r
+ template<> struct TypeVec<int, 2> { typedef int2 vec_t; };\r
+ template<> struct TypeVec<int2, 2> { typedef int2 vec_t; };\r
+ template<> struct TypeVec<int, 3> { typedef int3 vec_t; };\r
+ template<> struct TypeVec<int3, 3> { typedef int3 vec_t; };\r
+ template<> struct TypeVec<int, 4> { typedef int4 vec_t; };\r
+ template<> struct TypeVec<int4, 4> { typedef int4 vec_t; };\r
+ template<> struct TypeVec<float, 2> { typedef float2 vec_t; };\r
+ template<> struct TypeVec<float2, 2> { typedef float2 vec_t; };\r
+ template<> struct TypeVec<float, 3> { typedef float3 vec_t; };\r
+ template<> struct TypeVec<float3, 3> { typedef float3 vec_t; };\r
+ template<> struct TypeVec<float, 4> { typedef float4 vec_t; };\r
+ template<> struct TypeVec<float4, 4> { typedef float4 vec_t; }; \r
+\r
+ static __device__ uchar4 operator+(const uchar4& a, const uchar4& b)\r
+ {\r
+ return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);\r
+ }\r
+ static __device__ uchar4 operator-(const uchar4& a, const uchar4& b)\r
+ {\r
+ return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);\r
+ }\r
+ static __device__ uchar4 operator*(const uchar4& a, const uchar4& b)\r
+ {\r
+ return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);\r
+ }\r
+ static __device__ uchar4 operator/(const uchar4& a, const uchar4& b)\r
+ {\r
+ return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);\r
+ }\r
+ template <typename T>\r
+ static __device__ uchar4 operator*(const uchar4& a, T s)\r
+ {\r
+ return make_uchar4(a.x * s, a.y * s, a.z * s, a.w * s);\r
+ }\r
+ template <typename T>\r
+ static __device__ uchar4 operator*(T s, const uchar4& a)\r
+ {\r
+ return a * s;\r
+ }\r
+ }\r
+}\r
+\r
+#endif // __OPENCV_GPU_VECMATH_HPP__
\ No newline at end of file
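TypeVec<T, N>::vec_t above maps a channel type plus channel count onto the matching CUDA vector type, and the uchar4 operators are plain component-wise arithmetic (results wrap modulo 256; saturation is left to saturate_cast). A minimal sketch, assuming the header above:

// Resolve "4-channel unsigned char pixel" and "3-channel float point" at compile time.
typedef cv::gpu::TypeVec<unsigned char, 4>::vec_t pixel8u4_t;   // uchar4
typedef cv::gpu::TypeVec<float, 3>::vec_t         point32f3_t;  // float3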
\r
#include "opencv2/gpu/stream_accessor.hpp"\r
\r
+namespace cv \r
+{\r
+ namespace gpu\r
+ {\r
+ namespace matrix_operations\r
+ { \r
+ void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
+\r
+ void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);\r
+ void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
+\r
+ void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);\r
+ }\r
+ }\r
+}\r
+\r
struct Stream::Impl\r
{\r
cudaStream_t stream;\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include "precomp.hpp"\r
+\r
+using namespace cv;\r
+using namespace cv::gpu;\r
+\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
+Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
+Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
+Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
+\r
+void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point) { throw_nogpu(); }\r
+void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
+void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
+void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }\r
+void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }\r
+void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point) { throw_nogpu(); }\r
+void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }\r
+void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double) { throw_nogpu(); }\r
+void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }\r
+void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }\r
+\r
+#else\r
+\r
+namespace\r
+{\r
+ inline void normalizeAnchor(int& anchor, int ksize)\r
+ {\r
+ if (anchor < 0)\r
+ anchor = ksize >> 1;\r
+\r
+ CV_Assert(0 <= anchor && anchor < ksize);\r
+ }\r
+\r
+ inline void normalizeAnchor(Point& anchor, const Size& ksize)\r
+ {\r
+ normalizeAnchor(anchor.x, ksize.width);\r
+ normalizeAnchor(anchor.y, ksize.height);\r
+ }\r
+\r
+ inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size)\r
+ {\r
+ if (roi == Rect(0,0,-1,-1))\r
+ roi = Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height);\r
+\r
+ CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);\r
+ }\r
+\r
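+    // Converts a host-side kernel to the single-row layout expected by the NPP wrappers:
+    // floating-point kernels are rescaled to fixed point (nDivisor is set to 256), the kernel is
+    // flattened to one row, optionally reversed, and finally uploaded to the device.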
+ inline void normalizeKernel(const Mat& kernel, GpuMat& gpu_krnl, int type = CV_8U, int* nDivisor = 0, bool reverse = false)\r
+ {\r
+ int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;\r
+ if (nDivisor) *nDivisor = scale;\r
+ \r
+ Mat temp(kernel.size(), type);\r
+ kernel.convertTo(temp, type, scale);\r
+ Mat cont_krnl = temp.reshape(1, 1);\r
+\r
+ if (reverse)\r
+ {\r
+ int count = cont_krnl.cols >> 1;\r
+ for (int i = 0; i < count; ++i)\r
+ {\r
+ std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));\r
+ }\r
+ }\r
+\r
+ gpu_krnl.upload(cont_krnl);\r
+ } \r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Filter2D\r
+\r
+namespace\r
+{\r
+ class Filter2DEngine_GPU : public FilterEngine_GPU\r
+ {\r
+ public:\r
+ Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}\r
+\r
+ virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
+ {\r
+ Size src_size = src.size();\r
+\r
+ dst.create(src_size, src.type());\r
+ dst = Scalar(0.0);\r
+\r
+ normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);\r
+\r
+ GpuMat srcROI = src(roi);\r
+ GpuMat dstROI = dst(roi);\r
+\r
+ (*filter2D)(srcROI, dstROI);\r
+ }\r
+\r
+ Ptr<BaseFilter_GPU> filter2D;\r
+ };\r
+}\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)\r
+{\r
+ return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D));\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// SeparableFilter\r
+\r
+namespace\r
+{\r
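+    // Applies the row filter into an intermediate buffer of the source type, then the column
+    // filter from that buffer into the destination; both passes work on the same ROI.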
+ class SeparableFilterEngine_GPU : public FilterEngine_GPU\r
+ {\r
+ public:\r
+ SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, \r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter_) :\r
+ rowFilter(rowFilter_), columnFilter(columnFilter_)\r
+ {\r
+ ksize = Size(rowFilter->ksize, columnFilter->ksize);\r
+ anchor = Point(rowFilter->anchor, columnFilter->anchor);\r
+ }\r
+\r
+ virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
+ {\r
+ Size src_size = src.size();\r
+ int src_type = src.type();\r
+\r
+ dst.create(src_size, src_type);\r
+ dst = Scalar(0.0);\r
+ dstBuf.create(src_size, src_type);\r
+ dstBuf = Scalar(0.0);\r
+\r
+ normalizeROI(roi, ksize, anchor, src_size);\r
+\r
+ srcROI = src(roi);\r
+ dstROI = dst(roi);\r
+ dstBufROI = dstBuf(roi);\r
+ \r
+ (*rowFilter)(srcROI, dstBufROI);\r
+ (*columnFilter)(dstBufROI, dstROI);\r
+ }\r
+\r
+ Ptr<BaseRowFilter_GPU> rowFilter;\r
+ Ptr<BaseColumnFilter_GPU> columnFilter;\r
+ Size ksize;\r
+ Point anchor;\r
+ GpuMat dstBuf;\r
+ GpuMat srcROI;\r
+ GpuMat dstROI;\r
+ GpuMat dstBufROI;\r
+ };\r
+}\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, \r
+ const Ptr<BaseColumnFilter_GPU>& columnFilter)\r
+{\r
+ return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// 1D Sum Filter\r
+\r
+namespace\r
+{\r
+ class NppRowSumFilter : public BaseRowFilter_GPU\r
+ {\r
+ public:\r
+ NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );\r
+ }\r
+ };\r
+}\r
+\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor)\r
+{\r
+ CV_Assert(srcType == CV_8UC1 && sumType == CV_32FC1);\r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseRowFilter_GPU>(new NppRowSumFilter(ksize, anchor));\r
+}\r
+\r
+namespace\r
+{\r
+ class NppColumnSumFilter : public BaseColumnFilter_GPU\r
+ {\r
+ public:\r
+ NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );\r
+ }\r
+ };\r
+}\r
+\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor)\r
+{\r
+ CV_Assert(sumType == CV_8UC1 && dstType == CV_32FC1);\r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseColumnFilter_GPU>(new NppColumnSumFilter(ksize, anchor));\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Box Filter\r
+\r
+namespace\r
+{\r
+ typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
+ NppiSize oMaskSize, NppiPoint oAnchor);\r
+\r
+ class NPPBoxFilter : public BaseFilter_GPU\r
+ {\r
+ public:\r
+ NPPBoxFilter(const Size& ksize_, const Point& anchor_, nppFilterBox_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ NppiSize oKernelSize;\r
+ oKernelSize.height = ksize.height;\r
+ oKernelSize.width = ksize.width;\r
+ NppiPoint oAnchor;\r
+ oAnchor.x = anchor.x;\r
+ oAnchor.y = anchor.y;\r
+ \r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );\r
+ }\r
+\r
+ nppFilterBox_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
+{\r
+ static const nppFilterBox_t nppFilterBox_callers[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};\r
+\r
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new NPPBoxFilter(ksize, anchor, nppFilterBox_callers[CV_MAT_CN(srcType)]));\r
+}\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor)\r
+{\r
+ Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor);\r
+ return createFilter2D_GPU(boxFilter);\r
+}\r
+\r
+void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)\r
+{\r
+ int sdepth = src.depth(), cn = src.channels();\r
+ if( ddepth < 0 )\r
+ ddepth = sdepth;\r
+\r
+ dst.create(src.size(), CV_MAKETYPE(ddepth, cn));\r
+\r
+ Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);\r
+ f->apply(src, dst);\r
+}\r
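+
+// Usage sketch (illustrative only; host_img and host_result are hypothetical cv::Mat objects):
+//
+//     cv::gpu::GpuMat d_src, d_dst;
+//     d_src.upload(host_img);                                          // CV_8UC1 or CV_8UC4 input
+//     cv::gpu::boxFilter(d_src, d_dst, -1, cv::Size(5, 5), cv::Point(-1, -1));
+//     d_dst.download(host_result);                                     // copy the smoothed image back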
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Morphology Filter\r
+\r
+namespace\r
+{\r
+ typedef NppStatus (*nppMorfFilter_t)(const Npp8u*, Npp32s, Npp8u*, Npp32s, NppiSize, const Npp8u*, NppiSize, NppiPoint);\r
+\r
+ class NPPMorphFilter : public BaseFilter_GPU\r
+ {\r
+ public:\r
+ NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) : \r
+ BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ NppiSize oKernelSize;\r
+ oKernelSize.height = ksize.height;\r
+ oKernelSize.width = ksize.width;\r
+ NppiPoint oAnchor;\r
+ oAnchor.x = anchor.x;\r
+ oAnchor.y = anchor.y;\r
+\r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );\r
+ }\r
+\r
+ GpuMat kernel;\r
+ nppMorfFilter_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor)\r
+{\r
+ static const nppMorfFilter_t nppMorfFilter_callers[2][5] = \r
+ {\r
+ {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },\r
+ {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }\r
+ };\r
+ \r
+ CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE); \r
+ CV_Assert(type == CV_8UC1 || type == CV_8UC4); \r
+ \r
+ GpuMat gpu_krnl;\r
+ normalizeKernel(kernel, gpu_krnl);\r
+ normalizeAnchor(anchor, ksize);\r
+ \r
+ return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, gpu_krnl, nppMorfFilter_callers[op][CV_MAT_CN(type)]));\r
+}\r
+\r
+namespace\r
+{\r
+ class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU\r
+ {\r
+ public:\r
+ MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) : \r
+ Filter2DEngine_GPU(filter2D_), iters(iters_) {}\r
+\r
+ virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
+ {\r
+ if (iters > 1)\r
+ morfBuf.create(src.size(), src.type());\r
+\r
+ Filter2DEngine_GPU::apply(src, dst);\r
+ for(int i = 1; i < iters; ++i)\r
+ {\r
+ dst.swap(morfBuf);\r
+ Filter2DEngine_GPU::apply(morfBuf, dst);\r
+ }\r
+ }\r
+\r
+ int iters;\r
+ GpuMat morfBuf;\r
+ };\r
+}\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Point& anchor, int iterations)\r
+{\r
+ CV_Assert(iterations > 0);\r
+\r
+ Size ksize = kernel.size();\r
+\r
+ Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor);\r
+\r
+ return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));\r
+}\r
+\r
+namespace\r
+{\r
+ void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int iterations)\r
+ {\r
+ Mat kernel;\r
+ Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);\r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ if (iterations == 0 || _kernel.rows * _kernel.cols == 1)\r
+ {\r
+ src.copyTo(dst);\r
+ return;\r
+ }\r
+\r
+ dst.create(src.size(), src.type());\r
+\r
+ if (!_kernel.data)\r
+ {\r
+ kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2));\r
+ anchor = Point(iterations, iterations);\r
+ iterations = 1;\r
+ }\r
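+        // iterating a completely filled (rectangular) kernel is equivalent to a single pass with a
+        // proportionally enlarged kernel, so the filter engine is built and run only once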
+ else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols)\r
+ {\r
+ anchor = Point(anchor.x * iterations, anchor.y * iterations);\r
+            kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + (iterations - 1) * (ksize.width - 1), 
+                                                             ksize.height + (iterations - 1) * (ksize.height - 1)), anchor);
+ iterations = 1;\r
+ }\r
+ else\r
+ kernel = _kernel;\r
+\r
+ Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);\r
+\r
+ f->apply(src, dst);\r
+ }\r
+}\r
+\r
+void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)\r
+{\r
+ morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations);\r
+}\r
+\r
+void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)\r
+{\r
+ morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations);\r
+}\r
+\r
+void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations)\r
+{\r
+ GpuMat temp;\r
+ switch( op )\r
+ {\r
+ case MORPH_ERODE: erode( src, dst, kernel, anchor, iterations); break; \r
+ case MORPH_DILATE: dilate( src, dst, kernel, anchor, iterations); break; \r
+ case MORPH_OPEN:\r
+ erode( src, temp, kernel, anchor, iterations);\r
+ dilate( temp, dst, kernel, anchor, iterations);\r
+ break;\r
+    case MORPH_CLOSE:
+ dilate( src, temp, kernel, anchor, iterations);\r
+ erode( temp, dst, kernel, anchor, iterations);\r
+ break;\r
+    case MORPH_GRADIENT:
+ erode( src, temp, kernel, anchor, iterations);\r
+ dilate( src, dst, kernel, anchor, iterations); \r
+ subtract(dst, temp, dst);\r
+ break;\r
+    case MORPH_TOPHAT:
+ erode( src, dst, kernel, anchor, iterations);\r
+ dilate( dst, temp, kernel, anchor, iterations); \r
+ subtract(src, temp, dst);\r
+ break;\r
+    case MORPH_BLACKHAT:
+ dilate( src, dst, kernel, anchor, iterations);\r
+ erode( dst, temp, kernel, anchor, iterations);\r
+ subtract(temp, src, dst);\r
+ break;\r
+ default:\r
+ CV_Error( CV_StsBadArg, "unknown morphological operation" );\r
+ }\r
+}\r
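+
+// Usage sketch (illustrative only; host_mask and host_result are hypothetical cv::Mat objects):
+//
+//     cv::Mat se = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5, 5));
+//     cv::gpu::GpuMat d_mask, d_grad;
+//     d_mask.upload(host_mask);                                     // CV_8UC1 binary mask
+//     cv::gpu::morphologyEx(d_mask, d_grad, cv::MORPH_GRADIENT, se, cv::Point(-1, -1), 1);
+//     d_grad.download(host_result);                                 // morphological gradient (edges)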
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Linear Filter\r
+\r
+namespace\r
+{\r
+ typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
+ const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);\r
+\r
+ class NPPLinearFilter : public BaseFilter_GPU\r
+ {\r
+ public:\r
+ NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) : \r
+ BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ NppiSize oKernelSize;\r
+ oKernelSize.height = ksize.height;\r
+ oKernelSize.width = ksize.width;\r
+ NppiPoint oAnchor;\r
+ oAnchor.x = anchor.x;\r
+ oAnchor.y = anchor.y;\r
+ \r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, \r
+ kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );\r
+ }\r
+\r
+ GpuMat kernel;\r
+ Npp32s nDivisor;\r
+ nppFilter2D_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize, Point anchor)\r
+{\r
+ static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};\r
+\r
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);\r
+ \r
+ GpuMat gpu_krnl;\r
+ int nDivisor;\r
+ normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));\r
+} \r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)\r
+{\r
+ Size ksize = kernel.size();\r
+\r
+ Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, ksize, anchor);\r
+\r
+ return createFilter2D_GPU(linearFilter);\r
+}\r
+\r
+void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor)\r
+{\r
+ if( ddepth < 0 )\r
+ ddepth = src.depth();\r
+\r
+ dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
+\r
+ Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);\r
+ f->apply(src, dst);\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Separable Linear Filter\r
+\r
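+// Device-side 1D convolution kernels, declared here and implemented elsewhere in the module;
+// one specialization per supported source/destination depth (32-bit integer and 32-bit float).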
+namespace cv { namespace gpu { namespace filters\r
+{\r
+ void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+\r
+ void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+ void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+}}}\r
+\r
+namespace\r
+{\r
+ typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI, \r
+ const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);\r
+\r
+ typedef void (*gpuFilter1D_t)(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor);\r
+\r
+ class NppLinearRowFilter : public BaseRowFilter_GPU\r
+ {\r
+ public:\r
+ NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : \r
+ BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );\r
+ }\r
+\r
+ GpuMat kernel;\r
+ Npp32s nDivisor;\r
+ nppFilter1D_t func;\r
+ };\r
+\r
+ class GpuLinearRowFilter : public BaseRowFilter_GPU\r
+ {\r
+ public:\r
+ GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) : \r
+ BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ func(src, dst, kernel.ptr<float>(), ksize, anchor);\r
+ }\r
+\r
+ Mat kernel;\r
+ gpuFilter1D_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor)\r
+{\r
+ using namespace cv::gpu::filters;\r
+ static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};\r
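+    // dispatch table indexed by [source depth][buffer depth]; only the CV_32S (4) and CV_32F (5)
+    // combinations are implemented, every other entry is a null pointer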
+ static const gpuFilter1D_t gpuFilter1D_callers[6][6] =\r
+ {\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,linearRowFilter_gpu_32s32s, linearRowFilter_gpu_32s32f},\r
+ {0,0,0,0,linearRowFilter_gpu_32f32s, linearRowFilter_gpu_32f32f}\r
+ };\r
+ \r
+ if ((srcType == CV_8UC1 || srcType == CV_8UC4) && bufType == srcType)\r
+ {\r
+ GpuMat gpu_row_krnl;\r
+ int nDivisor;\r
+ normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true);\r
+\r
+ int ksize = gpu_row_krnl.cols;\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,\r
+ nppFilter1D_callers[CV_MAT_CN(srcType)]));\r
+ }\r
+ else if ((srcType == CV_32SC1 || srcType == CV_32FC1) && (bufType == CV_32SC1 || bufType == CV_32FC1))\r
+ {\r
+ Mat temp(rowKernel.size(), CV_32FC1);\r
+ rowKernel.convertTo(temp, CV_32FC1);\r
+ Mat cont_krnl = temp.reshape(1, 1);\r
+\r
+ int ksize = cont_krnl.cols;\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, cont_krnl, \r
+ gpuFilter1D_callers[CV_MAT_DEPTH(srcType)][CV_MAT_DEPTH(bufType)]));\r
+ }\r
+\r
+ CV_Assert(!"Unsupported types"); \r
+ return Ptr<BaseRowFilter_GPU>(0);\r
+}\r
+\r
+namespace\r
+{\r
+ class NppLinearColumnFilter : public BaseColumnFilter_GPU\r
+ {\r
+ public:\r
+ NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : \r
+ BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );\r
+ }\r
+\r
+ GpuMat kernel;\r
+ Npp32s nDivisor;\r
+ nppFilter1D_t func;\r
+ };\r
+\r
+ class GpuLinearColumnFilter : public BaseColumnFilter_GPU\r
+ {\r
+ public:\r
+ GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_) : \r
+ BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ func(src, dst, kernel.ptr<float>(), ksize, anchor);\r
+ }\r
+\r
+ Mat kernel;\r
+ gpuFilter1D_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor)\r
+{\r
+ using namespace cv::gpu::filters;\r
+ static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};\r
+ static const gpuFilter1D_t gpuFilter1D_callers[6][6] =\r
+ {\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,0,0},\r
+ {0,0,0,0,linearColumnFilter_gpu_32s32s, linearColumnFilter_gpu_32s32f},\r
+ {0,0,0,0,linearColumnFilter_gpu_32f32s, linearColumnFilter_gpu_32f32f}\r
+ };\r
+ \r
+ if ((bufType == CV_8UC1 || bufType == CV_8UC4) && dstType == bufType)\r
+ {\r
+ GpuMat gpu_col_krnl;\r
+ int nDivisor;\r
+ normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true);\r
+\r
+ int ksize = gpu_col_krnl.cols;\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor, \r
+ nppFilter1D_callers[CV_MAT_CN(bufType)]));\r
+ }\r
+ else if ((bufType == CV_32SC1 || bufType == CV_32FC1) && (dstType == CV_32SC1 || dstType == CV_32FC1))\r
+ {\r
+ Mat temp(columnKernel.size(), CV_32FC1);\r
+ columnKernel.convertTo(temp, CV_32FC1);\r
+ Mat cont_krnl = temp.reshape(1, 1);\r
+\r
+ int ksize = cont_krnl.cols;\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, \r
+ gpuFilter1D_callers[CV_MAT_DEPTH(bufType)][CV_MAT_DEPTH(dstType)]));\r
+ }\r
+\r
+ CV_Assert(!"Unsupported types"); \r
+ return Ptr<BaseColumnFilter_GPU>(0);\r
+}\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, \r
+ const Point& anchor)\r
+{\r
+ int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);\r
+ int cn = CV_MAT_CN(srcType);\r
+ int bdepth = std::max(sdepth, ddepth);\r
+ int bufType = CV_MAKETYPE(bdepth, cn);\r
+\r
+ Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x);\r
+ Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y);\r
+\r
+ return createSeparableFilter_GPU(rowFilter, columnFilter);\r
+}\r
+\r
+void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor)\r
+{\r
+ if( ddepth < 0 )\r
+ ddepth = src.depth();\r
+\r
+ dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
+\r
+ Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor);\r
+ f->apply(src, dst);\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Deriv Filter\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize)\r
+{\r
+ Mat kx, ky;\r
+ getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
+ return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1));\r
+}\r
+\r
+void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)\r
+{\r
+ Mat kx, ky;\r
+ getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
+\r
+ if (scale != 1)\r
+ {\r
+ // usually the smoothing part is the slowest to compute,\r
+            // so try to scale it instead of the faster differentiating part
+ if (dx == 0)\r
+ kx *= scale;\r
+ else\r
+ ky *= scale;\r
+ }\r
+ \r
+ sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));\r
+}\r
+\r
+void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale)\r
+{\r
+ Mat kx, ky;\r
+ getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);\r
+\r
+ if( scale != 1 )\r
+ {\r
+ // usually the smoothing part is the slowest to compute,\r
+            // so try to scale it instead of the faster differentiating part
+ if( dx == 0 )\r
+ kx *= scale;\r
+ else\r
+ ky *= scale;\r
+ }\r
+\r
+ sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1));\r
+}\r
+\r
+void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)\r
+{\r
+ CV_Assert(ksize == 1 || ksize == 3);\r
+\r
+ static const int K[2][9] =\r
+ {\r
+ {0, 1, 0, 1, -4, 1, 0, 1, 0},\r
+ {2, 0, 2, 0, -8, 0, 2, 0, 2}\r
+ };\r
+    // take a deep copy so that scaling never writes into the static kernel table
+    Mat kernel = Mat(3, 3, CV_32S, (void*)K[ksize == 3]).clone();
+    if (scale != 1)
+        kernel *= scale;
+ \r
+ filter2D(src, dst, ddepth, kernel, Point(-1,-1));\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Gaussian Filter\r
+\r
+Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2)\r
+{ \r
+ int depth = CV_MAT_DEPTH(type);\r
+\r
+ if (sigma2 <= 0)\r
+ sigma2 = sigma1;\r
+\r
+ // automatic detection of kernel size from sigma\r
+ if (ksize.width <= 0 && sigma1 > 0)\r
+ ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;\r
+ if (ksize.height <= 0 && sigma2 > 0)\r
+ ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;\r
+\r
+ CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );\r
+\r
+ sigma1 = std::max(sigma1, 0.0);\r
+ sigma2 = std::max(sigma2, 0.0);\r
+\r
+ Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) );\r
+ Mat ky;\r
+ if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )\r
+ ky = kx;\r
+ else\r
+ ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );\r
+\r
+ return createSeparableLinearFilter_GPU(type, type, kx, ky);\r
+}\r
+\r
+void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)\r
+{\r
+ if (ksize.width == 1 && ksize.height == 1)\r
+ {\r
+ src.copyTo(dst);\r
+ return;\r
+ }\r
+\r
+ dst.create(src.size(), src.type());\r
+ \r
+ Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2);\r
+ f->apply(src, dst);\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////////////////////////////////\r
+// Image Rank Filter\r
+\r
+namespace\r
+{\r
+ typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
+ NppiSize oMaskSize, NppiPoint oAnchor);\r
+\r
+ class NPPRankFilter : public BaseFilter_GPU\r
+ {\r
+ public:\r
+ NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}\r
+\r
+ virtual void operator()(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ NppiSize oKernelSize;\r
+ oKernelSize.height = ksize.height;\r
+ oKernelSize.width = ksize.width;\r
+ NppiPoint oAnchor;\r
+ oAnchor.x = anchor.x;\r
+ oAnchor.y = anchor.y;\r
+ \r
+ nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );\r
+ }\r
+\r
+ nppFilterRank_t func;\r
+ };\r
+}\r
+\r
+Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
+{\r
+ static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};\r
+\r
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));\r
+}\r
+\r
+Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
+{\r
+ static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};\r
+\r
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
+\r
+ normalizeAnchor(anchor, ksize);\r
+\r
+ return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));\r
+}\r
+\r
+#endif\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#include "precomp.hpp"\r
-\r
-using namespace cv;\r
-using namespace cv::gpu;\r
-\r
-\r
-#if !defined (HAVE_CUDA)\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, bool) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
-Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int, int, const Size&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const GpuMat&, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const GpuMat&, const Size&, Point, int) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const GpuMat&, int, int) { throw_nogpu(); return Ptr<BaseRowFilter_GPU>(0); }\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const GpuMat&, int, int) { throw_nogpu(); return Ptr<BaseColumnFilter_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, bool) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }\r
-Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
-Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }\r
-\r
-void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point) { throw_nogpu(); }\r
-void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
-void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }\r
-void cv::gpu::morphologyEx( const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_nogpu(); }\r
-void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point) { throw_nogpu(); }\r
-void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, bool) { throw_nogpu(); }\r
-void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double) { throw_nogpu(); }\r
-void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double) { throw_nogpu(); }\r
-void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double) { throw_nogpu(); }\r
-void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }\r
-\r
-#else\r
-\r
-namespace\r
-{\r
- inline void normalizeAnchor(int& anchor, int ksize)\r
- {\r
- if (anchor < 0)\r
- anchor = ksize >> 1;\r
-\r
- CV_Assert(0 <= anchor && anchor < ksize);\r
- }\r
-\r
- inline void normalizeAnchor(Point& anchor, const Size& ksize)\r
- {\r
- normalizeAnchor(anchor.x, ksize.width);\r
- normalizeAnchor(anchor.y, ksize.height);\r
- }\r
-\r
- inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size)\r
- {\r
- if (roi == Rect(0,0,-1,-1))\r
- roi = Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height);\r
-\r
- CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);\r
- }\r
-\r
- inline void normalizeKernel(const Mat& kernel, GpuMat& gpu_krnl, int type = CV_8U, int* nDivisor = 0, bool reverse = false)\r
- {\r
- int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;\r
- if (nDivisor) *nDivisor = scale;\r
- \r
- Mat temp(kernel.size(), type);\r
- kernel.convertTo(temp, type, scale);\r
- Mat cont_krnl = temp.reshape(1, 1);\r
-\r
- if (reverse)\r
- {\r
- int count = cont_krnl.cols >> 1;\r
- for (int i = 0; i < count; ++i)\r
- {\r
- std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));\r
- }\r
- }\r
-\r
- gpu_krnl.upload(cont_krnl);\r
- } \r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Filter2D\r
-\r
-namespace\r
-{\r
- class Filter2DEngine_GPU : public FilterEngine_GPU\r
- {\r
- public:\r
- Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}\r
-\r
- virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
- {\r
- Size src_size = src.size();\r
-\r
- dst.create(src_size, src.type());\r
- dst = Scalar(0.0);\r
-\r
- normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size);\r
-\r
- GpuMat srcROI = src(roi);\r
- GpuMat dstROI = dst(roi);\r
-\r
- (*filter2D)(srcROI, dstROI);\r
- }\r
-\r
- Ptr<BaseFilter_GPU> filter2D;\r
- };\r
-}\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU> filter2D)\r
-{\r
- return Ptr<FilterEngine_GPU>(new Filter2DEngine_GPU(filter2D));\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// SeparableFilter\r
-\r
-namespace\r
-{\r
- struct RowColumnFilterApply\r
- {\r
- void operator()(Ptr<BaseRowFilter_GPU>& rowFilter, Ptr<BaseColumnFilter_GPU>& columnFilter, \r
- GpuMat& srcROI, GpuMat& dstROI, GpuMat& dstBufROI)\r
- {\r
- (*rowFilter)(srcROI, dstBufROI);\r
- (*columnFilter)(dstBufROI, dstROI);\r
- }\r
- };\r
- struct ColumnRowFilterApply\r
- {\r
- void operator()(Ptr<BaseRowFilter_GPU>& rowFilter, Ptr<BaseColumnFilter_GPU>& columnFilter, \r
- GpuMat& srcROI, GpuMat& dstROI, GpuMat& dstBufROI)\r
- {\r
- (*columnFilter)(srcROI, dstBufROI);\r
- (*rowFilter)(dstBufROI, dstROI);\r
- }\r
- };\r
- class SeparableFilterEngine_GPU_base : public FilterEngine_GPU\r
- {\r
- public:\r
- SeparableFilterEngine_GPU_base(const Ptr<BaseRowFilter_GPU>& rowFilter_, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter_) :\r
- rowFilter(rowFilter_), columnFilter(columnFilter_)\r
- {\r
- ksize = Size(rowFilter->ksize, columnFilter->ksize);\r
- anchor = Point(rowFilter->anchor, columnFilter->anchor);\r
- }\r
-\r
- virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
- {\r
- Size src_size = src.size();\r
- int src_type = src.type();\r
-\r
- dst.create(src_size, src_type);\r
- dst = Scalar(0.0);\r
- dstBuf.create(src_size, src_type);\r
- dstBuf = Scalar(0.0);\r
-\r
- normalizeROI(roi, ksize, anchor, src_size);\r
-\r
- srcROI = src(roi);\r
- dstROI = dst(roi);\r
- dstBufROI = dstBuf(roi);\r
- }\r
-\r
- Ptr<BaseRowFilter_GPU> rowFilter;\r
- Ptr<BaseColumnFilter_GPU> columnFilter;\r
- Size ksize;\r
- Point anchor;\r
- GpuMat dstBuf;\r
- GpuMat srcROI;\r
- GpuMat dstROI;\r
- GpuMat dstBufROI;\r
- };\r
- template <typename FA>\r
- class SeparableFilterEngine_GPU : public SeparableFilterEngine_GPU_base\r
- {\r
- public:\r
- SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter_, FA fa_) :\r
- SeparableFilterEngine_GPU_base(rowFilter_, columnFilter_), fa(fa_)\r
- {\r
- }\r
-\r
- virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
- {\r
- SeparableFilterEngine_GPU_base::apply(src, dst, roi);\r
- fa(rowFilter, columnFilter, srcROI, dstROI, dstBufROI);\r
- }\r
-\r
- FA fa;\r
- };\r
-}\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, \r
- const Ptr<BaseColumnFilter_GPU>& columnFilter, bool rowFilterFirst)\r
-{\r
- if (rowFilterFirst)\r
- return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU<RowColumnFilterApply>(rowFilter, columnFilter, RowColumnFilterApply()));\r
- return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU<ColumnRowFilterApply>(rowFilter, columnFilter, ColumnRowFilterApply()));\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// 1D Sum Filter\r
-\r
-namespace\r
-{\r
- class NppRowSumFilter : public BaseRowFilter_GPU\r
- {\r
- public:\r
- NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );\r
- }\r
- };\r
-}\r
-\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor)\r
-{\r
- CV_Assert(srcType == CV_8UC1 && sumType == CV_32FC1);\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseRowFilter_GPU>(new NppRowSumFilter(ksize, anchor));\r
-}\r
-\r
-namespace\r
-{\r
- class NppColumnSumFilter : public BaseColumnFilter_GPU\r
- {\r
- public:\r
- NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, ksize, anchor) );\r
- }\r
- };\r
-}\r
-\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor)\r
-{\r
- CV_Assert(sumType == CV_8UC1 && dstType == CV_32FC1);\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseColumnFilter_GPU>(new NppColumnSumFilter(ksize, anchor));\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Box Filter\r
-\r
-namespace\r
-{\r
- typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
- NppiSize oMaskSize, NppiPoint oAnchor);\r
-\r
- class NPPBoxFilter : public BaseFilter_GPU\r
- {\r
- public:\r
- NPPBoxFilter(const Size& ksize_, const Point& anchor_, nppFilterBox_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- NppiSize oKernelSize;\r
- oKernelSize.height = ksize.height;\r
- oKernelSize.width = ksize.width;\r
- NppiPoint oAnchor;\r
- oAnchor.x = anchor.x;\r
- oAnchor.y = anchor.y;\r
- \r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );\r
- }\r
-\r
- nppFilterBox_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
-{\r
- static const nppFilterBox_t nppFilterBox_callers[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};\r
-\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseFilter_GPU>(new NPPBoxFilter(ksize, anchor, nppFilterBox_callers[CV_MAT_CN(srcType)]));\r
-}\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor)\r
-{\r
- Ptr<BaseFilter_GPU> boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor);\r
- return createFilter2D_GPU(boxFilter);\r
-}\r
-\r
-void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor)\r
-{\r
- int sdepth = src.depth(), cn = src.channels();\r
- if( ddepth < 0 )\r
- ddepth = sdepth;\r
-\r
- dst.create(src.size(), CV_MAKETYPE(ddepth, cn));\r
-\r
- Ptr<FilterEngine_GPU> f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor);\r
- f->apply(src, dst);\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Morphology Filter\r
-\r
-namespace\r
-{\r
- typedef NppStatus (*nppMorfFilter_t)(const Npp8u*, Npp32s, Npp8u*, Npp32s, NppiSize, const Npp8u*, NppiSize, NppiPoint);\r
-\r
- class NPPMorphFilter : public BaseFilter_GPU\r
- {\r
- public:\r
- NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) : \r
- BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- NppiSize oKernelSize;\r
- oKernelSize.height = ksize.height;\r
- oKernelSize.width = ksize.width;\r
- NppiPoint oAnchor;\r
- oAnchor.x = anchor.x;\r
- oAnchor.y = anchor.y;\r
-\r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );\r
- }\r
-\r
- GpuMat kernel;\r
- nppMorfFilter_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const GpuMat& kernel, const Size& ksize, Point anchor)\r
-{\r
- static const nppMorfFilter_t nppMorfFilter_callers[2][5] = \r
- {\r
- {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },\r
- {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }\r
- };\r
- \r
- CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE); \r
- CV_Assert(type == CV_8UC1 || type == CV_8UC4); \r
- CV_Assert(kernel.type() == CV_8UC1 && kernel.rows == 1 && kernel.cols == ksize.area());\r
- \r
- normalizeAnchor(anchor, ksize);\r
- \r
- return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, kernel, nppMorfFilter_callers[op][CV_MAT_CN(type)]));\r
-}\r
-\r
-namespace\r
-{\r
- class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU\r
- {\r
- public:\r
- MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) : \r
- Filter2DEngine_GPU(filter2D_), iters(iters_) {}\r
-\r
- virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1))\r
- {\r
- if (iters > 1)\r
- morfBuf.create(src.size(), src.type());\r
-\r
- Filter2DEngine_GPU::apply(src, dst);\r
- for(int i = 1; i < iters; ++i)\r
- {\r
- dst.swap(morfBuf);\r
- Filter2DEngine_GPU::apply(morfBuf, dst);\r
- }\r
- }\r
-\r
- int iters;\r
- GpuMat morfBuf;\r
- };\r
-}\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Point& anchor, int iterations)\r
-{\r
- CV_Assert(iterations > 0);\r
-\r
- Size ksize = kernel.size();\r
-\r
- GpuMat gpu_krnl;\r
- normalizeKernel(kernel, gpu_krnl);\r
-\r
- Ptr<BaseFilter_GPU> filter2D = getMorphologyFilter_GPU(op, type, gpu_krnl, ksize, anchor);\r
-\r
- return Ptr<FilterEngine_GPU>(new MorphologyFilterEngine_GPU(filter2D, iterations));\r
-}\r
-\r
-namespace\r
-{\r
- void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int iterations)\r
- {\r
- Mat kernel;\r
- Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- if (iterations == 0 || _kernel.rows * _kernel.cols == 1)\r
- {\r
- src.copyTo(dst);\r
- return;\r
- }\r
-\r
- dst.create(src.size(), src.type());\r
-\r
- if (!_kernel.data)\r
- {\r
- kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2));\r
- anchor = Point(iterations, iterations);\r
- iterations = 1;\r
- }\r
- else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols)\r
- {\r
- anchor = Point(anchor.x * iterations, anchor.y * iterations);\r
- kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + iterations * (ksize.width - 1), \r
- ksize.height + iterations * (ksize.height - 1)), anchor);\r
- iterations = 1;\r
- }\r
- else\r
- kernel = _kernel;\r
-\r
- Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);\r
-\r
- f->apply(src, dst);\r
- }\r
-}\r
-\r
-void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)\r
-{\r
- morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations);\r
-}\r
-\r
-void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations)\r
-{\r
- morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations);\r
-}\r
-\r
-void cv::gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations)\r
-{\r
- GpuMat temp;\r
- switch( op )\r
- {\r
- case MORPH_ERODE: erode( src, dst, kernel, anchor, iterations); break; \r
- case MORPH_DILATE: dilate( src, dst, kernel, anchor, iterations); break; \r
- case MORPH_OPEN:\r
- erode( src, temp, kernel, anchor, iterations);\r
- dilate( temp, dst, kernel, anchor, iterations);\r
- break;\r
- case CV_MOP_CLOSE:\r
- dilate( src, temp, kernel, anchor, iterations);\r
- erode( temp, dst, kernel, anchor, iterations);\r
- break;\r
- case CV_MOP_GRADIENT:\r
- erode( src, temp, kernel, anchor, iterations);\r
- dilate( src, dst, kernel, anchor, iterations); \r
- subtract(dst, temp, dst);\r
- break;\r
- case CV_MOP_TOPHAT:\r
- erode( src, dst, kernel, anchor, iterations);\r
- dilate( dst, temp, kernel, anchor, iterations); \r
- subtract(src, temp, dst);\r
- break;\r
- case CV_MOP_BLACKHAT:\r
- dilate( src, dst, kernel, anchor, iterations);\r
- erode( dst, temp, kernel, anchor, iterations);\r
- subtract(temp, src, dst);\r
- break;\r
- default:\r
- CV_Error( CV_StsBadArg, "unknown morphological operation" );\r
- }\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Linear Filter\r
-\r
-namespace\r
-{\r
- typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
- const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);\r
-\r
- class NPPLinearFilter : public BaseFilter_GPU\r
- {\r
- public:\r
- NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) : \r
- BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- NppiSize oKernelSize;\r
- oKernelSize.height = ksize.height;\r
- oKernelSize.width = ksize.width;\r
- NppiPoint oAnchor;\r
- oAnchor.x = anchor.x;\r
- oAnchor.y = anchor.y;\r
- \r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, \r
- kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );\r
- }\r
-\r
- GpuMat kernel;\r
- Npp32s nDivisor;\r
- nppFilter2D_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const GpuMat& kernel, const Size& ksize, Point anchor, int nDivisor)\r
-{\r
- static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};\r
-\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
- CV_Assert(kernel.type() == CV_32SC1 && kernel.rows == 1 && kernel.cols == ksize.area());\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, kernel, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));\r
-} \r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)\r
-{\r
- Size ksize = kernel.size();\r
-\r
- GpuMat gpu_krnl;\r
- int nDivisor;\r
- normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);\r
-\r
- Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, gpu_krnl, ksize, anchor, nDivisor);\r
-\r
- return createFilter2D_GPU(linearFilter);\r
-}\r
-\r
-void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor)\r
-{\r
- if( ddepth < 0 )\r
- ddepth = src.depth();\r
-\r
- dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
-\r
- Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst.type(), kernel, anchor);\r
- f->apply(src, dst);\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Separable Linear Filter\r
-\r
-namespace\r
-{\r
- typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI, \r
- const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);\r
-\r
- class NppLinearRowFilter : public BaseRowFilter_GPU\r
- {\r
- public:\r
- NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : \r
- BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );\r
- }\r
-\r
- GpuMat kernel;\r
- Npp32s nDivisor;\r
- nppFilter1D_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const GpuMat& rowKernel, int anchor, int nDivisor)\r
-{\r
- static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};\r
-\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && bufType == srcType);\r
- CV_Assert(rowKernel.type() == CV_32SC1 && rowKernel.rows == 1);\r
-\r
- int ksize = rowKernel.cols;\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, rowKernel, nDivisor, nppFilter1D_callers[CV_MAT_CN(srcType)]));\r
-}\r
-\r
-namespace\r
-{\r
- class NppLinearColumnFilter : public BaseColumnFilter_GPU\r
- {\r
- public:\r
- NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : \r
- BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );\r
- }\r
-\r
- GpuMat kernel;\r
- Npp32s nDivisor;\r
- nppFilter1D_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const GpuMat& columnKernel, int anchor, int nDivisor)\r
-{\r
- static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};\r
-\r
- CV_Assert((bufType == CV_8UC1 || bufType == CV_8UC4) && dstType == bufType);\r
- CV_Assert(columnKernel.type() == CV_32SC1 && columnKernel.rows == 1);\r
-\r
- int ksize = columnKernel.cols;\r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, columnKernel, nDivisor, nppFilter1D_callers[CV_MAT_CN(bufType)]));\r
-}\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, \r
- const Point& anchor, bool rowFilterFirst)\r
-{\r
- int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);\r
- int cn = CV_MAT_CN(srcType);\r
- int bdepth = std::max(sdepth, ddepth);\r
- int bufType = CV_MAKETYPE(bdepth, cn);\r
-\r
- GpuMat gpu_row_krnl, gpu_col_krnl;\r
- int nRowDivisor, nColDivisor;\r
- normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nRowDivisor, true);\r
- normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nColDivisor, true);\r
-\r
- Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, gpu_row_krnl, anchor.x, nRowDivisor);\r
- Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, gpu_col_krnl, anchor.y, nColDivisor);\r
-\r
- return createSeparableFilter_GPU(rowFilter, columnFilter, rowFilterFirst);\r
-}\r
-\r
-void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor, bool rowFilterFirst)\r
-{\r
- if( ddepth < 0 )\r
- ddepth = src.depth();\r
-\r
- dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
-\r
- Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowFilterFirst);\r
- f->apply(src, dst);\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Deriv Filter\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize)\r
-{\r
- Mat kx, ky;\r
- getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
- return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), dx >= dy);\r
-}\r
-\r
-void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale)\r
-{\r
- Mat kx, ky;\r
- getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);\r
-\r
- if (scale != 1)\r
- {\r
- // usually the smoothing part is the slowest to compute,\r
-        // so try to scale it instead of the faster differentiating part\r
- if (dx == 0)\r
- kx *= scale;\r
- else\r
- ky *= scale;\r
- }\r
- \r
- sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), dx >= dy);\r
-}\r
-\r
-void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale)\r
-{\r
- Mat kx, ky;\r
- getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);\r
-\r
- if( scale != 1 )\r
- {\r
- // usually the smoothing part is the slowest to compute,\r
-        // so try to scale it instead of the faster differentiating part\r
- if( dx == 0 )\r
- kx *= scale;\r
- else\r
- ky *= scale;\r
- }\r
-\r
- sepFilter2D(src, dst, ddepth, kx, ky, Point(-1,-1), dx >= dy);\r
-}\r
-\r
-void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale)\r
-{\r
- CV_Assert(ksize == 1 || ksize == 3);\r
-\r
- static const int K[2][9] =\r
- {\r
- {0, 1, 0, 1, -4, 1, 0, 1, 0},\r
- {2, 0, 2, 0, -8, 0, 2, 0, 2}\r
- };\r
- Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);\r
- if (scale != 1)\r
- kernel *= scale;\r
- \r
- filter2D(src, dst, ddepth, kernel, Point(-1,-1));\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Gaussian Filter\r
-\r
-Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2)\r
-{ \r
- int depth = CV_MAT_DEPTH(type);\r
-\r
- if (sigma2 <= 0)\r
- sigma2 = sigma1;\r
-\r
- // automatic detection of kernel size from sigma\r
- if (ksize.width <= 0 && sigma1 > 0)\r
- ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;\r
- if (ksize.height <= 0 && sigma2 > 0)\r
- ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1;\r
-\r
- CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );\r
-\r
- sigma1 = std::max(sigma1, 0.0);\r
- sigma2 = std::max(sigma2, 0.0);\r
-\r
- Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) );\r
- Mat ky;\r
- if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )\r
- ky = kx;\r
- else\r
- ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );\r
-\r
- return createSeparableLinearFilter_GPU(type, type, kx, ky);\r
-}\r
-\r
-void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2)\r
-{\r
- if (ksize.width == 1 && ksize.height == 1)\r
- {\r
- src.copyTo(dst);\r
- return;\r
- }\r
-\r
- dst.create(src.size(), src.type());\r
- \r
- Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2);\r
- f->apply(src, dst);\r
-}\r
-\r
-////////////////////////////////////////////////////////////////////////////////////////////////////\r
-// Image Rank Filter\r
-\r
-namespace\r
-{\r
- typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, \r
- NppiSize oMaskSize, NppiPoint oAnchor);\r
-\r
- class NPPRankFilter : public BaseFilter_GPU\r
- {\r
- public:\r
- NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}\r
-\r
- virtual void operator()(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- NppiSize oKernelSize;\r
- oKernelSize.height = ksize.height;\r
- oKernelSize.width = ksize.width;\r
- NppiPoint oAnchor;\r
- oAnchor.x = anchor.x;\r
- oAnchor.y = anchor.y;\r
- \r
- nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );\r
- }\r
-\r
- nppFilterRank_t func;\r
- };\r
-}\r
-\r
-Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
-{\r
- static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};\r
-\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));\r
-}\r
-\r
-Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)\r
-{\r
- static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};\r
-\r
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); \r
-\r
- normalizeAnchor(anchor, ksize);\r
-\r
- return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));\r
-}\r
-\r
-#endif\r
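\r
For context on the removed separable-filter path: normalizeKernel turned a floating-point row/column kernel into a CV_32S kernel plus a divisor, and the NPP row/column filters divided the accumulated integer sum by that divisor (the nDivisor argument above). A minimal stand-alone sketch of that fixed-point idea; the helper name and the scale of 256 are illustrative, not the actual normalizeKernel implementation:\r
\r
#include <cmath>\r
#include <vector>\r
\r
// Illustrative only: approximate a float kernel by integers plus a divisor,\r
// so that (sum_i k_int[i] * px[i]) / divisor ~= sum_i k_float[i] * px[i].\r
static std::vector<int> toFixedPointKernel(const std::vector<float>& k, int& divisor)\r
{\r
    divisor = 256;                                   // assumed power-of-two scale\r
    std::vector<int> fixed(k.size());\r
    for (size_t i = 0; i < k.size(); ++i)\r
        fixed[i] = static_cast<int>(std::floor(k[i] * divisor + 0.5f));\r
    return fixed;                                    // 'divisor' plays the role of nDivisor\r
}\r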
\r
namespace cv { namespace gpu \r
{ \r
- namespace improc \r
+ namespace imgproc \r
{\r
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)\r
{\r
typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);\r
- static const remap_gpu_t callers[] = {improc::remap_gpu_1c, 0, improc::remap_gpu_3c};\r
+ static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};\r
\r
CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);\r
\r
eps = 1.f;\r
eps = (float)std::max(criteria.epsilon, 0.0); \r
\r
- improc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); \r
+ imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps); \r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
eps = 1.f;\r
eps = (float)std::max(criteria.epsilon, 0.0); \r
\r
- improc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps); \r
+ imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps); \r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
out = dst;\r
out.create(src.size(), CV_8UC4);\r
\r
- improc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);\r
+ imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);\r
\r
dst = out;\r
}\r
void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)\r
{ \r
xyzw.create(disp.rows, disp.cols, CV_32FC4);\r
- improc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
+ imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);\r
}\r
\r
typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);\r
case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: \r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::RGB2RGB_gpu_8u, 0, improc::RGB2RGB_gpu_16u, 0, 0, improc::RGB2RGB_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::RGB2RGB_gpu_8u, 0, imgproc::RGB2RGB_gpu_16u, 0, 0, imgproc::RGB2RGB_gpu_32f};\r
\r
CV_Assert(scn == 3 || scn == 4);\r
\r
\r
dst.create(sz, CV_8UC2);\r
\r
- improc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);\r
+ imgproc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);\r
break;\r
}\r
\r
\r
dst.create(sz, CV_MAKETYPE(depth, dcn));\r
\r
- improc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);\r
+ imgproc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);\r
break;\r
}\r
\r
case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::RGB2Gray_gpu_8u, 0, improc::RGB2Gray_gpu_16u, 0, 0, improc::RGB2Gray_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::RGB2Gray_gpu_8u, 0, imgproc::RGB2Gray_gpu_16u, 0, 0, imgproc::RGB2Gray_gpu_32f};\r
\r
CV_Assert(scn == 3 || scn == 4);\r
\r
\r
dst.create(sz, CV_8UC1);\r
\r
- improc::RGB5x52Gray_gpu(src, green_bits, dst, stream);\r
+ imgproc::RGB5x52Gray_gpu(src, green_bits, dst, stream);\r
break;\r
}\r
\r
case CV_GRAY2BGR: case CV_GRAY2BGRA:\r
{\r
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::Gray2RGB_gpu_8u, 0, improc::Gray2RGB_gpu_16u, 0, 0, improc::Gray2RGB_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::Gray2RGB_gpu_8u, 0, imgproc::Gray2RGB_gpu_16u, 0, 0, imgproc::Gray2RGB_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
\r
dst.create(sz, CV_8UC2);\r
\r
- improc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);\r
+ imgproc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);\r
break;\r
}\r
\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, \r
const void* coeffs, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::RGB2YCrCb_gpu_8u, 0, improc::RGB2YCrCb_gpu_16u, 0, 0, improc::RGB2YCrCb_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::RGB2YCrCb_gpu_8u, 0, imgproc::RGB2YCrCb_gpu_16u, 0, 0, imgproc::RGB2YCrCb_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
CV_Assert((scn == 3 || scn == 4) && (dcn == 3 || dcn == 4));\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, \r
const void* coeffs, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::YCrCb2RGB_gpu_8u, 0, improc::YCrCb2RGB_gpu_16u, 0, 0, improc::YCrCb2RGB_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::YCrCb2RGB_gpu_8u, 0, imgproc::YCrCb2RGB_gpu_16u, 0, 0, imgproc::YCrCb2RGB_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, \r
const void* coeffs, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::RGB2XYZ_gpu_8u, 0, improc::RGB2XYZ_gpu_16u, 0, 0, improc::RGB2XYZ_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::RGB2XYZ_gpu_8u, 0, imgproc::RGB2XYZ_gpu_16u, 0, 0, imgproc::RGB2XYZ_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, \r
const void* coeffs, cudaStream_t stream);\r
- static const func_t funcs[] = {improc::XYZ2RGB_gpu_8u, 0, improc::XYZ2RGB_gpu_16u, 0, 0, improc::XYZ2RGB_gpu_32f};\r
+ static const func_t funcs[] = {imgproc::XYZ2RGB_gpu_8u, 0, imgproc::XYZ2RGB_gpu_16u, 0, 0, imgproc::XYZ2RGB_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, \r
int hrange, cudaStream_t stream);\r
- static const func_t funcs_hsv[] = {improc::RGB2HSV_gpu_8u, 0, 0, 0, 0, improc::RGB2HSV_gpu_32f};\r
- static const func_t funcs_hls[] = {improc::RGB2HLS_gpu_8u, 0, 0, 0, 0, improc::RGB2HLS_gpu_32f};\r
+ static const func_t funcs_hsv[] = {imgproc::RGB2HSV_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HSV_gpu_32f};\r
+ static const func_t funcs_hls[] = {imgproc::RGB2HLS_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HLS_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
{\r
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, \r
int hrange, cudaStream_t stream);\r
- static const func_t funcs_hsv[] = {improc::HSV2RGB_gpu_8u, 0, 0, 0, 0, improc::HSV2RGB_gpu_32f};\r
- static const func_t funcs_hls[] = {improc::HLS2RGB_gpu_8u, 0, 0, 0, 0, improc::HLS2RGB_gpu_32f};\r
+ static const func_t funcs_hsv[] = {imgproc::HSV2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HSV2RGB_gpu_32f};\r
+ static const func_t funcs_hls[] = {imgproc::HLS2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HLS2RGB_gpu_32f};\r
\r
if (dcn <= 0) dcn = 3;\r
\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
+namespace cv \r
+{\r
+ namespace gpu\r
+ {\r
+ namespace matrix_operations\r
+ { \r
+ void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
+\r
+ void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);\r
+ void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
+\r
+ void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);\r
+ }\r
+ }\r
+}\r
+\r
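The block added above forward-declares the CUDA entry points for the masked copy, set, and convert operations. As a hedged illustration of how convert_to's argument list lines up (the wrapper below is hypothetical and not part of this patch; it assumes GpuMat's implicit conversion to DevMem2D):\r
\r
// Hypothetical caller, for illustration only.\r
static void convertExample(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst,\r
                           int ddepth, double alpha, double beta)\r
{\r
    dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));\r
    cv::gpu::matrix_operations::convert_to(src, src.depth(),   // source view and its depth\r
                                           dst, ddepth,        // destination view and target depth\r
                                           src.channels(),     // channel count passed separately\r
                                           alpha, beta);       // per-element linear transform\r
}\r
\r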
void cv::gpu::GpuMat::upload(const Mat& m)\r
{\r
CV_DbgAssert(!m.empty());\r
//"GPU-NppImageMeanStdDev", // different precision
//"GPU-NppImageExp", // different precision
//"GPU-NppImageLog", // different precision
- //"GPU-NppImageMagnitude", // different precision
"GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR
//"GPU-NppImageResize", // different precision
//"GPU-NppImageWarpPerspective", // different precision
//"GPU-NppImageIntegral", // different precision
- //"GPU-NppImageSobel", // ???
- //"GPU-NppImageScharr", // ???
+ //"GPU-NppImageSobel", // sign error
+ //"GPU-NppImageScharr", // sign error
//"GPU-NppImageGaussianBlur", // different precision
0
};