\r
namespace cv { namespace gpu { namespace device\r
{\r
- template <typename T, typename D, typename UnOp>\r
- static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, UnOp op)\r
+ //! Mask accessor\r
+ template<class T> struct MaskReader_\r
+ {\r
+ PtrStep_<T> mask;\r
+ explicit MaskReader_(PtrStep_<T> mask): mask(mask) {} \r
+\r
+ __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }\r
+ };\r
+\r
+ //! Stub mask accessor\r
+ struct NoMask \r
+ {\r
+ __device__ bool operator()(int y, int x) const { return true; } \r
+ };\r
+\r
+ //! Transform kernels\r
+\r
+ template <typename T, typename D, typename Mask, typename UnOp>\r
+ static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)\r
{\r
const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
- if (x < src.cols && y < src.rows)\r
+ if (x < src.cols && y < src.rows && mask(y, x))\r
{\r
T src_data = src.ptr(y)[x];\r
- dst.ptr(y)[x] = op(src_data, x, y);\r
+ dst.ptr(y)[x] = op(src_data);\r
}\r
}\r
- template <typename T1, typename T2, typename D, typename BinOp>\r
- static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, BinOp op)\r
+\r
+ template <typename T1, typename T2, typename D, typename Mask, typename BinOp>\r
+ static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)\r
{\r
const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
\r
- if (x < src1.cols && y < src1.rows)\r
+ if (x < src1.cols && y < src1.rows && mask(y, x))\r
{\r
T1 src1_data = src1.ptr(y)[x];\r
T2 src2_data = src2.ptr(y)[x];\r
- dst.ptr(y)[x] = op(src1_data, src2_data, x, y);\r
+ dst.ptr(y)[x] = op(src1_data, src2_data);\r
}\r
- }\r
+ } \r
}}}\r
\r
namespace cv \r
grid.x = divUp(src.cols, threads.x);\r
grid.y = divUp(src.rows, threads.y); \r
\r
- device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, op);\r
+ device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() );\r
grid.x = divUp(src1.cols, threads.x);\r
grid.y = divUp(src1.rows, threads.y); \r
\r
- device::transform<T1, T2, D, BinOp><<<grid, threads, 0, stream>>>(src1, src2, dst, op);\r
+ device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);\r
\r
if (stream == 0)\r
cudaSafeCall( cudaThreadSynchronize() ); \r