Applies a fixed-level threshold to each array element.\r
\r
\cvdefCpp{\r
-double threshold(const GpuMat\& src, GpuMat\& dst, double thresh);\r
+double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type);\newline\r
+double threshold(const GpuMat\& src, GpuMat\& dst, double thresh, \par double maxval, int type, const Stream\& stream);\r
}\r
\r
\begin{description}\r
-\cvarg{src}{Source array. Supports only \texttt{CV\_32FC1} type.}\r
+\cvarg{src}{Source array (single-channel; the \texttt{CV\_64F} depth is not supported).}\r
\cvarg{dst}{Destination array; will have the same size and the same type as \texttt{src}.}\r
\cvarg{thresh}{Threshold value.}\r
+\cvarg{maxVal}{Maximum value to use with \texttt{THRESH\_BINARY} and \texttt{THRESH\_BINARY\_INV} thresholding types.}\r
+\cvarg{thresholdType}{Thresholding type. For details, see \cvCppCross{threshold}. The \texttt{THRESH\_OTSU} thresholding type is not supported.}\r
+\cvarg{stream}{Stream for the asynchronous version.}\r
\end{description}\r
\r
See also: \cvCppCross{threshold}.\r
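
A minimal usage sketch (the image, threshold and maximum values below are illustrative, not part of the API):

\begin{lstlisting}
cv::gpu::GpuMat src(hostImage), dst;  // hostImage: single-channel cv::Mat
// keep pixels above 128 by setting them to 255, zero out the rest
cv::gpu::threshold(src, dst, 128.0, 255.0, cv::THRESH_BINARY);
\end{lstlisting}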
//! async version\r
CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const Stream& stream);\r
\r
- //! applies fixed threshold to the image.\r
- //! Now supports only THRESH_TRUNC threshold type and one channels float source.\r
- CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh);\r
+ //! applies fixed threshold to the image\r
+ CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type);\r
+ //! async version\r
+ CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, const Stream& stream);\r
\r
//! resizes the image\r
//! Supports INTER_NEAREST, INTER_LINEAR\r
//M*/\r
\r
#include "opencv2/gpu/device/vecmath.hpp"\r
-#include "transform.hpp"\r
+#include "opencv2/gpu/device/transform.hpp"\r
+#include "opencv2/gpu/device/saturate_cast.hpp"\r
#include "internal_shared.hpp"\r
\r
using namespace cv::gpu;\r
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);\r
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);\r
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);\r
+\r
+ \r
+ //////////////////////////////////////////////////////////////////////////\r
+ // threshold\r
+\r
+ class ThreshOp\r
+ {\r
+ public:\r
+ ThreshOp(float thresh_, float maxVal_) : thresh(thresh_), maxVal(maxVal_) {}\r
+\r
+ protected:\r
+ float thresh;\r
+ float maxVal;\r
+ };\r
+\r
+ class ThreshBinary : public ThreshOp\r
+ {\r
+ public:\r
+ ThreshBinary(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}\r
+\r
+ template<typename T>\r
+ __device__ T operator()(const T& src) const\r
+ {\r
+ return (float)src > thresh ? saturate_cast<T>(maxVal) : 0;\r
+ }\r
+ };\r
+\r
+ class ThreshBinaryInv : public ThreshOp\r
+ {\r
+ public:\r
+ ThreshBinaryInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}\r
+\r
+ template<typename T>\r
+ __device__ T operator()(const T& src) const\r
+ {\r
+ return (float)src > thresh ? 0 : saturate_cast<T>(maxVal);\r
+ }\r
+ };\r
+\r
+ class ThreshTrunc : public ThreshOp\r
+ {\r
+ public:\r
+ ThreshTrunc(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}\r
+\r
+ template<typename T>\r
+ __device__ T operator()(const T& src) const\r
+ {\r
+ return saturate_cast<T>(fmin((float)src, thresh));\r
+ }\r
+ };\r
+\r
+ class ThreshToZero : public ThreshOp\r
+ {\r
+ public:\r
+ ThreshToZero(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}\r
+\r
+ template<typename T>\r
+ __device__ T operator()(const T& src) const\r
+ {\r
+ return (float)src > thresh ? src : 0;\r
+ }\r
+ };\r
+\r
+ class ThreshToZeroInv : public ThreshOp\r
+ {\r
+ public:\r
+ ThreshToZeroInv(float thresh_, float maxVal_) : ThreshOp(thresh_, maxVal_) {}\r
+\r
+ template<typename T>\r
+ __device__ T operator()(const T& src) const\r
+ {\r
+ return (float)src > thresh ? 0 : src;\r
+ }\r
+ };\r
+\r
+ template <class Op, typename T>\r
+ void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal, \r
+ cudaStream_t stream)\r
+ {\r
+ Op op(thresh, maxVal);\r
+ transform(src, dst, op, stream);\r
+ }\r
+\r
+ template <typename T>\r
+ void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,\r
+ cudaStream_t stream)\r
+ {\r
+ typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, float thresh, float maxVal, \r
+ cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ threshold_caller<ThreshBinary, T>, \r
+ threshold_caller<ThreshBinaryInv, T>, \r
+ threshold_caller<ThreshTrunc, T>, \r
+ threshold_caller<ThreshToZero, T>, \r
+ threshold_caller<ThreshToZeroInv, T>\r
+ };\r
+\r
+ callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);\r
+ }\r
+\r
+ template void threshold_gpu<uchar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
+ template void threshold_gpu<schar>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
+ template void threshold_gpu<ushort>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
+ template void threshold_gpu<short>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
+ template void threshold_gpu<int>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
+ template void threshold_gpu<float>(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type, cudaStream_t stream);\r
}}}\r
#include "opencv2/gpu/device/limits_gpu.hpp"\r
#include "opencv2/gpu/device/saturate_cast.hpp"\r
#include "opencv2/gpu/device/vecmath.hpp"\r
-#include "transform.hpp"\r
+#include "opencv2/gpu/device/transform.hpp"\r
#include "internal_shared.hpp"\r
\r
using namespace cv::gpu;\r
#include "internal_shared.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/transform.hpp"
using namespace cv::gpu::device;
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
-
- template <typename T, typename DT, size_t src_elem_size, size_t dst_elem_size>
- struct ReadWriteTraits
- {
- enum {shift=1};
-
- typedef T read_type;
- typedef DT write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 1, 1>
- {
- enum {shift=4};
-
- typedef char4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 2, 1>
- {
- enum {shift=4};
-
- typedef short4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 4, 1>
- {
- enum {shift=4};
-
- typedef int4 read_type;
- typedef char4 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 1, 2>
- {
- enum {shift=2};
-
- typedef char2 read_type;
- typedef short2 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 2, 2>
- {
- enum {shift=2};
-
- typedef short2 read_type;
- typedef short2 write_type;
- };
- template <typename T, typename DT>
- struct ReadWriteTraits<T, DT, 4, 2>
- {
- enum {shift=2};
-
- typedef int2 read_type;
- typedef short2 write_type;
- };
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
- template <typename T, typename DT>
- __global__ static void convert_to(uchar* srcmat, size_t src_step, uchar* dstmat, size_t dst_step, size_t width, size_t height, double alpha, double beta)
+ template <typename T, typename D>
+ class Convertor
{
- typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::read_type read_type;
- typedef typename ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::write_type write_type;
- const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
-
- const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
- const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
+ public:
+ Convertor(double alpha_, double beta_): alpha(alpha_), beta(beta_) {}
- if (y < height)
+ __device__ D operator()(const T& src)
{
- const T* src = (const T*)(srcmat + src_step * y);
- DT* dst = (DT*)(dstmat + dst_step * y);
- if ((x * shift) + shift - 1 < width)
- {
- read_type srcn_el = ((read_type*)src)[x];
- write_type dstn_el;
-
- const T* src1_el = (const T*) &srcn_el;
- DT* dst1_el = (DT*) &dstn_el;
-
- for (int i = 0; i < shift; ++i)
- dst1_el[i] = saturate_cast<DT>(alpha * src1_el[i] + beta);
-
- ((write_type*)dst)[x] = dstn_el;
- }
- else
- {
- for (int i = 0; i < shift - 1; ++i)
- if ((x * shift) + i < width)
- dst[(x * shift) + i] = saturate_cast<DT>(alpha * src[(x * shift) + i] + beta);
- }
+ return saturate_cast<D>(alpha * src + beta);
}
- }
-
- typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
- template<typename T, typename DT>
- void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
+ private:
+ double alpha, beta;
+ };
+
+ template<typename T, typename D>
+ void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
{
- const int shift = ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
-
- dim3 block(32, 8);
- dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
-
- convert_to<T, DT><<<grid, block, 0, stream>>>(src.data, src.step, dst.data, dst.step, width, height, alpha, beta);
- if (stream == 0)
- cudaSafeCall( cudaThreadSynchronize() );
+ Convertor<T, D> op(alpha, beta);
+ transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
}
- void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream)
+ void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta,
+ cudaStream_t stream = 0)
{
- static CvtFunc tab[8][8] =
- {
- {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,
- cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},
-
- {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,
- cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},
-
- {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,
- cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},
-
- {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,
- cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},
-
- {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,
- cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},
-
- {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,
- cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},
-
- {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,
- cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},
-
- {0,0,0,0,0,0,0,0}
+ typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta,
+ cudaStream_t stream);
+
+ static const caller_t tab[8][8] =\r
+ {\r
+ {cvt_<uchar, uchar>, cvt_<uchar, schar>, cvt_<uchar, ushort>, cvt_<uchar, short>,\r
+ cvt_<uchar, int>, cvt_<uchar, float>, cvt_<uchar, double>, 0},\r
+\r
+ {cvt_<schar, uchar>, cvt_<schar, schar>, cvt_<schar, ushort>, cvt_<schar, short>,\r
+ cvt_<schar, int>, cvt_<schar, float>, cvt_<schar, double>, 0},\r
+\r
+ {cvt_<ushort, uchar>, cvt_<ushort, schar>, cvt_<ushort, ushort>, cvt_<ushort, short>,\r
+ cvt_<ushort, int>, cvt_<ushort, float>, cvt_<ushort, double>, 0},\r
+\r
+ {cvt_<short, uchar>, cvt_<short, schar>, cvt_<short, ushort>, cvt_<short, short>,\r
+ cvt_<short, int>, cvt_<short, float>, cvt_<short, double>, 0},\r
+\r
+ {cvt_<int, uchar>, cvt_<int, schar>, cvt_<int, ushort>,\r
+ cvt_<int, short>, cvt_<int, int>, cvt_<int, float>, cvt_<int, double>, 0},\r
+\r
+ {cvt_<float, uchar>, cvt_<float, schar>, cvt_<float, ushort>,\r
+ cvt_<float, short>, cvt_<float, int>, cvt_<float, float>, cvt_<float, double>, 0},\r
+\r
+ {cvt_<double, uchar>, cvt_<double, schar>, cvt_<double, ushort>,\r
+ cvt_<double, short>, cvt_<double, int>, cvt_<double, float>, cvt_<double, double>, 0},\r
+\r
+ {0,0,0,0,0,0,0,0}\r
};
- CvtFunc func = tab[sdepth][ddepth];
- if (func == 0)
- cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
- func(src, dst, src.cols * channels, src.rows, alpha, beta, stream);
+ caller_t func = tab[sdepth][ddepth];\r
+ if (!func)\r
+ cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);\r
+\r
+ func(src, dst, alpha, beta, stream);
}
}}}
#include "opencv2/gpu/device/limits_gpu.hpp"\r
#include "opencv2/gpu/device/saturate_cast.hpp"\r
#include "opencv2/gpu/device/vecmath.hpp"\r
-#include "transform.hpp"\r
+#include "opencv2/gpu/device/transform.hpp"\r
#include "internal_shared.hpp"\r
\r
using namespace cv::gpu;\r
//M*/\r
\r
#include "internal_shared.hpp"\r
-#include "surf_key_point.h"\r
#include "opencv2/gpu/device/limits_gpu.hpp"\r
\r
using namespace cv::gpu;\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#ifndef __OPENCV_SURF_KEY_POINT_H__\r
-#define __OPENCV_SURF_KEY_POINT_H__\r
-\r
-namespace cv\r
-{\r
- namespace gpu\r
- {\r
- \r
- }\r
-}\r
-\r
-#endif // __OPENCV_SURF_KEY_POINT_H__\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#ifndef __OPENCV_GPU_TRANSFORM_HPP__\r
-#define __OPENCV_GPU_TRANSFORM_HPP__\r
-\r
-#include "internal_shared.hpp"\r
-\r
-namespace cv { namespace gpu { namespace device\r
-{\r
- //! Mask accessor\r
- template<class T> struct MaskReader_\r
- {\r
- PtrStep_<T> mask;\r
- explicit MaskReader_(PtrStep_<T> mask): mask(mask) {} \r
-\r
- __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }\r
- };\r
-\r
- //! Stub mask accessor\r
- struct NoMask \r
- {\r
- __device__ bool operator()(int y, int x) const { return true; } \r
- };\r
-\r
- //! Transform kernels\r
-\r
- template <typename T, typename D, typename UnOp, typename Mask>\r
- static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)\r
- {\r
- const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (x < src.cols && y < src.rows && mask(y, x))\r
- {\r
- T src_data = src.ptr(y)[x];\r
- dst.ptr(y)[x] = op(src_data);\r
- }\r
- }\r
-\r
- template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
- static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)\r
- {\r
- const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
- const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
-\r
- if (x < src1.cols && y < src1.rows && mask(y, x))\r
- {\r
- T1 src1_data = src1.ptr(y)[x];\r
- T2 src2_data = src2.ptr(y)[x];\r
- dst.ptr(y)[x] = op(src1_data, src2_data);\r
- }\r
- } \r
-}}}\r
-\r
-namespace cv \r
-{ \r
- namespace gpu \r
- {\r
- template <typename T, typename D, typename UnOp>\r
- static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)\r
- {\r
- dim3 threads(16, 16, 1);\r
- dim3 grid(1, 1, 1);\r
-\r
- grid.x = divUp(src.cols, threads.x);\r
- grid.y = divUp(src.rows, threads.y); \r
-\r
- device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);\r
-\r
- if (stream == 0)\r
- cudaSafeCall( cudaThreadSynchronize() );\r
- }\r
- template <typename T1, typename T2, typename D, typename BinOp>\r
- static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)\r
- {\r
- dim3 threads(16, 16, 1);\r
- dim3 grid(1, 1, 1);\r
-\r
- grid.x = divUp(src1.cols, threads.x);\r
- grid.y = divUp(src1.rows, threads.y); \r
-\r
- device::transform<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, device::NoMask(), op);\r
-\r
- if (stream == 0)\r
- cudaSafeCall( cudaThreadSynchronize() ); \r
- }\r
- }\r
-}\r
-\r
-#endif // __OPENCV_GPU_TRANSFORM_HPP__\r
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);\r
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
\r
- void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);\r
+ void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);\r
}\r
}\r
}\r
psrc = &(temp = src);\r
\r
dst.create( src.size(), rtype );\r
- matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta, impl->stream);\r
+ matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);\r
}\r
\r
\r
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }\r
void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }\r
+double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int) {throw_nogpu(); return 0.0;}\r
+double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, const Stream&) {throw_nogpu(); return 0.0;}\r
\r
#else\r
\r
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));\r
}\r
\r
+////////////////////////////////////////////////////////////////////////\r
+// threshold\r
+\r
+namespace cv { namespace gpu { namespace mathfunc\r
+{\r
+ template <typename T>\r
+ void threshold_gpu(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,\r
+ cudaStream_t stream);\r
+}}}\r
+\r
+namespace\r
+{\r
+ void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, \r
+ cudaStream_t stream = 0)\r
+ {\r
+ using namespace cv::gpu::mathfunc;\r
+\r
+ typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, float thresh, float maxVal, int type,\r
+ cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ threshold_gpu<unsigned char>, threshold_gpu<signed char>, \r
+ threshold_gpu<unsigned short>, threshold_gpu<short>, threshold_gpu<int>, threshold_gpu<float>, 0\r
+ };\r
+\r
+ CV_Assert(src.channels() == 1 && src.depth() < CV_64F);\r
+ CV_Assert(type <= THRESH_TOZERO_INV);\r
+\r
+ dst.create(src.size(), src.type());\r
+\r
+ if (src.depth() != CV_32F)\r
+ {\r
+ thresh = cvFloor(thresh);\r
+ maxVal = cvRound(maxVal);\r
+ }\r
+\r
+ callers[src.depth()](src, dst, static_cast<float>(thresh), static_cast<float>(maxVal), type, stream);\r
+ }\r
+}\r
+\r
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type)\r
+{\r
+ if (src.type() == CV_32FC1 && type == THRESH_TRUNC)\r
+ {\r
+ dst.create(src.size(), src.type());\r
+\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,\r
+ dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );\r
+ }\r
+ else\r
+ {\r
+ threshold_caller(src, dst, thresh, maxVal, type);\r
+ }\r
+\r
+ return thresh;\r
+}\r
+\r
+double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, const Stream& stream)\r
+{\r
+ threshold_caller(src, dst, thresh, maxVal, type, StreamAccessor::getStream(stream));\r
+ return thresh;\r
+}\r
+\r
#endif
\ No newline at end of file
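
For reference, a hedged sketch of driving the new asynchronous overload from user code; the stream, matrices and constants below are illustrative and not part of this change:

    cv::gpu::Stream stream;
    cv::gpu::GpuMat src, dst;   // single-channel source already on the GPU, depth < CV_64F
    // enqueue the threshold on the stream; the call returns without waiting for the GPU
    cv::gpu::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY_INV, stream);
    stream.waitForCompletion(); // block until dst is ready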
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, const Stream&) { throw_nogpu(); }\r
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&) { throw_nogpu(); }\r
void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, const Stream&) { throw_nogpu(); }\r
-double cv::gpu::threshold(const GpuMat&, GpuMat&, double) { throw_nogpu(); return 0.0; }\r
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int) { throw_nogpu(); }\r
void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&) { throw_nogpu(); }\r
void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }\r
reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));\r
}\r
\r
-////////////////////////////////////////////////////////////////////////\r
-// threshold\r
-\r
-double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)\r
-{\r
- CV_Assert(src.type() == CV_32FC1);\r
-\r
- dst.create( src.size(), src.type() );\r
-\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,\r
- dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );\r
-\r
- return thresh;\r
-}\r
-\r
////////////////////////////////////////////////////////////////////////\r
// resize\r
\r
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);\r
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);\r
\r
- void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);\r
+ void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);\r
}\r
}\r
}\r
\r
void convertToKernelCaller(const GpuMat& src, GpuMat& dst)\r
{\r
- matrix_operations::convert_to(src, src.depth(), dst, dst.depth(), src.channels(), 1.0, 0.0);\r
+ matrix_operations::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);\r
}\r
}\r
\r
dst.create( size(), rtype );\r
\r
if (!noScale)\r
- matrix_operations::convert_to(*psrc, sdepth, dst, ddepth, psrc->channels(), alpha, beta);\r
+ matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta);\r
else\r
{\r
typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_TRANSFORM_HPP__\r
+#define __OPENCV_GPU_TRANSFORM_HPP__\r
+\r
+#include "internal_shared.hpp"\r
+#include "vecmath.hpp"\r
+\r
+namespace cv { namespace gpu { namespace device\r
+{\r
+ //! Mask accessor\r
+\r
+ class MaskReader\r
+ {\r
+ public:\r
+ explicit MaskReader(const PtrStep& mask_): mask(mask_) {}\r
+\r
+ __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }\r
+\r
+ private:\r
+ PtrStep mask;\r
+ };\r
+\r
+ struct NoMask \r
+ {\r
+ __device__ bool operator()(int y, int x) const { return true; } \r
+ };\r
+\r
+ //! Read Write Traits\r
+\r
+ template <size_t src_elem_size, size_t dst_elem_size>
+ struct UnReadWriteTraits_
+ {
+ enum {shift=1};
+ };
+ template <size_t src_elem_size>
+ struct UnReadWriteTraits_<src_elem_size, 1>
+ {
+ enum {shift=4};
+ };
+ template <size_t src_elem_size>
+ struct UnReadWriteTraits_<src_elem_size, 2>
+ {
+ enum {shift=2};
+ };\r
+ template <typename T, typename D> struct UnReadWriteTraits
+ {
+ enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
+
+ typedef typename TypeVec<T, shift>::vec_t read_type;
+ typedef typename TypeVec<D, shift>::vec_t write_type;
+ };\r
+\r
+ template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
+ struct BinReadWriteTraits_
+ {
+ enum {shift=1};
+ };\r
+ template <size_t src_elem_size1, size_t src_elem_size2>
+ struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
+ {
+ enum {shift=4};
+ };\r
+ template <size_t src_elem_size1, size_t src_elem_size2>
+ struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
+ {
+ enum {shift=2};
+ };\r
+ template <typename T1, typename T2, typename D> struct BinReadWriteTraits
+ {
+ enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
+
+ typedef typename TypeVec<T1, shift>::vec_t read_type1;
+ typedef typename TypeVec<T2, shift>::vec_t read_type2;
+ typedef typename TypeVec<D , shift>::vec_t write_type;
+ };\r
+\r
+ //! Transform kernels\r
+\r
+ template <int shift> struct OpUnroller;\r
+ template <> struct OpUnroller<1>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src.x);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src1.x, src2.x);\r
+ }\r
+ };\r
+ template <> struct OpUnroller<2>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src.y);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src1.x, src2.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src1.y, src2.y);\r
+ }\r
+ };\r
+ template <> struct OpUnroller<3>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src.y);
+ if (mask(y, x_shifted + 2))\r
+ dst.z = op(src.z);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src1.x, src2.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src1.y, src2.y);
+ if (mask(y, x_shifted + 2))\r
+ dst.z = op(src1.z, src2.z);\r
+ }\r
+ };\r
+ template <> struct OpUnroller<4>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src.y);
+ if (mask(y, x_shifted + 2))\r
+ dst.z = op(src.z);
+ if (mask(y, x_shifted + 3))\r
+ dst.w = op(src.w);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
+ {
+ if (mask(y, x_shifted))\r
+ dst.x = op(src1.x, src2.x);
+ if (mask(y, x_shifted + 1))\r
+ dst.y = op(src1.y, src2.y);
+ if (mask(y, x_shifted + 2))\r
+ dst.z = op(src1.z, src2.z);
+ if (mask(y, x_shifted + 3))\r
+ dst.w = op(src1.w, src2.w);\r
+ }\r
+ };\r
+\r
+ template <typename T, typename D, typename UnOp, typename Mask>
+ __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
+ {
+ typedef typename UnReadWriteTraits<T, D>::read_type read_type;
+ typedef typename UnReadWriteTraits<T, D>::write_type write_type;
+ const int shift = UnReadWriteTraits<T, D>::shift;
+
+ const int x = threadIdx.x + blockIdx.x * blockDim.x;
+ const int y = threadIdx.y + blockIdx.y * blockDim.y;
+ const int x_shifted = x * shift;
+
+ if (y < src_.rows)
+ {
+ const T* src = src_.ptr(y);
+ D* dst = dst_.ptr(y);
+
+ if (x_shifted + shift - 1 < src_.cols)
+ {
+ read_type src_n_el = ((const read_type*)src)[x];
+ write_type dst_n_el;
+
+ OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
+
+ ((write_type*)dst)[x] = dst_n_el;
+ }
+ else
+ {
+ for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
+ {
+ if (mask(y, real_x))
+ dst[real_x] = op(src[real_x]);
+ }
+ }
+ }
+ }\r
+\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)\r
+ {\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+ if (x < src.cols && y < src.rows && mask(y, x))\r
+ {\r
+ dst.ptr(y)[x] = op(src.ptr(y)[x]);\r
+ }\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+ __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
+ const Mask mask, BinOp op)
+ {
+ typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
+ typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
+ typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
+ const int shift = BinReadWriteTraits<T1, T2, D>::shift;
+
+ const int x = threadIdx.x + blockIdx.x * blockDim.x;
+ const int y = threadIdx.y + blockIdx.y * blockDim.y;
+ const int x_shifted = x * shift;
+
+ if (y < src1_.rows)
+ {
+ const T1* src1 = src1_.ptr(y);
+ const T2* src2 = src2_.ptr(y);
+ D* dst = dst_.ptr(y);
+
+ if (x_shifted + shift - 1 < src1_.cols)
+ {
+ read_type1 src1_n_el = ((const read_type1*)src1)[x];
+ read_type2 src2_n_el = ((const read_type2*)src2)[x];
+ write_type dst_n_el;
+
+ OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
+
+ ((write_type*)dst)[x] = dst_n_el;
+ }
+ else
+ {
+ for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
+ {
+ if (mask(y, real_x))
+ dst[real_x] = op(src1[real_x], src2[real_x]);
+ }
+ }
+ }
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, \r
+ const Mask mask, BinOp op)\r
+ {\r
+ const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+ const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+ if (x < src1.cols && y < src1.rows && mask(y, x))\r
+ {\r
+ T1 src1_data = src1.ptr(y)[x];\r
+ T2 src2_data = src2.ptr(y)[x];\r
+ dst.ptr(y)[x] = op(src1_data, src2_data);\r
+ }\r
+ } \r
+}}}\r
+\r
+namespace cv \r
+{ \r
+ namespace gpu \r
+ {\r
+ template <bool UseSmart> struct TransformChooser;\r
+ template<> struct TransformChooser<false>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, \r
+ cudaStream_t stream = 0)\r
+ {\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+\r
+ grid.x = divUp(src.cols, threads.x);\r
+ grid.y = divUp(src.rows, threads.y); \r
+\r
+ device::transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() ); \r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
+ BinOp op, const Mask& mask, cudaStream_t stream = 0)\r
+ {\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+\r
+ grid.x = divUp(src1.cols, threads.x);\r
+ grid.y = divUp(src1.rows, threads.y); \r
+\r
+ device::transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() ); \r
+ }\r
+ };\r
+ template<> struct TransformChooser<true>\r
+ {\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, \r
+ cudaStream_t stream = 0)\r
+ {
+ const int shift = device::UnReadWriteTraits<T, D>::shift;\r
+\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);
+\r
+ grid.x = divUp(src.cols, threads.x * shift);\r
+ grid.y = divUp(src.rows, threads.y); \r
+\r
+ device::transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() );\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
+ BinOp op, const Mask& mask, cudaStream_t stream = 0)\r
+ {
+ const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;\r
+\r
+ dim3 threads(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+\r
+ grid.x = divUp(src1.cols, threads.x * shift);\r
+ grid.y = divUp(src1.rows, threads.y); \r
+\r
+ device::transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);\r
+\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaThreadSynchronize() ); \r
+ }\r
+ };\r
+\r
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, \r
+ cudaStream_t stream = 0)\r
+ {
+ TransformChooser<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);\r
+ }\r
+\r
+ template <typename T, typename D, typename UnOp>\r
+ static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream = 0)\r
+ {\r
+ transform_caller(src, dst, op, device::NoMask(), stream);\r
+ }\r
+ template <typename T, typename D, typename UnOp>\r
+ static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, UnOp op, \r
+ cudaStream_t stream = 0)\r
+ {\r
+ transform_caller(src, dst, op, device::MaskReader(mask), stream);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
+ BinOp op, const Mask& mask, cudaStream_t stream = 0)\r
+ {\r
+ TransformChooser<device::VecTraits<T1>::cn == 1 && device::VecTraits<T2>::cn == 1 && device::VecTraits<D>::cn == 1 && device::BinReadWriteTraits<T1, T2, D>::shift != 1>::call(src1, src2, dst, op, mask, stream);\r
+ }\r
+\r
+ template <typename T1, typename T2, typename D, typename BinOp>\r
+ static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
+ BinOp op, cudaStream_t stream = 0)\r
+ {\r
+ transform_caller(src1, src2, dst, op, device::NoMask(), stream);\r
+ }\r
+ template <typename T1, typename T2, typename D, typename BinOp>\r
+ static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
+ const PtrStep& mask, BinOp op, cudaStream_t stream = 0)\r
+ {\r
+ transform_caller(src1, src2, dst, op, device::MaskReader(mask), stream);\r
+ }\r
+ }\r
+}\r
+\r
+#endif // __OPENCV_GPU_TRANSFORM_HPP__\r
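
As an aside, a hedged sketch of how a user-defined device functor plugs into the transform() helper introduced above; ScaleOp and scale_gpu are hypothetical names used only for illustration, not part of this patch:

    // Hypothetical per-element functor: multiply every pixel by a constant.
    struct ScaleOp
    {
        explicit ScaleOp(float s_) : s(s_) {}
        __device__ float operator()(const float& v) const { return v * s; }
        float s;
    };

    void scale_gpu(const cv::gpu::DevMem2D_<float>& src, const cv::gpu::DevMem2D_<float>& dst,
                   float s, cudaStream_t stream)
    {
        // transform() picks transformSimple or transformSmart based on the read/write traits
        cv::gpu::transform(src, dst, ScaleOp(s), stream);
    }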
template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; };\r
\r
template<> struct TypeVec<char, 1> { typedef char vec_t; };\r
+ template<> struct TypeVec<schar, 1> { typedef char vec_t; };\r
template<> struct TypeVec<char1, 1> { typedef char1 vec_t; };\r
template<> struct TypeVec<char, 2> { typedef char2 vec_t; };\r
+ template<> struct TypeVec<schar, 2> { typedef char2 vec_t; };\r
template<> struct TypeVec<char2, 2> { typedef char2 vec_t; };\r
template<> struct TypeVec<char, 3> { typedef char3 vec_t; };\r
+ template<> struct TypeVec<schar, 3> { typedef char3 vec_t; };\r
template<> struct TypeVec<char3, 3> { typedef char3 vec_t; };\r
template<> struct TypeVec<char, 4> { typedef char4 vec_t; };\r
+ template<> struct TypeVec<schar, 4> { typedef char4 vec_t; };\r
template<> struct TypeVec<char4, 4> { typedef char4 vec_t; };\r
\r
template<> struct TypeVec<ushort, 1> { typedef ushort vec_t; };\r
template<> struct TypeVec<float, 4> { typedef float4 vec_t; };\r
template<> struct TypeVec<float4, 4> { typedef float4 vec_t; };\r
\r
+ template<> struct TypeVec<double, 1> { typedef double vec_t; };\r
+ template<> struct TypeVec<double1, 1> { typedef double1 vec_t; };\r
+ template<> struct TypeVec<double, 2> { typedef double2 vec_t; };\r
+ template<> struct TypeVec<double2, 2> { typedef double2 vec_t; };\r
+ template<> struct TypeVec<double, 3> { typedef double3 vec_t; };\r
+ template<> struct TypeVec<double3, 3> { typedef double3 vec_t; };\r
+ template<> struct TypeVec<double, 4> { typedef double4 vec_t; };\r
+ template<> struct TypeVec<double4, 4> { typedef double4 vec_t; };\r
+\r
template<typename T> struct VecTraits;\r
\r
template<> struct VecTraits<uchar> \r
static __device__ __host__ char all(char v) {return v;}\r
static __device__ __host__ char make(char x) {return x;}\r
};\r
+ template<> struct VecTraits<schar> \r
+ { \r
+ typedef schar elem_t; \r
+ enum {cn=1}; \r
+ static __device__ __host__ schar all(schar v) {return v;}\r
+ static __device__ __host__ schar make(schar x) {return x;}\r
+ };\r
template<> struct VecTraits<char1> \r
{ \r
- typedef char elem_t; \r
+ typedef schar elem_t; \r
enum {cn=1}; \r
- static __device__ __host__ char1 all(char v) {return make_char1(v);}\r
- static __device__ __host__ char1 make(char x) {return make_char1(x);}\r
+ static __device__ __host__ char1 all(schar v) {return make_char1(v);}\r
+ static __device__ __host__ char1 make(schar x) {return make_char1(x);}\r
};\r
template<> struct VecTraits<char2> \r
{ \r
- typedef char elem_t; \r
+ typedef schar elem_t; \r
enum {cn=2}; \r
- static __device__ __host__ char2 all(char v) {return make_char2(v, v);}\r
- static __device__ __host__ char2 make(char x, char y) {return make_char2(x, y);}\r
+ static __device__ __host__ char2 all(schar v) {return make_char2(v, v);}\r
+ static __device__ __host__ char2 make(schar x, schar y) {return make_char2(x, y);}\r
};\r
template<> struct VecTraits<char3> \r
{ \r
- typedef char elem_t; \r
+ typedef schar elem_t; \r
enum {cn=3}; \r
- static __device__ __host__ char3 all(char v) {return make_char3(v, v, v);}\r
- static __device__ __host__ char3 make(char x, char y, char z) {return make_char3(x, y, z);}\r
+ static __device__ __host__ char3 all(schar v) {return make_char3(v, v, v);}\r
+ static __device__ __host__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}\r
};\r
template<> struct VecTraits<char4> \r
{ \r
- typedef char elem_t; \r
+ typedef schar elem_t; \r
enum {cn=4}; \r
- static __device__ __host__ char4 all(char v) {return make_char4(v, v, v, v);}\r
- static __device__ __host__ char4 make(char x, char y, char z, char w) {return make_char4(x, y, z, w);}\r
+ static __device__ __host__ char4 all(schar v) {return make_char4(v, v, v, v);}\r
+ static __device__ __host__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}\r
};\r
\r
template<> struct VecTraits<ushort> \r
static __device__ __host__ float4 make(float x, float y, float z, float w) {return make_float4(x, y, z, w);}\r
};\r
\r
+ template<> struct VecTraits<double> \r
+ { \r
+ typedef double elem_t; \r
+ enum {cn=1}; \r
+ static __device__ __host__ double all(double v) {return v;}\r
+ static __device__ __host__ double make(double x) {return x;}\r
+ };\r
+ template<> struct VecTraits<double1> \r
+ { \r
+ typedef double elem_t; \r
+ enum {cn=1}; \r
+ static __device__ __host__ double1 all(double v) {return make_double1(v);}\r
+ static __device__ __host__ double1 make(double x) {return make_double1(x);}\r
+ };\r
+ template<> struct VecTraits<double2> \r
+ { \r
+ typedef double elem_t; \r
+ enum {cn=2}; \r
+ static __device__ __host__ double2 all(double v) {return make_double2(v, v);}\r
+ static __device__ __host__ double2 make(double x, double y) {return make_double2(x, y);}\r
+ };\r
+ template<> struct VecTraits<double3> \r
+ { \r
+ typedef double elem_t; \r
+ enum {cn=3}; \r
+ static __device__ __host__ double3 all(double v) {return make_double3(v, v, v);}\r
+ static __device__ __host__ double3 make(double x, double y, double z) {return make_double3(x, y, z);}\r
+ };\r
+ template<> struct VecTraits<double4> \r
+ { \r
+ typedef double elem_t;\r
+ enum {cn=4}; \r
+ static __device__ __host__ double4 all(double v) {return make_double4(v, v, v, v);}\r
+ static __device__ __host__ double4 make(double x, double y, double z, double w) {return make_double4(x, y, z, w);}\r
+ };\r
+\r
template <int cn, typename VecD> struct SatCast;\r
template <typename VecD> struct SatCast<1, VecD>\r
{\r
#include "opencv2/gpu/stream_accessor.hpp"\r
#include "npp.h" \r
#include "npp_staging.h"\r
- #include "surf_key_point.h"\r
\r
#include "nvidia/NCV.hpp"\r
#include "nvidia/NCVHaarObjectDetection.hpp"\r
\r
////////////////////////////////////////////////////////////////////////////////\r
// threshold\r
-struct CV_GpuNppImageThresholdTest : public CV_GpuImageProcTest\r
+struct CV_GpuImageThresholdTest : public CV_GpuImageProcTest\r
{\r
public:\r
- CV_GpuNppImageThresholdTest() : CV_GpuImageProcTest( "GPU-NppImageThreshold", "threshold" ) {}\r
+ CV_GpuImageThresholdTest() : CV_GpuImageProcTest( "GPU-ImageThreshold", "threshold" ) {}\r
\r
int test(const Mat& img)\r
{\r
- if (img.type() != CV_32FC1)\r
+ if (img.type() != CV_8UC1 && img.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
}\r
\r
+ const double maxVal = img.type() == CV_8UC1 ? 255 : 1.0;\r
+\r
cv::RNG rng(*ts->get_rng());\r
- const double thresh = rng;\r
\r
- cv::Mat cpuRes;\r
- cv::threshold(img, cpuRes, thresh, 0.0, THRESH_TRUNC);\r
+ int res = CvTS::OK;\r
\r
- GpuMat gpu1(img);\r
- GpuMat gpuRes;\r
- cv::gpu::threshold(gpu1, gpuRes, thresh);\r
+ for (int type = THRESH_BINARY; type <= THRESH_TOZERO_INV; ++type)\r
+ {\r
+ const double thresh = rng.uniform(0.0, maxVal);\r
+\r
+ cv::Mat cpuRes;\r
+ cv::threshold(img, cpuRes, thresh, maxVal, type);\r
+\r
+ GpuMat gpu1(img);\r
+ GpuMat gpuRes;\r
+ cv::gpu::threshold(gpu1, gpuRes, thresh, maxVal, type);\r
+\r
+ if (CheckNorm(cpuRes, gpuRes) != CvTS::OK)\r
+ res = CvTS::FAIL_GENERIC;\r
+ }\r
\r
- return CheckNorm(cpuRes, gpuRes);\r
+ return res;\r
}\r
};\r
\r
// Placing all test definitions in one place\r
// makes us know about what tests are commented.\r
\r
-CV_GpuNppImageThresholdTest CV_GpuNppImageThreshold_test;\r
+CV_GpuImageThresholdTest CV_GpuImageThreshold_test;\r
CV_GpuNppImageResizeTest CV_GpuNppImageResize_test;\r
CV_GpuNppImageCopyMakeBorderTest CV_GpuNppImageCopyMakeBorder_test;\r
CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test;\r