else()
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/zlib")
endif()
-define_opencv_module(core ${ZLIB_LIBRARY})
+
+#define_opencv_module(core ${ZLIB_LIBRARY})
+
+set(name "core")
+
+project(opencv_${name})
+
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
+ "${CMAKE_CURRENT_SOURCE_DIR}/src"
+ "${CMAKE_CURRENT_BINARY_DIR}")
+
+file(GLOB lib_srcs "src/*.cpp")
+file(GLOB lib_int_hdrs "src/*.h*")
+file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
+file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.h*")
+
+if(COMMAND get_module_external_sources)
+ get_module_external_sources(${name})
+endif()
+
+source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+source_group("Include" FILES ${lib_hdrs})
+source_group("Include\\detail" FILES ${lib_hdrs_detail})
+list(APPEND lib_hdrs ${lib_hdrs_detail})
+
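+# Optional CUDA support: compile the kernels in src/cuda with nvcc and reuse headers from the gpu module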
+if (HAVE_CUDA)
+ file(GLOB lib_cuda "src/cuda/*.cu")
+ source_group("Cuda" FILES "${lib_cuda}")
+
+ include_directories(${CUDA_INCLUDE_DIRS})
+ include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/src")
+ include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/src/cuda")
+
+ set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_10,code=sm_10
+ -gencode arch=compute_11,code=sm_11
+ -gencode arch=compute_12,code=sm_12
+ -gencode arch=compute_13,code=sm_13
+ -gencode arch=compute_20,code=sm_20
+ -gencode arch=compute_20,code=sm_21)
+
+ if (UNIX OR APPLE)
+ set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;")
+ #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC")
+ endif()
+
+ #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep")
+ #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;")
+
+ if (APPLE)
+ set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;")
+ endif()
+
+ CUDA_COMPILE(cuda_objs ${lib_cuda})
+ #CUDA_BUILD_CLEAN_TARGET()
+endif()
+
+set(the_target "opencv_${name}")
+add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${cuda_objs})
+
+# For dynamic link numbering conventions
+if(NOT ANDROID)
+ # Android SDK build scripts can include only .so files into the final .apk,
+ # so we should not set version properties for Android
+ set_target_properties(${the_target} PROPERTIES
+ VERSION ${OPENCV_VERSION}
+ SOVERSION ${OPENCV_SOVERSION}
+ )
+endif()
+
+set_target_properties(${the_target} PROPERTIES OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}" )
+
+if(ENABLE_SOLUTION_FOLDERS)
+ set_target_properties(${the_target} PROPERTIES FOLDER "modules")
+endif()
+
+if (BUILD_SHARED_LIBS)
+ if(MSVC)
+ set_target_properties(${the_target} PROPERTIES DEFINE_SYMBOL CVAPI_EXPORTS)
+ else()
+ add_definitions(-DCVAPI_EXPORTS)
+ endif()
+endif()
+
+# Additional target properties
+set_target_properties(${the_target} PROPERTIES
+ DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+ ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
+ RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
+ INSTALL_NAME_DIR lib
+ )
+
+# Add the required libraries for linking:
+target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ZLIB_LIBRARY})
+
+if (HAVE_CUDA)
+ target_link_libraries(${the_target} ${CUDA_LIBRARIES})
+
+ unset(CUDA_npp_LIBRARY CACHE)
+ find_cuda_helper_libs(npp)
+ target_link_libraries(${the_target} ${CUDA_npp_LIBRARY})
+endif()
+
+if(MSVC)
+ if(CMAKE_CROSSCOMPILING)
+ set_target_properties(${the_target} PROPERTIES
+ LINK_FLAGS "/NODEFAULTLIB:secchk"
+ )
+ endif()
+ set_target_properties(${the_target} PROPERTIES
+ LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG"
+ )
+endif()
+
+# Dependencies of this target:
+add_dependencies(${the_target} ${ZLIB_LIBRARY})
+
+install(TARGETS ${the_target}
+ RUNTIME DESTINATION bin COMPONENT main
+ LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main
+ ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main)
+
+install(FILES ${lib_hdrs}
+ DESTINATION ${OPENCV_INCLUDE_PREFIX}/opencv2/${name}
+ COMPONENT main)
+
+add_opencv_precompiled_headers(${the_target})
+
+define_opencv_test(${name})
+define_opencv_perf_test(${name})
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);\r
CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);\r
\r
- class CV_EXPORTS GpuFuncTable\r
- {\r
- public:\r
- virtual ~GpuFuncTable() {}\r
-\r
- virtual void copy(const Mat& src, GpuMat& dst) const = 0;\r
- virtual void copy(const GpuMat& src, Mat& dst) const = 0;\r
- virtual void copy(const GpuMat& src, GpuMat& dst) const = 0;\r
-\r
- virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0;\r
-\r
- virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;\r
- virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0;\r
-\r
- virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0;\r
-\r
- virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;\r
- virtual void free(void* devPtr) const = 0;\r
- };\r
+ //////////////////////////////// Error handling ////////////////////////\r
\r
- CV_EXPORTS void setGpuFuncTable(const GpuFuncTable* funcTbl);\r
+ CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
//\r
//M*/\r
\r
-#include "internal_shared.hpp"\r
#include "opencv2/gpu/device/saturate_cast.hpp"\r
#include "opencv2/gpu/device/transform.hpp"\r
#include "opencv2/gpu/device/functional.hpp"\r
}\r
\r
template<typename T>\r
- void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream)\r
+ void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream)\r
{\r
dim3 threadsPerBlock(16,16, 1);\r
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);\r
cudaSafeCall ( cudaDeviceSynchronize() );\r
}\r
\r
- void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream)\r
+ void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream)\r
{\r
- typedef void (*CopyToFunc)(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream);\r
+ typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream);\r
\r
static CopyToFunc tab[8] =\r
{\r
\r
CopyToFunc func = tab[depth];\r
\r
- if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);\r
+ if (func == 0) \r
+ cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__);\r
\r
func(mat_src, mat_dst, mask, channels, stream);\r
}\r
}\r
}\r
template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream)\r
+ void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream)\r
{\r
writeScalar(scalar);\r
\r
cudaSafeCall ( cudaDeviceSynchronize() );\r
}\r
\r
- template void set_to_gpu<uchar >(const DevMem2Db& mat, const uchar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<schar >(const DevMem2Db& mat, const schar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<ushort>(const DevMem2Db& mat, const ushort* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<short >(const DevMem2Db& mat, const short* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<int >(const DevMem2Db& mat, const int* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<float >(const DevMem2Db& mat, const float* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
- template void set_to_gpu<double>(const DevMem2Db& mat, const double* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
\r
template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream)\r
+ void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream)\r
{\r
writeScalar(scalar);\r
\r
cudaSafeCall ( cudaDeviceSynchronize() );\r
}\r
\r
- template void set_to_gpu<uchar >(const DevMem2Db& mat, const uchar* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<schar >(const DevMem2Db& mat, const schar* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<ushort>(const DevMem2Db& mat, const ushort* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<short >(const DevMem2Db& mat, const short* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<int >(const DevMem2Db& mat, const int* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<float >(const DevMem2Db& mat, const float* scalar, int channels, cudaStream_t stream);\r
- template void set_to_gpu<double>(const DevMem2Db& mat, const double* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, int channels, cudaStream_t stream);\r
+ template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, int channels, cudaStream_t stream);\r
\r
///////////////////////////////////////////////////////////////////////////\r
//////////////////////////////// ConvertTo ////////////////////////////////\r
};\r
\r
template<typename T, typename D>\r
- void cvt_(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta, cudaStream_t stream)\r
+ void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream)\r
{\r
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );\r
cudaSafeCall( cudaSetDoubleForDevice(&beta) );\r
::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);\r
}\r
\r
- void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, \r
- cudaStream_t stream = 0)\r
+ void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)\r
{\r
- typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta, \r
- cudaStream_t stream);\r
+ typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream);\r
\r
static const caller_t tab[8][8] =\r
{\r
#include "precomp.hpp"\r
#include "opencv2/core/gpumat.hpp"\r
\r
+#include <iostream>\r
+#include <sstream>\r
+\r
+#ifdef HAVE_CUDA\r
+ #include <cuda_runtime.h>\r
+ #include <npp.h>\r
+#endif\r
+\r
using namespace std;\r
using namespace cv;\r
using namespace cv::gpu;\r
\r
namespace\r
{\r
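+ // GpuFuncTable is the internal dispatch interface for GpuMat: it abstracts upload/download,\r
+ // masked copies, type conversion, setTo and raw device allocation, so the public GpuMat code\r
+ // stays identical whether or not CUDA is available.\r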
+ class CV_EXPORTS GpuFuncTable\r
+ {\r
+ public:\r
+ virtual ~GpuFuncTable() {}\r
+\r
+ virtual void copy(const Mat& src, GpuMat& dst) const = 0;\r
+ virtual void copy(const GpuMat& src, Mat& dst) const = 0;\r
+ virtual void copy(const GpuMat& src, GpuMat& dst) const = 0;\r
+\r
+ virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0;\r
+\r
+ virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;\r
+ virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0;\r
+\r
+ virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0;\r
+\r
+ virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;\r
+ virtual void free(void* devPtr) const = 0;\r
+ };\r
+}\r
+\r
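+// Without CUDA the table below is backed by a stub implementation whose methods simply report\r
+// that the library was compiled without GPU support.\r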
+#ifndef HAVE_CUDA\r
+\r
+namespace\r
+{\r
void throw_nogpu() \r
{ \r
CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); \r
void free(void*) const {}\r
};\r
\r
- const GpuFuncTable* g_funcTbl = 0;\r
-\r
const GpuFuncTable* gpuFuncTable()\r
{\r
static EmptyFuncTable empty;\r
- return g_funcTbl ? g_funcTbl : ∅\r
+ return ∅\r
}\r
}\r
\r
-void cv::gpu::setGpuFuncTable(const GpuFuncTable* funcTbl)\r
+#else // HAVE_CUDA\r
+\r
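+// Forward declarations of the low-level CUDA kernels (implemented in the .cu sources) that are\r
+// wrapped by the helpers below.\r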
+namespace cv { namespace gpu { namespace device \r
+{\r
+ void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream);\r
+\r
+ template <typename T>\r
+ void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);\r
+\r
+ template <typename T>\r
+ void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream);\r
+\r
+ void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream);\r
+}}}\r
+\r
+namespace\r
+{\r
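+ // Local error-checking helpers: translate CUDA and NPP status codes into cv::gpu::error calls.\r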
+#if defined(__GNUC__)\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)\r
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)\r
+#else /* !defined(__GNUC__) */\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)\r
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)\r
+#endif\r
+\r
+ inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
+ {\r
+ if (cudaSuccess != err)\r
+ cv::gpu::error(cudaGetErrorString(err), file, line, func);\r
+ }\r
+\r
+ inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")\r
+ {\r
+ if (err < 0)\r
+ {\r
+ std::ostringstream msg;\r
+ msg << "NPP API Call Error: " << err;\r
+ cv::gpu::error(msg.str().c_str(), file, line, func);\r
+ }\r
+ }\r
+}\r
+\r
+namespace\r
+{\r
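+ // Thin wrappers that convert a cv::Scalar to the element type T and forward it to set_to_gpu.\r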
+ template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)\r
+ {\r
+ Scalar_<T> sf = s;\r
+ ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);\r
+ }\r
+\r
+ template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)\r
+ {\r
+ Scalar_<T> sf = s;\r
+ ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);\r
+ }\r
+}\r
+\r
+namespace cv { namespace gpu\r
+{\r
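+ // Stream-aware helpers exported from the core module; the gpu module's Stream implementation\r
+ // calls them instead of talking to the kernels directly.\r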
+ CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) \r
+ { \r
+ ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream);\r
+ }\r
+\r
+ CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);\r
+ } \r
+\r
+ CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)\r
+ {\r
+ ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);\r
+ }\r
+\r
+ CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)\r
+ {\r
+ typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ kernelSetCaller<uchar>, kernelSetCaller<schar>, kernelSetCaller<ushort>, kernelSetCaller<short>, kernelSetCaller<int>,\r
+ kernelSetCaller<float>, kernelSetCaller<double>\r
+ };\r
+\r
+ callers[src.depth()](src, s, stream);\r
+ }\r
+\r
+ CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)\r
+ {\r
+ typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);\r
+\r
+ static const caller_t callers[] = \r
+ {\r
+ kernelSetCaller<uchar>, kernelSetCaller<schar>, kernelSetCaller<ushort>, kernelSetCaller<short>, kernelSetCaller<int>,\r
+ kernelSetCaller<float>, kernelSetCaller<double>\r
+ };\r
+\r
+ callers[src.depth()](src, s, mask, stream);\r
+ }\r
+\r
+ CV_EXPORTS void setTo(GpuMat& src, Scalar s)\r
+ {\r
+ setTo(src, s, 0);\r
+ }\r
+\r
+ CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask)\r
+ {\r
+ setTo(src, s, mask, 0);\r
+ }\r
+}}\r
+\r
+namespace\r
{\r
- g_funcTbl = funcTbl;\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // Convert\r
+\r
+ template<int n> struct NPPTypeTraits;\r
+ template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };\r
+ template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
+ template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
+ template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };\r
+ template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
+\r
+ template<int SDEPTH, int DDEPTH> struct NppConvertFunc\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+ typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
+\r
+ typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);\r
+ };\r
+ template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>\r
+ {\r
+ typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
+\r
+ typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);\r
+ };\r
+\r
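+ // NppCvt binds a concrete NPP conversion primitive; the CV_32F specialization supplies the\r
+ // rounding mode that the 32f-to-integer conversions require.\r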
+ template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+ typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
+\r
+ static void cvt(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ };\r
+ template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>\r
+ {\r
+ typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
+\r
+ static void cvt(const GpuMat& src, GpuMat& dst)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+ nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ }; \r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // Set\r
+ \r
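+ // NppSet binds a concrete nppiSet_* primitive; the single-channel specialization passes a\r
+ // scalar value instead of an array of per-channel values.\r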
+ template<int SDEPTH, int SCN> struct NppSetFunc\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);\r
+ };\r
+ template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);\r
+ };\r
+\r
+ template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ static void set(GpuMat& src, Scalar s)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ Scalar_<src_t> nppS = s;\r
+\r
+ nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ };\r
+ template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ static void set(GpuMat& src, Scalar s)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ Scalar_<src_t> nppS = s;\r
+\r
+ nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ }; \r
+\r
+ template<int SDEPTH, int SCN> struct NppSetMaskFunc\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);\r
+ };\r
+ template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);\r
+ };\r
+\r
+ template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ static void set(GpuMat& src, Scalar s, const GpuMat& mask)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ Scalar_<src_t> nppS = s;\r
+\r
+ nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ };\r
+ template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>\r
+ {\r
+ typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
+\r
+ static void set(GpuMat& src, Scalar s, const GpuMat& mask)\r
+ {\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ Scalar_<src_t> nppS = s;\r
+\r
+ nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );\r
+\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ }; \r
+\r
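+ // CUDA-backed implementation of GpuFuncTable: plain copies go through cudaMemcpy2D, while\r
+ // convert/setTo pick an NPP primitive when one exists and fall back to the custom kernels otherwise.\r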
+ class CudaFuncTable : public GpuFuncTable\r
+ {\r
+ public:\r
+ void copy(const Mat& src, GpuMat& dst) const \r
+ { \r
+ cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );\r
+ }\r
+ void copy(const GpuMat& src, Mat& dst) const\r
+ { \r
+ cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );\r
+ }\r
+ void copy(const GpuMat& src, GpuMat& dst) const\r
+ { \r
+ cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );\r
+ }\r
+\r
+ void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const \r
+ { \r
+ ::cv::gpu::copyWithMask(src, dst, mask);\r
+ }\r
+\r
+ void convert(const GpuMat& src, GpuMat& dst) const \r
+ { \r
+ typedef void (*caller_t)(const GpuMat& src, GpuMat& dst);\r
+ static const caller_t callers[7][7][7] =\r
+ {\r
+ { \r
+ /* 8U -> 8U */ {0, 0, 0, 0},\r
+ /* 8U -> 8S */ {::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo},\r
+ /* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},\r
+ /* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},\r
+ /* 8U -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 8S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8S -> 8S */ {0,0,0,0},\r
+ /* 8S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8S -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 8S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},\r
+ /* 16U -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16U -> 16U */ {0,0,0,0},\r
+ /* 16U -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},\r
+ /* 16S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16S -> 16S */ {0,0,0,0},\r
+ /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 16S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 32S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32S -> 32S */ {0,0,0,0},\r
+ /* 32S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 32F -> 32F */ {0,0,0,0},\r
+ /* 32F -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}\r
+ },\r
+ {\r
+ /* 64F -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo},\r
+ /* 64F -> 64F */ {0,0,0,0}\r
+ }\r
+ };\r
+\r
+ caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1];\r
+ CV_DbgAssert(func != 0);\r
+\r
+ func(src, dst);\r
+ }\r
+\r
+ void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const \r
+ { \r
+ ::cv::gpu::convertTo(src, dst, alpha, beta);\r
+ }\r
+\r
+ void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const\r
+ {\r
+ NppiSize sz;\r
+ sz.width = m.cols;\r
+ sz.height = m.rows;\r
+\r
+ if (mask.empty())\r
+ {\r
+ if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)\r
+ {\r
+ cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );\r
+ return;\r
+ }\r
+\r
+ if (m.depth() == CV_8U)\r
+ {\r
+ int cn = m.channels();\r
+\r
+ if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))\r
+ {\r
+ int val = saturate_cast<uchar>(s[0]);\r
+ cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );\r
+ return;\r
+ }\r
+ }\r
+\r
+ typedef void (*caller_t)(GpuMat& src, Scalar s);\r
+ static const caller_t callers[7][4] =\r
+ {\r
+ {NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},\r
+ {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},\r
+ {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, NppSet<CV_16U, 2, nppiSet_16u_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},\r
+ {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, NppSet<CV_16S, 2, nppiSet_16s_C2R>::set, ::cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},\r
+ {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},\r
+ {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},\r
+ {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}\r
+ };\r
+\r
+ callers[m.depth()][m.channels() - 1](m, s);\r
+ }\r
+ else\r
+ {\r
+ typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask);\r
+\r
+ static const caller_t callers[7][4] =\r
+ {\r
+ {NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},\r
+ {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo},\r
+ {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},\r
+ {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},\r
+ {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},\r
+ {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},\r
+ {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}\r
+ };\r
+\r
+ callers[m.depth()][m.channels() - 1](m, s, mask);\r
+ }\r
+ }\r
+\r
+ void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const\r
+ {\r
+ cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );\r
+ }\r
+\r
+ void free(void* devPtr) const\r
+ {\r
+ cudaFree(devPtr);\r
+ }\r
+ };\r
+ \r
+ const GpuFuncTable* gpuFuncTable()\r
+ {\r
+ static CudaFuncTable funcTable;\r
+ return &funcTable;\r
+ }\r
}\r
\r
+#endif // HAVE_CUDA\r
+\r
void cv::gpu::GpuMat::upload(const Mat& m)\r
{\r
CV_DbgAssert(!m.empty());\r
step = rows = cols = 0;\r
refcount = 0;\r
}\r
+\r
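+// Default GPU error handler: if an exception is already propagating, log to stderr instead of\r
+// throwing a nested cv::Exception.\r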
+void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func)\r
+{\r
+ int code = CV_GpuApiCallError;\r
+\r
+ if (uncaught_exception())\r
+ {\r
+ const char* errorStr = cvErrorStr(code); \r
+ const char* function = func ? func : "unknown function"; \r
+\r
+ cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;\r
+ cerr.flush(); \r
+ }\r
+ else \r
+ cv::error( cv::Exception(code, error_string, func, file, line) );\r
+}\r
int minorVersion_;\r
};\r
\r
-//////////////////////////////// Error handling ////////////////////////\r
-\r
-CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);\r
-\r
//////////////////////////////// CudaMem ////////////////////////////////\r
// CudaMem is a limited cv::Mat with page-locked memory allocation.\r
// Page-locked memory is only needed for async and faster copying to GPU.\r
#include "opencv2/gpu/devmem2d.hpp"\r
#include "safe_call.hpp"\r
\r
-#ifndef CV_PI\r
-#define CV_PI 3.1415926535897932384626433832795\r
-#endif\r
-\r
-#ifndef CV_PI_F\r
- #ifndef CV_PI\r
- #define CV_PI_F 3.14159265f\r
- #else\r
- #define CV_PI_F ((float)CV_PI)\r
- #endif\r
-#endif\r
-\r
-#ifdef __CUDACC__\r
-\r
-namespace cv { namespace gpu { namespace device \r
-{\r
- typedef unsigned char uchar;\r
- typedef unsigned short ushort;\r
- typedef signed char schar;\r
- typedef unsigned int uint;\r
-\r
- template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)\r
- {\r
- cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();\r
- cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );\r
- }\r
-}}}\r
-\r
-#endif\r
-\r
namespace cv { namespace gpu \r
{\r
enum \r
// Returns true if the GPU analogue exists, false otherwise.\r
bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);\r
\r
- static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }\r
-\r
class NppStreamHandler\r
{\r
public:\r
void ncvError(int err, const char *file, const int line, const char *func = "");\r
void cufftError(int err, const char *file, const int line, const char *func = "");\r
void cublasError(int err, const char *file, const int line, const char *func = "");\r
+}}\r
\r
- static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
- {\r
- if (cudaSuccess != err)\r
- cv::gpu::error(cudaGetErrorString(err), file, line, func);\r
- }\r
+static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (cudaSuccess != err)\r
+ cv::gpu::error(cudaGetErrorString(err), file, line, func);\r
+}\r
\r
- static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")\r
- {\r
- if (err < 0)\r
- cv::gpu::nppError(err, file, line, func);\r
- }\r
+static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (err < 0)\r
+ cv::gpu::nppError(err, file, line, func);\r
+}\r
\r
- static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")\r
- {\r
- if (NCV_SUCCESS != err)\r
- cv::gpu::ncvError(err, file, line, func);\r
- }\r
+static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (NCV_SUCCESS != err)\r
+ cv::gpu::ncvError(err, file, line, func);\r
+}\r
\r
- static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")\r
- {\r
- if (CUFFT_SUCCESS != err)\r
- cv::gpu::cufftError(err, file, line, func);\r
- }\r
+static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (CUFFT_SUCCESS != err)\r
+ cv::gpu::cufftError(err, file, line, func);\r
+}\r
\r
- static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")\r
- {\r
- if (CUBLAS_STATUS_SUCCESS != err)\r
- cv::gpu::cublasError(err, file, line, func);\r
- }\r
-}}\r
+static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (CUBLAS_STATUS_SUCCESS != err)\r
+ cv::gpu::cublasError(err, file, line, func);\r
+}\r
\r
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
\ No newline at end of file
\r
#include "opencv2/gpu/stream_accessor.hpp"\r
\r
-namespace cv { namespace gpu { namespace device \r
+namespace cv { namespace gpu\r
{\r
- void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);\r
-\r
- template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);\r
- template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
-\r
- void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);\r
-}}}\r
-\r
-using namespace ::cv::gpu::device;\r
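+ // These helpers are now implemented in the core module (see the gpumat sources above); declare\r
+ // them here so the Stream implementation can reuse them.\r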
+ void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);\r
+ void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);\r
+ void setTo(GpuMat& src, Scalar s, cudaStream_t stream);\r
+ void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);\r
+}}\r
\r
struct Stream::Impl\r
{\r
size_t bwidth = src.cols * src.elemSize();\r
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );\r
};\r
-\r
- template <typename T>\r
- void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream)\r
- {\r
- Scalar_<T> sf = s;\r
- set_to_gpu(src, sf.val, src.channels(), stream);\r
- }\r
-\r
- template <typename T>\r
- void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream)\r
- {\r
- Scalar_<T> sf = s;\r
- set_to_gpu(src, sf.val, mask, src.channels(), stream);\r
- }\r
}\r
\r
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; };\r
}\r
}\r
\r
- typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);\r
- static const set_caller_t set_callers[] =\r
- {\r
- kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,\r
- kernelSet<int>, kernelSet<float>, kernelSet<double>\r
- };\r
- set_callers[src.depth()](src, s, impl->stream);\r
+ setTo(src, s, impl->stream);\r
}\r
\r
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)\r
\r
CV_Assert(mask.type() == CV_8UC1);\r
\r
- typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream);\r
- static const set_caller_t set_callers[] =\r
- {\r
- kernelSetMask<uchar>, kernelSetMask<schar>, kernelSetMask<ushort>, kernelSetMask<short>,\r
- kernelSetMask<int>, kernelSetMask<float>, kernelSetMask<double>\r
- };\r
- set_callers[src.depth()](src, val, mask, impl->stream);\r
+ setTo(src, val, mask, impl->stream);\r
}\r
\r
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)\r
psrc = &(temp = src);\r
\r
dst.create( src.size(), rtype );\r
- convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);\r
+ convertTo(src, dst, alpha, beta, impl->stream);\r
}\r
\r
cv::gpu::Stream::operator bool() const\r
{\r
namespace gpu\r
{\r
- void error(const char *error_string, const char *file, const int line, const char *func)\r
- { \r
- int code = CV_GpuApiCallError;\r
-\r
- if (uncaught_exception())\r
- {\r
- const char* errorStr = cvErrorStr(code); \r
- const char* function = func ? func : "unknown function"; \r
-\r
- cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;\r
- cerr.flush(); \r
- }\r
- else \r
- cv::error( cv::Exception(code, error_string, func, file, line) );\r
- }\r
-\r
void nppError(int code, const char *file, const int line, const char *func)\r
{\r
string msg = getErrorString(code, npp_errors, npp_error_num);\r
setDevice(prev_device_id);\r
}\r
\r
-////////////////////////////////////////////////////////////////////\r
-// GpuFuncTable\r
-\r
-namespace cv { namespace gpu { namespace device \r
-{\r
- void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);\r
-\r
- template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);\r
- template <typename T>\r
- void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);\r
-\r
- void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);\r
-}}}\r
-\r
-namespace\r
-{\r
- //////////////////////////////////////////////////////////////////////////\r
- // Convert\r
-\r
- template<int n> struct NPPTypeTraits;\r
- template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };\r
- template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };\r
- template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };\r
- template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };\r
- template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };\r
-\r
- template<int SDEPTH, int DDEPTH> struct NppConvertFunc\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
- typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
-\r
- typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);\r
- };\r
- template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>\r
- {\r
- typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
-\r
- typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);\r
- };\r
-\r
- template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
- typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
-\r
- static void cvt(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
- template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>\r
- {\r
- typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;\r
-\r
- static void cvt(const GpuMat& src, GpuMat& dst)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
- nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
-\r
- void convertToKernelCaller(const GpuMat& src, GpuMat& dst)\r
- {\r
- ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);\r
- }\r
-\r
- //////////////////////////////////////////////////////////////////////////\r
- // Set\r
- \r
- template<int SDEPTH, int SCN> struct NppSetFunc\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);\r
- };\r
- template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);\r
- };\r
-\r
- template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- static void set(GpuMat& src, Scalar s)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- Scalar_<src_t> nppS = s;\r
-\r
- nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
- template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- static void set(GpuMat& src, Scalar s)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- Scalar_<src_t> nppS = s;\r
-\r
- nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
-\r
- template <typename T>\r
- void kernelSet(GpuMat& src, Scalar s)\r
- {\r
- Scalar_<T> sf = s;\r
- ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), 0);\r
- }\r
-\r
- template<int SDEPTH, int SCN> struct NppSetMaskFunc\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);\r
- };\r
- template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);\r
- };\r
-\r
- template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- static void set(GpuMat& src, Scalar s, const GpuMat& mask)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- Scalar_<src_t> nppS = s;\r
-\r
- nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
- template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>\r
- {\r
- typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;\r
-\r
- static void set(GpuMat& src, Scalar s, const GpuMat& mask)\r
- {\r
- NppiSize sz;\r
- sz.width = src.cols;\r
- sz.height = src.rows;\r
-\r
- Scalar_<src_t> nppS = s;\r
-\r
- nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );\r
-\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- };\r
-\r
- template <typename T>\r
- void kernelSetMask(GpuMat& src, Scalar s, const GpuMat& mask)\r
- {\r
- Scalar_<T> sf = s;\r
- ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), 0);\r
- }\r
-\r
- class CudaFuncTable : public GpuFuncTable\r
- {\r
- public:\r
- void copy(const Mat& src, GpuMat& dst) const \r
- { \r
- cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );\r
- }\r
- void copy(const GpuMat& src, Mat& dst) const\r
- { \r
- cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );\r
- }\r
- void copy(const GpuMat& src, GpuMat& dst) const\r
- { \r
- cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );\r
- }\r
-\r
- void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const \r
- { \r
- ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels());\r
- }\r
-\r
- void convert(const GpuMat& src, GpuMat& dst) const \r
- { \r
- typedef void (*caller_t)(const GpuMat& src, GpuMat& dst);\r
- static const caller_t callers[7][7][7] =\r
- {\r
- { \r
- /* 8U -> 8U */ {0, 0, 0, 0},\r
- /* 8U -> 8S */ {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},\r
- /* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},\r
- /* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},\r
- /* 8U -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 8S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8S -> 8S */ {0,0,0,0},\r
- /* 8S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8S -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 8S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},\r
- /* 16U -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16U -> 16U */ {0,0,0,0},\r
- /* 16U -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},\r
- /* 16S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16S -> 16S */ {0,0,0,0},\r
- /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 16S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 32S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32S -> 32S */ {0,0,0,0},\r
- /* 32S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 32F -> 32F */ {0,0,0,0},\r
- /* 32F -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}\r
- },\r
- {\r
- /* 64F -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},\r
- /* 64F -> 64F */ {0,0,0,0}\r
- }\r
- };\r
-\r
- caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1];\r
- CV_DbgAssert(func != 0);\r
-\r
- func(src, dst);\r
- }\r
-\r
- void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const \r
- { \r
- ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);\r
- }\r
-\r
- void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const\r
- {\r
- NppiSize sz;\r
- sz.width = m.cols;\r
- sz.height = m.rows;\r
-\r
- if (mask.empty())\r
- {\r
- if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)\r
- {\r
- cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );\r
- return;\r
- }\r
-\r
- if (m.depth() == CV_8U)\r
- {\r
- int cn = m.channels();\r
-\r
- if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))\r
- {\r
- int val = saturate_cast<uchar>(s[0]);\r
- cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );\r
- return;\r
- }\r
- }\r
-\r
- typedef void (*caller_t)(GpuMat& src, Scalar s);\r
- static const caller_t callers[7][4] =\r
- {\r
- {NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet<uchar>,kernelSet<uchar>,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},\r
- {kernelSet<schar>,kernelSet<schar>,kernelSet<schar>,kernelSet<schar>},\r
- {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,NppSet<CV_16U, 2, nppiSet_16u_C2R>::set,kernelSet<ushort>,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},\r
- {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,NppSet<CV_16S, 2, nppiSet_16s_C2R>::set,kernelSet<short>,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},\r
- {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet<int>,kernelSet<int>,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},\r
- {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet<float>,kernelSet<float>,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},\r
- {kernelSet<double>,kernelSet<double>,kernelSet<double>,kernelSet<double>}\r
- };\r
-\r
- callers[m.depth()][m.channels() - 1](m, s);\r
- }\r
- else\r
- {\r
- typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask);\r
-\r
- static const caller_t callers[7][4] =\r
- {\r
- {NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set,kernelSetMask<uchar>,kernelSetMask<uchar>,NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},\r
- {kernelSetMask<schar>,kernelSetMask<schar>,kernelSetMask<schar>,kernelSetMask<schar>},\r
- {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set,kernelSetMask<ushort>,kernelSetMask<ushort>,NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},\r
- {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set,kernelSetMask<short>,kernelSetMask<short>,NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},\r
- {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set,kernelSetMask<int>,kernelSetMask<int>,NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},\r
- {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set,kernelSetMask<float>,kernelSetMask<float>,NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},\r
- {kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>}\r
- };\r
-\r
- callers[m.depth()][m.channels() - 1](m, s, mask);\r
- }\r
- }\r
-\r
- void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const\r
- {\r
- cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );\r
- }\r
-\r
- void free(void* devPtr) const\r
- {\r
- cudaFree(devPtr);\r
- }\r
- };\r
-\r
- class Initializer\r
- {\r
- public:\r
- Initializer()\r
- {\r
- static CudaFuncTable funcTable;\r
- setGpuFuncTable(&funcTable);\r
- }\r
- };\r
-\r
- Initializer init;\r
-}\r
-\r
#endif\r
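The block removed above wired GpuMat conversion and setTo to static tables of function pointers indexed by element depth and channel count, choosing per type between an NPP primitive (NppSet/NppSetMask) and a hand-written CUDA kernel (kernelSet/kernelSetMask), and registered the whole CudaFuncTable at load time through a static Initializer object. A minimal stand-alone sketch of that dispatch pattern, not part of the patch and using hypothetical fillWithNpp/fillWithKernel handlers in place of the real callers:

#include <cassert>
#include <cstdio>

struct Image { int depth; int channels; };   // stand-in for cv::gpu::GpuMat

typedef void (*set_caller_t)(Image& m, double value);

static void fillWithNpp(Image&, double v)    { std::printf("NPP fill %f\n", v); }
static void fillWithKernel(Image&, double v) { std::printf("kernel fill %f\n", v); }

// rows = depth (CV_8U..CV_64F), columns = channel count - 1, mirroring the tables above
static const set_caller_t set_callers[7][4] =
{
    {fillWithNpp,    fillWithKernel, fillWithKernel, fillWithNpp},    // 8U
    {fillWithKernel, fillWithKernel, fillWithKernel, fillWithKernel}, // 8S
    {fillWithNpp,    fillWithNpp,    fillWithKernel, fillWithNpp},    // 16U
    {fillWithNpp,    fillWithNpp,    fillWithKernel, fillWithNpp},    // 16S
    {fillWithNpp,    fillWithKernel, fillWithKernel, fillWithNpp},    // 32S
    {fillWithNpp,    fillWithKernel, fillWithKernel, fillWithNpp},    // 32F
    {fillWithKernel, fillWithKernel, fillWithKernel, fillWithKernel}  // 64F
};

int main()
{
    Image m = {0 /*CV_8U*/, 3};
    set_caller_t func = set_callers[m.depth][m.channels - 1];
    assert(func != 0);
    func(m, 255.0);   // 8UC3 takes the kernel path, as in the removed setTo table
}

Keeping the table static and indexing it with depth and channels - 1 keeps the per-call overhead to two array lookups, which is why the same layout recurs in the masked setTo table.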
\r
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__\r
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "saturate_cast.hpp"\r
#include "vec_traits.hpp"\r
#include "vec_math.hpp"\r
#ifndef __OPENCV_GPU_COLOR_HPP__\r
#define __OPENCV_GPU_COLOR_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "detail/color_detail.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_COMMON_HPP__\r
+#define __OPENCV_GPU_COMMON_HPP__\r
+\r
+#include <cuda_runtime.h>\r
+#include "opencv2/core/devmem2d.hpp"\r
+\r
+#ifndef CV_PI\r
+ #define CV_PI 3.1415926535897932384626433832795\r
+#endif\r
+\r
+#ifndef CV_PI_F\r
+ #ifndef CV_PI\r
+ #define CV_PI_F 3.14159265f\r
+ #else\r
+ #define CV_PI_F ((float)CV_PI)\r
+ #endif\r
+#endif\r
+\r
+#if defined(__GNUC__)\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)\r
+#else /* defined(__CUDACC__) || defined(__MSVC__) */\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)\r
+#endif\r
+\r
+namespace cv { namespace gpu \r
+{\r
+ void error(const char *error_string, const char *file, const int line, const char *func = "");\r
+}}\r
+\r
+static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
+{\r
+ if (cudaSuccess != err)\r
+ cv::gpu::error(cudaGetErrorString(err), file, line, func);\r
+}\r
+\r
+// cudaSafeCall must be visible before bindTexture below uses it\r
+namespace cv { namespace gpu \r
+{ \r
+ __host__ __device__ __forceinline__ int divUp(int total, int grain) \r
+ { \r
+ return (total + grain - 1) / grain; \r
+ }\r
+\r
+ namespace device \r
+ {\r
+ typedef unsigned char uchar;\r
+ typedef unsigned short ushort;\r
+ typedef signed char schar;\r
+ typedef unsigned int uint;\r
+\r
+ template<class T> inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)\r
+ {\r
+ cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();\r
+ cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );\r
+ }\r
+ }\r
+}}\r
+\r
+#endif // __OPENCV_GPU_COMMON_HPP__\r
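The new common.hpp above centralizes what every .cu file needs: divUp for rounding grid dimensions up, cudaSafeCall for turning CUDA runtime errors into cv::gpu::error with file/line information, and bindTexture for 2D texture binding. A usage sketch, not part of the patch: the fill kernel and fill_caller wrapper below are made up for illustration, and the header is assumed to be reachable on the include path.

#include <cuda_runtime.h>
#include "common.hpp"

__global__ void fill(unsigned char* data, size_t step, int rows, int cols, unsigned char val)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < cols && y < rows)
        data[y * step + x] = val;   // step is the row pitch in bytes
}

void fill_caller(unsigned char* data, size_t step, int rows, int cols, unsigned char val)
{
    dim3 block(32, 8);
    // divUp rounds the grid size up so the last partial block is still launched
    dim3 grid(cv::gpu::divUp(cols, block.x), cv::gpu::divUp(rows, block.y));

    fill<<<grid, block>>>(data, step, rows, cols, val);

    // cudaSafeCall forwards any failure to cv::gpu::error with file/line context
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );
}

divUp(total, grain) computes ceil(total/grain) in integer arithmetic, so every pixel is covered even when the image size is not a multiple of the block size.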
#ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "common.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
{\r
#ifndef __OPENCV_GPU_COLOR_DETAIL_HPP__\r
#define __OPENCV_GPU_COLOR_DETAIL_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "../common.hpp"\r
#include "../vec_traits.hpp"\r
#include "../saturate_cast.hpp"\r
#include "../limits.hpp"\r
#ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__\r
#define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "../common.hpp"\r
#include "../vec_traits.hpp"\r
#include "../functional.hpp"\r
\r
#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__\r
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "../common.hpp"\r
#include "../vec_traits.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
#ifndef __OPENCV_GPU_UTILITY_DETAIL_HPP__\r
#define __OPENCV_GPU_UTILITY_DETAIL_HPP__\r
\r
-#include "internal_shared.hpp"\r
-\r
namespace cv { namespace gpu { namespace device \r
{\r
namespace utility_detail\r
#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__\r
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "../datamov_utils.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__\r
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__\r
\r
-#include "internal_shared.hpp"\r
-\r
-BEGIN_OPENCV_DEVICE_NAMESPACE\r
- \r
-template<class T> struct DynamicSharedMem\r
-{\r
- __device__ __forceinline__ operator T*()\r
- {\r
- extern __shared__ int __smem[];\r
- return (T*)__smem;\r
- }\r
-\r
- __device__ __forceinline__ operator const T*() const\r
+namespace cv { namespace gpu { namespace device\r
+{ \r
+ template<class T> struct DynamicSharedMem\r
{\r
- extern __shared__ int __smem[];\r
- return (T*)__smem;\r
- }\r
-};\r
+ __device__ __forceinline__ operator T*()\r
+ {\r
+ extern __shared__ int __smem[];\r
+ return (T*)__smem;\r
+ }\r
\r
-// specialize for double to avoid unaligned memory access compile errors\r
-template<> struct DynamicSharedMem<double>\r
-{\r
- __device__ __forceinline__ operator double*()\r
- {\r
- extern __shared__ double __smem_d[];\r
- return (double*)__smem_d;\r
- }\r
+ __device__ __forceinline__ operator const T*() const\r
+ {\r
+ extern __shared__ int __smem[];\r
+ return (T*)__smem;\r
+ }\r
+ };\r
\r
- __device__ __forceinline__ operator const double*() const\r
+ // specialize for double to avoid unaligned memory access compile errors\r
+ template<> struct DynamicSharedMem<double>\r
{\r
- extern __shared__ double __smem_d[];\r
- return (double*)__smem_d;\r
- }\r
-};\r
+ __device__ __forceinline__ operator double*()\r
+ {\r
+ extern __shared__ double __smem_d[];\r
+ return (double*)__smem_d;\r
+ }\r
\r
-END_OPENCV_DEVICE_NAMESPACE\r
+ __device__ __forceinline__ operator const double*() const\r
+ {\r
+ extern __shared__ double __smem_d[];\r
+ return (double*)__smem_d;\r
+ }\r
+ };\r
+}}}\r
\r
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__\r
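DynamicSharedMem, moved above into the explicit cv::gpu::device namespace, gives typed access to the dynamically sized extern __shared__ block, with a double specialization to avoid unaligned-access errors. A hypothetical kernel showing the intended use, not part of the patch; the reverse kernel and launch line are illustrative only:

#include "dynamic_smem.hpp"   // assumed to be on the include path

template <typename T>
__global__ void reverse(const T* src, T* dst, int n)
{
    cv::gpu::device::DynamicSharedMem<T> smem;
    T* buf = smem;                         // operator T*() returns the __shared__ block

    int i = threadIdx.x;
    if (i < n)
        buf[i] = src[i];
    __syncthreads();
    if (i < n)
        dst[i] = buf[n - 1 - i];
}

// launch with the byte count as the third configuration parameter:
// reverse<float><<<1, n, n * sizeof(float)>>>(d_src, d_dst, n);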
#ifndef OPENCV_GPU_EMULATION_HPP_\r
#define OPENCV_GPU_EMULATION_HPP_\r
\r
-#include "internal_shared.hpp"\r
#include "warp_reduce.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
#ifndef __OPENCV_GPU_FILTERS_HPP__\r
#define __OPENCV_GPU_FILTERS_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "saturate_cast.hpp"\r
#include "vec_traits.hpp"\r
#include "vec_math.hpp"\r
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_\r
\r
#include <cstdio>\r
-#include "internal_shared.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
{\r
#define __OPENCV_GPU_FUNCTIONAL_HPP__\r
\r
#include <thrust/functional.h>\r
-#include "internal_shared.hpp"\r
#include "saturate_cast.hpp"\r
#include "vec_traits.hpp"\r
#include "type_traits.hpp"\r
#define __OPENCV_GPU_LIMITS_GPU_HPP__\r
\r
#include <limits>\r
-#include "internal_shared.hpp"\r
+#include "common.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
{\r
#ifndef __OPENCV_GPU_SATURATE_CAST_HPP__\r
#define __OPENCV_GPU_SATURATE_CAST_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "common.hpp"\r
\r
namespace cv { namespace gpu { namespace device\r
{\r
#ifndef __OPENCV_GPU_TRANSFORM_HPP__\r
#define __OPENCV_GPU_TRANSFORM_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "common.hpp"\r
#include "utility.hpp"\r
#include "detail/transform_detail.hpp"\r
\r
#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__\r
#define __OPENCV_GPU_TYPE_TRAITS_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "detail/type_traits_detail.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
#ifndef __OPENCV_GPU_UTILITY_HPP__\r
#define __OPENCV_GPU_UTILITY_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "saturate_cast.hpp"\r
#include "datamov_utils.hpp"\r
#include "detail/utility_detail.hpp"\r
#ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__\r
#define __OPENCV_GPU_VEC_DISTANCE_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "utility.hpp"\r
#include "functional.hpp"\r
#include "detail/vec_distance_detail.hpp"\r
#ifndef __OPENCV_GPU_VECMATH_HPP__\r
#define __OPENCV_GPU_VECMATH_HPP__\r
\r
-#include "internal_shared.hpp"\r
#include "saturate_cast.hpp"\r
#include "vec_traits.hpp"\r
#include "functional.hpp"\r
#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__\r
#define __OPENCV_GPU_VEC_TRAITS_HPP__\r
\r
-#include "internal_shared.hpp"\r
+#include "common.hpp"\r
\r
namespace cv { namespace gpu { namespace device \r
{\r
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__\r
#define __OPENCV_GPU_DEVICE_WARP_HPP__\r
\r
-#include "internal_shared.hpp"\r
-\r
namespace cv { namespace gpu { namespace device \r
{\r
struct Warp\r
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__\r
#define OPENCV_GPU_WARP_REDUCE_HPP__\r
\r
-#include "internal_shared.hpp"\r
-\r
namespace cv { namespace gpu { namespace device \r
{ \r
template <class T> \r