#if defined(USE_CUDA)
-#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
-#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
+// Disable NPP for this file
+//#define USE_NPP
+#undef USE_NPP
+#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
// Raises an OpenCV GPU error (via cv::gpu::error) when a CUDA runtime call
// did not return cudaSuccess; the file/line/func of the call site are
// forwarded so the error message points at the offending expression.
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
    if (err != cudaSuccess)
        cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
+#ifdef USE_NPP
+
+#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (err < 0)
}
}
+#endif
+
namespace cv { namespace gpu { namespace device
{
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
}
+#ifdef USE_NPP
+
// Compile-time map from an OpenCV depth constant (CV_8U, CV_8S, CV_32F, CV_64F)
// to the matching NPP scalar type (Npp8u, Npp8s, ...). Only the depths
// specialized below are usable; any other depth leaves the primary template
// undefined and fails to compile, which is intentional.
template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
+#endif
+
//////////////////////////////////////////////////////////////////////////
// Convert
+#ifdef USE_NPP
+
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
}
};
+#endif
+
//////////////////////////////////////////////////////////////////////////
// Set
+#ifdef USE_NPP
+
template<int SDEPTH, int SCN> struct NppSetFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
}
};
+#endif
+
//////////////////////////////////////////////////////////////////////////
// CopyMasked
+#ifdef USE_NPP
+
template<int SDEPTH> struct NppCopyMaskedFunc
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
}
};
+#endif
+
// True when ptr lies on a size-byte boundary, i.e. its numeric address is an
// exact multiple of size. Used to decide whether wider (vectorized) accesses
// are safe for a given pointer.
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
    const size_t address = reinterpret_cast<size_t>(ptr);
    return (address % size) == 0;
}
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
+
+#ifdef USE_NPP
static const func_t funcs[7][4] =
{
/* 8U */ {NppCopyMasked<CV_8U , nppiCopy_8u_C1MR >::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyMasked<CV_8U , nppiCopy_8u_C4MR >::call},
};
const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::device::copyWithMask;
+#else
+ const func_t func = cv::gpu::device::copyWithMask;
+#endif
func(src, dst, mask, 0);
}
void convert(const GpuMat& src, GpuMat& dst) const
{
typedef void (*func_t)(const GpuMat& src, GpuMat& dst);
+
+#ifdef USE_NPP
static const func_t funcs[7][7][4] =
{
{
/* 64F -> 64F */ {0,0,0,0}
}
};
+#endif
CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
CV_Assert(dst.depth() <= CV_64F);
return;
}
+#ifdef USE_NPP
const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
CV_DbgAssert(func != 0);
+#else
+ const func_t func = cv::gpu::device::convertTo;
+#endif
func(src, dst);
}
}
typedef void (*func_t)(GpuMat& src, Scalar s);
+
+#ifdef USE_NPP
static const func_t funcs[7][4] =
{
{NppSet<CV_8U , 1, nppiSet_8u_C1R >::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_8U , 4, nppiSet_8u_C4R >::call},
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
{cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo }
};
+#endif
CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
+#ifdef USE_NPP
+ const func_t func = funcs[m.depth()][m.channels() - 1];
+#else
+ const func_t func = cv::gpu::device::setTo;
+#endif
+
if (stream)
cv::gpu::device::setTo(m, s, stream);
else
- funcs[m.depth()][m.channels() - 1](m, s);
+ func(m, s);
}
else
{
typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask);
+
+#ifdef USE_NPP
static const func_t funcs[7][4] =
{
{NppSetMask<CV_8U , 1, nppiSet_8u_C1MR >::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_8U , 4, nppiSet_8u_C4MR >::call},
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
{cv::gpu::device::setTo , cv::gpu::device::setTo, cv::gpu::device::setTo, cv::gpu::device::setTo }
};
+#endif
CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
+#ifdef USE_NPP
+ const func_t func = funcs[m.depth()][m.channels() - 1];
+#else
+ const func_t func = cv::gpu::device::setTo;
+#endif
+
if (stream)
cv::gpu::device::setTo(m, s, mask, stream);
else
- funcs[m.depth()][m.channels() - 1](m, s, mask);
+ func(m, s, mask);
}
}