\r
bool aligned = isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16);\r
\r
+#if CUDART_VERSION == 4000 \r
if (aligned && src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)\r
{\r
NppStreamHandler h(stream);\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
- else if (aligned && src1.depth() == CV_8U)\r
+ else \r
+#endif\r
{\r
- NppStreamHandler h(stream);\r
+ if (aligned && src1.depth() == CV_8U)\r
+ {\r
+ NppStreamHandler h(stream);\r
\r
- nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), \r
- dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), \r
+ dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );\r
\r
- if (stream == 0)\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- else if (aligned && src1.depth() == CV_32S)\r
- {\r
- NppStreamHandler h(stream);\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+#if CUDART_VERSION == 4000 \r
+ else if (aligned && src1.depth() == CV_32S)\r
+ {\r
+ NppStreamHandler h(stream);\r
\r
- nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step), \r
- dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step), \r
+ dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );\r
\r
- if (stream == 0)\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- else if (aligned && src1.depth() == CV_32F)\r
- {\r
- NppStreamHandler h(stream);\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+#endif\r
+ else if (aligned && src1.depth() == CV_32F)\r
+ {\r
+ NppStreamHandler h(stream);\r
\r
- nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step), \r
- dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step), \r
+ dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
\r
- if (stream == 0)\r
- cudaSafeCall( cudaDeviceSynchronize() );\r
- }\r
- else\r
- {\r
- const func_t func = funcs[src1.depth()];\r
- CV_Assert(func != 0);\r
+ if (stream == 0)\r
+ cudaSafeCall( cudaDeviceSynchronize() );\r
+ }\r
+ else\r
+ {\r
+ const func_t func = funcs[src1.depth()];\r
+ CV_Assert(func != 0);\r
\r
- func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
+ func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
+ }\r
}\r
}\r
\r
\r
NppStreamHandler h(stream);\r
\r
+#if CUDART_VERSION > 4000 \r
+ NppiGraphcutState* pState;\r
+ nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );\r
+ \r
+ nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),\r
+ static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );\r
+\r
+ nppSafeCall( nppiGraphcutFree(pState) );\r
+#else\r
nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),\r
static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );\r
+#endif\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
\r
// Validates the input types, allocates dst as CV_32FC1 with the size of src, and
// (for CUDART_VERSION > 4000) calls nppiRectStdDev_32s32f_C1R for the region `rect`.
// The required type of `sqr` depends on the CUDA runtime version: CV_64FC1 when
// CUDART_VERSION > 4000, CV_32FC1 otherwise.
// NOTE(review): presumably src/sqr are the integral and squared-integral images
// expected by NPP - confirm against the NPP documentation.
// NOTE(review): this text is a diff excerpt; part of the #else branch is not shown here.
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
{
+#if CUDART_VERSION > 4000 
+ CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1); // this branch requires a 64-bit float sqr
+
+ dst.create(src.size(), CV_32FC1); // output has the same size as src, single-channel 32-bit float
+
+ NppiSize sz; // full extent of src, passed to NPP as the ROI size argument
+ sz.width = src.cols;
+ sz.height = src.rows;
+
+ NppiRect nppRect; // field-by-field copy of the caller's cv::Rect into the NPP rect type
+ nppRect.height = rect.height;
+ nppRect.width = rect.width;
+ nppRect.x = rect.x;
+ nppRect.y = rect.y;
+
+ cudaStream_t stream = StreamAccessor::getStream(s); // raw CUDA stream behind the Stream wrapper
+
+ NppStreamHandler h(stream); // NOTE(review): presumably scopes the NPP stream to `stream` - confirm in its definition
+
+ nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),
+ dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) ); // steps are narrowed to int as the NPP signature expects
+
+ if (stream == 0) // stream handle 0: wait for the device before returning
+ cudaSafeCall( cudaDeviceSynchronize() );
+#else
CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1); // this branch requires a 32-bit float sqr instead

dst.create(src.size(), CV_32FC1); // output has the same size as src, single-channel 32-bit float

if (stream == 0) // `stream` is not declared in the visible lines - presumably set up in code elided from this excerpt
cudaSafeCall( cudaDeviceSynchronize() );
+#endif
}
\r
\r