fixed build under CUDA 4.1
author Vladislav Vinogradov <no@email>
Mon, 30 Jan 2012 13:15:20 +0000 (13:15 +0000)
committer Vladislav Vinogradov <no@email>
Mon, 30 Jan 2012 13:15:20 +0000 (13:15 +0000)
modules/gpu/src/element_operations.cpp
modules/gpu/src/graphcuts.cpp
modules/gpu/src/imgproc.cpp
modules/gpu/src/matrix_reductions.cpp

index 3081303..230a0f8 100644 (file)
@@ -680,6 +680,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
 \r
     bool aligned = isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16);\r
 \r
+#if CUDART_VERSION == 4000 \r
     if (aligned && src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)\r
     {\r
         NppStreamHandler h(stream);\r
@@ -692,42 +693,48 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
         if (stream == 0)\r
             cudaSafeCall( cudaDeviceSynchronize() );\r
     }\r
-    else if (aligned && src1.depth() == CV_8U)\r
+    else \r
+#endif\r
     {\r
-        NppStreamHandler h(stream);\r
+        if (aligned && src1.depth() == CV_8U)\r
+        {\r
+            NppStreamHandler h(stream);\r
 \r
-        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), \r
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );\r
+            nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), \r
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );\r
 \r
-        if (stream == 0)\r
-            cudaSafeCall( cudaDeviceSynchronize() );\r
-    }\r
-    else if (aligned && src1.depth() == CV_32S)\r
-    {\r
-        NppStreamHandler h(stream);\r
+            if (stream == 0)\r
+                cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+#if CUDART_VERSION == 4000 \r
+        else if (aligned && src1.depth() == CV_32S)\r
+        {\r
+            NppStreamHandler h(stream);\r
 \r
-        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step), \r
-            dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );\r
+            nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), static_cast<int>(src1.step), src2.ptr<Npp32s>(), static_cast<int>(src2.step), \r
+                dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) );\r
 \r
-        if (stream == 0)\r
-            cudaSafeCall( cudaDeviceSynchronize() );\r
-    }\r
-    else if (aligned && src1.depth() == CV_32F)\r
-    {\r
-        NppStreamHandler h(stream);\r
+            if (stream == 0)\r
+                cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+#endif\r
+        else if (aligned && src1.depth() == CV_32F)\r
+        {\r
+            NppStreamHandler h(stream);\r
 \r
-        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step), \r
-            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
+            nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step), \r
+                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
 \r
-        if (stream == 0)\r
-            cudaSafeCall( cudaDeviceSynchronize() );\r
-    }\r
-    else\r
-    {\r
-        const func_t func = funcs[src1.depth()];\r
-        CV_Assert(func != 0);\r
+            if (stream == 0)\r
+                cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+        else\r
+        {\r
+            const func_t func = funcs[src1.depth()];\r
+            CV_Assert(func != 0);\r
 \r
-        func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
+            func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
+    }\r
     }\r
 }\r
 \r
index f3c4a06..90ccadc 100644 (file)
@@ -77,8 +77,18 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
 \r
     NppStreamHandler h(stream);\r
 \r
+#if CUDART_VERSION > 4000 \r
+    NppiGraphcutState* pState;\r
+    nppSafeCall( nppiGraphcutInitAlloc(sznpp, &pState, buf.ptr<Npp8u>()) );\r
+    \r
+    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),\r
+        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), pState) );\r
+\r
+    nppSafeCall( nppiGraphcutFree(pState) );\r
+#else\r
     nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),\r
         static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );\r
+#endif\r
 \r
     if (stream == 0)\r
         cudaSafeCall( cudaDeviceSynchronize() );\r
index e5ea90c..ed243a3 100644 (file)
@@ -935,6 +935,31 @@ void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
 \r
 void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)\r
 {\r
+#if CUDART_VERSION > 4000 \r
+    CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1);\r
+\r
+    dst.create(src.size(), CV_32FC1);\r
+\r
+    NppiSize sz;\r
+    sz.width = src.cols;\r
+    sz.height = src.rows;\r
+\r
+    NppiRect nppRect;\r
+    nppRect.height = rect.height;\r
+    nppRect.width = rect.width;\r
+    nppRect.x = rect.x;\r
+    nppRect.y = rect.y;\r
+\r
+    cudaStream_t stream = StreamAccessor::getStream(s);\r
+\r
+    NppStreamHandler h(stream);\r
+\r
+    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step),\r
+                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );\r
+\r
+    if (stream == 0)\r
+        cudaSafeCall( cudaDeviceSynchronize() );\r
+#else\r
     CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_32FC1);\r
 \r
     dst.create(src.size(), CV_32FC1);\r
@@ -958,6 +983,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
 \r
     if (stream == 0)\r
         cudaSafeCall( cudaDeviceSynchronize() );\r
+#endif\r
 }\r
 \r
 \r
index 71ce0b7..10e4e81 100644 (file)
@@ -117,7 +117,15 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
 \r
     DeviceBuffer dbuf(2);\r
 \r
+#if CUDART_VERSION > 4000 \r
+    int bufSize;\r
+    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );\r
+\r
+    GpuMat buf(1, bufSize, CV_8UC1);\r
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );\r
+#else\r
     nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, dbuf, (double*)dbuf + 1) );\r
+#endif\r
 \r
     cudaSafeCall( cudaDeviceSynchronize() );\r
     \r