From 8ed47c01b7c0aa6fd48acaac38c063cd91f856e9 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Tue, 1 Oct 2013 15:28:51 +0400
Subject: [PATCH] used new device layer for cv::cuda::norm

---
 modules/cudaarithm/src/cuda/norm.cu   | 119 ++++++++++++++++++++++++++++++++++
 modules/cudaarithm/src/reductions.cpp |  53 ---------------
 2 files changed, 119 insertions(+), 53 deletions(-)
 create mode 100644 modules/cudaarithm/src/cuda/norm.cu

diff --git a/modules/cudaarithm/src/cuda/norm.cu b/modules/cudaarithm/src/cuda/norm.cu
new file mode 100644
index 0000000..bda6b45
--- /dev/null
+++ b/modules/cudaarithm/src/cuda/norm.cu
@@ -0,0 +1,119 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv::cudev;
+
+namespace
+{
+    double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+
+        gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
+
+        int data[2];
+        buf.download(cv::Mat(1, 2, buf.type(), data));
+
+        return data[1];
+    }
+
+    double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+
+        gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
+
+        int data;
+        buf.download(cv::Mat(1, 1, buf.type(), &data));
+
+        return data;
+    }
+
+    double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    {
+        const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
+        const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
+        GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
+
+        gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
+
+        double data;
+        buf.download(cv::Mat(1, 1, buf.type(), &data));
+
+        return std::sqrt(data);
+    }
+}
+
+double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
+{
+    typedef double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf);
+    static const func_t funcs[] =
+    {
+        0, normDiffInf, normDiffL1, 0, normDiffL2
+    };
+
+    GpuMat src1 = _src1.getGpuMat();
+    GpuMat src2 = _src2.getGpuMat();
+
+    CV_Assert( src1.type() == CV_8UC1 );
+    CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
+    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
+
+    return funcs[normType](src1, src2, buf);
+}
+
+#endif
diff --git a/modules/cudaarithm/src/reductions.cpp b/modules/cudaarithm/src/reductions.cpp
index 5a4a2df..c1e2af4 100644
--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@@ -133,59 +133,6 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& b
     return std::max(std::abs(min_val), std::abs(max_val));
 }
 
-double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
-{
-#if CUDA_VERSION < 5050
-    (void) buf;
-
-    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
-
-    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
-#else
-    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-        NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
-
-    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
-
-    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
-
-    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
-#endif
-
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
-
-    CV_Assert( src1.type() == CV_8UC1 );
-    CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
-    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
-
-    NppiSize sz;
-    sz.width  = src1.cols;
-    sz.height = src1.rows;
-
-    const int funcIdx = normType >> 1;
-
-    DeviceBuffer dbuf;
-
-#if CUDA_VERSION < 5050
-    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
-#else
-    int bufSize;
-    buf_size_funcs[funcIdx](sz, &bufSize);
-
-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
-
-    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
-#endif
-
-    cudaSafeCall( cudaDeviceSynchronize() );
-
-    double retVal;
-    dbuf.download(&retVal);
-
-    return retVal;
-}
-
 ////////////////////////////////////////////////////////////////////////
 // meanStdDev
 
-- 
2.7.4
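
Usage note (not part of the patch): a minimal sketch of how the buffered
cv::cuda::norm(src1, src2, buf, normType) overload touched above could be called
from client code. The image size, the synthetic random data, and the NORM_L2
choice are illustrative assumptions; this overload only accepts CV_8UC1 inputs.

    // Compare two 8-bit single-channel GPU images with the buffered overload.
    // "buf" is the reusable scratch buffer the reduction writes into.
    #include <iostream>
    #include <opencv2/core.hpp>
    #include <opencv2/cudaarithm.hpp>

    int main()
    {
        cv::Mat h_a(480, 640, CV_8UC1), h_b(480, 640, CV_8UC1);
        cv::randu(h_a, 0, 256);   // synthetic test data
        cv::randu(h_b, 0, 256);

        cv::cuda::GpuMat d_a(h_a), d_b(h_b);   // upload to the device
        cv::cuda::GpuMat buf;                  // scratch buffer, reused across calls

        // L2 norm of the per-pixel difference, now computed via the device layer.
        double dist = cv::cuda::norm(d_a, d_b, buf, cv::NORM_L2);
        std::cout << "||a - b||_2 = " << dist << std::endl;
        return 0;
    }

Keeping buf alive across calls avoids reallocating the reduction buffer each time,
which is the point of the buffered overload retained by this change.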