From b6c33bf86b9986911782dd92f7b43fabe20b80e6 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 6 Dec 2013 19:04:50 +0400
Subject: [PATCH] added cv::norm to T-API

---
 modules/core/src/ocl.cpp              |   2 +-
 modules/core/src/opencl/arithm.cl     |  21 ++++--
 modules/core/src/stat.cpp             | 120 ++++++++++++++++++++++++++++++++--
 modules/core/test/ocl/test_arithm.cpp | 119 ++++++++++++++++++++++++++++++---
 4 files changed, 242 insertions(+), 20 deletions(-)
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index f733dd1..0cff7b5 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -2379,7 +2379,7 @@ struct Program::Impl
                 size_t retsz = 0;
                 retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
                                                CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
-                if( retval >= 0 && retsz > 0 )
+                if( retval >= 0 && retsz > 1 )
                 {
                     AutoBuffer<char> bufbuf(retsz + 16);
                     char* buf = bufbuf;
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index b4cdb53..9c86057 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -58,10 +58,10 @@
 */
 
 #ifdef DOUBLE_SUPPORT
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #define CV_EPSILON DBL_EPSILON
 #define CV_PI M_PI
@@ -76,12 +76,18 @@
 
 #ifndef workT
 
+    #ifndef srcT1
     #define srcT1 dstT
+    #endif
+    #ifndef srcT2
     #define srcT2 dstT
+    #endif
     #define workT dstT
-    #define srcelem1 *(__global dstT*)(srcptr1 + src1_index)
-    #define srcelem2 *(__global dstT*)(srcptr2 + src2_index)
+    #define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
+    #define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
+    #ifndef convertToDT
     #define convertToDT noconvert
+    #endif
 
 #else
 
@@ -160,6 +166,11 @@
 #elif defined OP_MAG
 #define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)
 
+#elif defined OP_ABS_NOSAT // absolute value of srcelem1, taken on the convertToDT-converted value held in type dstT
+#define PROCESS_ELEM \
+    dstT v = convertToDT(srcelem1); \
+    dstelem = v >= 0 ? v : -v
+
 #elif defined OP_PHASE_RADIANS
 #define PROCESS_ELEM \
         workT tmp = atan2(srcelem2, srcelem1); \
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index b19be3b..d04857c 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -1760,15 +1760,76 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth)
 
 }
 
-double cv::norm( InputArray _src, int normType, InputArray _mask )
+namespace cv {
+
+// OpenCL path for single-array cv::norm: on success stores the value in 'result' and returns true; false means "not handled".
+static bool ocl_norm( InputArray _src, int normType, double & result )
 {
-    Mat src = _src.getMat(), mask = _mask.getMat();
-    int depth = src.depth(), cn = src.channels();
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 
-    normType &= 7;
+    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
+         (!doubleSupport && depth == CV_64F))
+        return false;
+
+    UMat src = _src.getUMat();
+    if (normType == NORM_INF)
+    {
+        UMat abssrc;
+        if (depth != CV_8U && depth != CV_16U)
+        {
+            // other depths: write |src| into a buffer of depth max(CV_32S, depth) using the OP_ABS_NOSAT kernel
+            int wdepth = std::max(CV_32S, depth);
+            char cvt[50];
+            ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc,
+                             format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s",
+                                    ocl::typeToStr(wdepth), ocl::typeToStr(depth),
+                                    ocl::convertTypeStr(depth, wdepth, 1, cvt),
+                                    doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+            if (kabs.empty())
+                return false;
+
+            abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn));
+            kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn));
+            // explicit casts: int -> size_t inside a braced initializer is a narrowing conversion
+            size_t globalsize[2] = { (size_t)src.cols * cn, (size_t)src.rows };
+            if (!kabs.run(2, globalsize, NULL, false))
+                return false;
+        }
+        else
+            abssrc = src; // CV_8U / CV_16U data is passed to minMaxIdx as it is
+
+        cv::minMaxIdx(abssrc.reshape(1), NULL, &result);
+    }
+    else if (normType == NORM_L1 || normType == NORM_L2)
+    {
+        Scalar s;
+        bool unstype = depth == CV_8U || depth == CV_16U;
+        // propagate an ocl_sum failure instead of reporting success without a computed sum
+        if (!ocl_sum(src.reshape(1), s, normType == NORM_L2 ?
+                     OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS) ))
+            return false;
+        result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]);
+    }
+
+    return true;
+}
+
+}
+
+double cv::norm( InputArray _src, int normType, InputArray _mask )
+{
+    normType &= NORM_TYPE_MASK;
     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
                normType == NORM_L2 || normType == NORM_L2SQR ||
-               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
+               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
+
+    double _result = 0;
+    if (ocl::useOpenCL() && _mask.empty() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _result))
+        return _result;
+
+    Mat src = _src.getMat(), mask = _mask.getMat();
+    int depth = src.depth(), cn = src.channels();
 
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     size_t total_size = src.total();
@@ -2047,9 +2108,56 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
     return result.d;
 }
 
+namespace cv {
+
+// OpenCL path for two-array cv::norm: runs the OP_ABSDIFF kernel into 'diff' and stores norm(diff) in 'result'; false = not handled.
+static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, double & result )
+{
+    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    bool relative = (normType & NORM_RELATIVE) != 0;
+    normType &= ~NORM_RELATIVE; // the flag is remembered in 'relative' and applied after the norm is known
+
+    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
+         (!doubleSupport && depth == CV_64F))
+        return false;
+
+    int wdepth = std::max(CV_32S, depth);
+    char cvt[50];
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                  format("-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
+                         " -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT%s",
+                         ocl::typeToStr(wdepth), ocl::typeToStr(depth),
+                         ocl::convertTypeStr(depth, wdepth, 1, cvt),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), diff(src1.size(), CV_MAKE_TYPE(wdepth, cn));
+    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
+           ocl::KernelArg::WriteOnly(diff, cn));
+    // explicit casts: int -> size_t inside a braced initializer is a narrowing conversion
+    size_t globalsize[2] = { (size_t)diff.cols * cn, (size_t)diff.rows };
+    if (!k.run(2, globalsize, NULL, false))
+        return false;
+
+    result = cv::norm(diff, normType);
+    if (relative) // relative norm: divide by norm(src2); DBL_EPSILON keeps the divisor non-zero
+        result /= cv::norm(src2, normType) + DBL_EPSILON;
+
+    return true;
+}
+
+}
 
 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
 {
+    CV_Assert( _src1.size() == _src2.size() && _src1.type() == _src2.type() );
+
+    double _result = 0;
+    if (ocl::useOpenCL() && _mask.empty() && _src1.isUMat() && _src2.isUMat() &&
+            _src1.dims() <= 2 && _src2.dims() <= 2 && ocl_norm(_src1, _src2, normType, _result))
+        return _result;
+
     if( normType & CV_RELATIVE )
     {
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
@@ -2135,7 +2243,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
     int depth = src1.depth(), cn = src1.channels();
 
-    CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
+    CV_Assert( src1.size == src2.size );
 
     normType &= 7;
     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index 844be7b..045fbd7 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -795,8 +795,8 @@ struct RepeatTestCase :
     {
         const int type = CV_MAKE_TYPE(depth, cn);
 
-        nx = 2;//randomInt(1, 4);
-        ny = 2;//randomInt(1, 4);
+        nx = randomInt(1, 4);
+        ny = randomInt(1, 4);
 
         Size srcRoiSize = randomSize(1, MAX_VALUE);
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@@ -813,7 +813,7 @@ struct RepeatTestCase :
 
 typedef RepeatTestCase Repeat;
 
-OCL_TEST_P(Repeat, DISABLED_Mat)
+OCL_TEST_P(Repeat, Mat)
 {
     for (int i = 0; i < test_loop_times; ++i)
     {
@@ -1004,6 +1004,108 @@ OCL_TEST_P(Flip, BOTH)
     }
 }
 
+//////////////////////////////// Norm /////////////////////////////////////////////////
+// True when |actual - expected| is below eps * |actual|; identical values (two zero norms included) always pass.
+static bool relativeError(double actual, double expected, double eps)
+{
+    return actual == expected || std::abs(actual - expected) < eps * std::abs(actual);
+}
+
+typedef ArithmTestBase Norm;
+
+OCL_TEST_P(Norm, NORM_INF_1arg)
+{
+    // each round: new test data, then norm of src1_roi under OCL_OFF against norm of usrc1_roi under OCL_ON
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        const int normKind = NORM_INF;
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normKind));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normKind));
+        EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L1_1arg)
+{
+    // each round: new test data, then norm of src1_roi under OCL_OFF against norm of usrc1_roi under OCL_ON
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        const int normKind = NORM_L1;
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normKind));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normKind));
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L2_1arg)
+{
+    // each round: new test data, then norm of src1_roi under OCL_OFF against norm of usrc1_roi under OCL_ON
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        const int normKind = NORM_L2;
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normKind));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normKind));
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_INF_2args)
+{
+    // pass 0 checks the plain norm of the pair, pass 1 the same norm with NORM_RELATIVE set
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        const int flags = pass == 1 ? (NORM_INF | NORM_RELATIVE) : NORM_INF;
+        for (int iter = 0; iter < test_loop_times; iter++)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, flags));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, flags));
+
+            EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+        }
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L1_2args)
+{
+    // pass 0 checks the plain norm of the pair, pass 1 the same norm with NORM_RELATIVE set
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        const int flags = pass == 1 ? (NORM_L1 | NORM_RELATIVE) : NORM_L1;
+        for (int iter = 0; iter < test_loop_times; iter++)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, flags));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, flags));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L2_2args)
+{
+    // pass 0 checks the plain norm of the pair, pass 1 the same norm with NORM_RELATIVE set
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        const int flags = pass == 1 ? (NORM_L2 | NORM_RELATIVE) : NORM_L2;
+        for (int iter = 0; iter < test_loop_times; iter++)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, flags));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, flags));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@@ -1017,10 +1119,10 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHA
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
@@ -1033,7 +1135,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(::testing::Values(CV_32F, CV_64
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl
 
-- 
2.7.4