add performance test code for ocl module
author niko <newlife20080214@gmail.com>
Wed, 8 Aug 2012 09:09:29 +0000 (17:09 +0800)
committer niko <newlife20080214@gmail.com>
Wed, 8 Aug 2012 09:09:29 +0000 (17:09 +0800)
14 files changed:
modules/ocl/CMakeLists.txt
modules/ocl/perf/interpolation.hpp [new file with mode: 0644]
modules/ocl/perf/main.cpp [new file with mode: 0644]
modules/ocl/perf/perf_test_ocl.cpp [new file with mode: 0644]
modules/ocl/perf/precomp.cpp [new file with mode: 0644]
modules/ocl/perf/precomp.hpp [new file with mode: 0644]
modules/ocl/perf/test_arithm.cpp [new file with mode: 0644]
modules/ocl/perf/test_filters.cpp [new file with mode: 0644]
modules/ocl/perf/test_haar.cpp [new file with mode: 0644]
modules/ocl/perf/test_imgproc.cpp [new file with mode: 0644]
modules/ocl/perf/test_matrix_operation.cpp [new file with mode: 0644]
modules/ocl/perf/test_split_merge.cpp [new file with mode: 0644]
modules/ocl/perf/utility.cpp [new file with mode: 0644]
modules/ocl/perf/utility.hpp [new file with mode: 0644]

index 2459577..a6496ae 100644 (file)
@@ -64,8 +64,8 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
 ################################################################################################################
 ################################   OpenCL Module Performance  ##################################################
 ################################################################################################################
-#file(GLOB perf_srcs "perf/*.cpp")
-#file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
+file(GLOB perf_srcs "perf/*.cpp")
+file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
 
-#ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
-#                       FILES "Src" ${perf_srcs})
+ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
+                       FILES "Src" ${perf_srcs})
diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp
new file mode 100644 (file)
index 0000000..d918004
--- /dev/null
@@ -0,0 +1,120 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
+#define __OPENCV_TEST_INTERPOLATION_HPP__
+
+/**
+ * Reads channel `c` of the pixel at (y, x) from `src`.
+ * With cv::BORDER_CONSTANT, coordinates outside the image yield
+ * `borderVal.val[c]` saturated to T. For every other border mode the
+ * coordinates are first remapped with cv::borderInterpolate.
+ */
+template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+{
+    if (border_type != cv::BORDER_CONSTANT)
+    {
+        const int row = cv::borderInterpolate(y, src.rows, border_type);
+        const int col = cv::borderInterpolate(x, src.cols, border_type);
+        return src.at<T>(row, col * src.channels() + c);
+    }
+
+    const bool inside = (y >= 0 && y < src.rows && x >= 0 && x < src.cols);
+    if (!inside)
+        return cv::saturate_cast<T>(borderVal.val[c]);
+
+    return src.at<T>(y, x * src.channels() + c);
+}
+
+/** Nearest-neighbour sampler: reads the pixel at the floor of the requested coordinates. */
+template <typename T> struct NearestInterpolator
+{
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        const int row = cvFloor(y);
+        const int col = cvFloor(x);
+        return readVal<T>(src, row, col, c, border_type, borderVal);
+    }
+};
+
+/**
+ * Bilinear sampler: blends the four pixels surrounding (y - 0.5, x - 0.5),
+ * each weighted by the area of the opposite sub-rectangle, and saturates
+ * the sum to T.
+ */
+template <typename T> struct LinearInterpolator
+{
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        // shift by half a pixel before locating the surrounding pixels
+        x -= 0.5f;
+        y -= 0.5f;
+
+        const int left   = cvFloor(x);
+        const int top    = cvFloor(y);
+        const int right  = left + 1;
+        const int bottom = top + 1;
+
+        // weights of the four corners
+        const float wTopLeft     = (right - x) * (bottom - y);
+        const float wTopRight    = (x - left) * (bottom - y);
+        const float wBottomLeft  = (right - x) * (y - top);
+        const float wBottomRight = (x - left) * (y - top);
+
+        float acc = 0;
+        acc += readVal<T>(src, top,    left,  c, border_type, borderVal) * wTopLeft;
+        acc += readVal<T>(src, top,    right, c, border_type, borderVal) * wTopRight;
+        acc += readVal<T>(src, bottom, left,  c, border_type, borderVal) * wBottomLeft;
+        acc += readVal<T>(src, bottom, right, c, border_type, borderVal) * wBottomRight;
+
+        return cv::saturate_cast<T>(acc);
+    }
+};
+
+/**
+ * Bicubic sampler working on the 4x4 pixel neighbourhood around the
+ * rounded sample position.
+ */
+template <typename T> struct CubicInterpolator
+{
+    /** Evaluates the cubic through the four samples p[0..3] at parameter x. */
+    static float getValue(float p[4], float x)
+    {
+        // The literals are double, so the polynomial is evaluated in double
+        // precision and narrowed to float exactly once, on return.
+        return static_cast<float>(p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0]))));
+    }
+
+    /** Interpolates each of the four rows at x, then interpolates the four row results at y. */
+    static float getValue(float p[4][4], float x, float y)
+    {
+        float arr[4];
+
+        for (int row = 0; row < 4; ++row)
+            arr[row] = getValue(p[row], x);
+
+        return getValue(arr, y);
+    }
+
+    /**
+     * Samples channel `c` of `src` at (y, x). Pixels are fetched through
+     * readVal, so `border_type` / `borderVal` decide what out-of-image
+     * positions contribute. The result is saturated to T.
+     */
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        int ix = cvRound(x);
+        int iy = cvRound(y);
+
+        // Rows iy-2 .. iy+1 and columns ix-2 .. ix+1. The explicit cast replaces
+        // an implicit T -> float conversion inside a brace initialiser, which is
+        // a narrowing conversion and ill-formed from C++11 on.
+        float vals[4][4];
+        for (int dy = 0; dy < 4; ++dy)
+            for (int dx = 0; dx < 4; ++dx)
+                vals[dy][dx] = static_cast<float>(readVal<T>(src, iy - 2 + dy, ix - 2 + dx, c, border_type, borderVal));
+
+        return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0));
+    }
+};
+
+#endif // __OPENCV_TEST_INTERPOLATION_HPP__
diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp
new file mode 100644 (file)
index 0000000..0d9d967
--- /dev/null
@@ -0,0 +1,108 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+
+/**
+ * Prints a blank line followed by one line naming the host operating system
+ * and whether the build is 32 or 64 bit. On platforms matching none of the
+ * branches below only the blank line is printed.
+ */
+void print_info()
+{
+    printf("\n");
+#if defined _WIN32
+#   if defined _WIN64
+        puts("OS: Windows 64");
+#   else
+        puts("OS: Windows 32");
+#   endif
+// `linux` is not predefined in strict ISO modes (e.g. -std=c++98), `__linux__` is
+#elif defined __linux__ || defined linux
+#   if defined _LP64
+        puts("OS: Linux 64");
+#   else
+        puts("OS: Linux 32");
+#   endif
+#elif defined __APPLE__
+#   if defined _LP64
+        puts("OS: Apple 64");
+#   else
+        puts("OS: Apple 32");
+#   endif
+#endif
+}
+
+// Program entry point.
+//  * PERF_TEST_OCL non-zero: query the OpenCL devices, then run the
+//    hand-written benchmarks through run_perf_test().
+//  * otherwise: initialise the test system and Google Test, print the host
+//    information and run all registered tests.
+int main(int argc, char** argv)
+{
+#if PERF_TEST_OCL
+    static std::vector<Info> devices;
+    ocl::getDevice(devices);
+
+    run_perf_test();
+    return 0;
+#else
+    TS::ptr()->init("ocl");
+    InitGoogleTest(&argc, argv);
+
+    print_info();
+
+    return RUN_ALL_TESTS();
+#endif // PERF_TEST_OCL
+}
+
+#else // HAVE_OPENCL
+
+// Entry point used when OpenCV was configured without OpenCL:
+// reports the missing support and exits successfully.
+int main()
+{
+    fputs("OpenCV was built without OpenCL support\n", stdout);
+    return 0;
+}
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/perf_test_ocl.cpp b/modules/ocl/perf/perf_test_ocl.cpp
new file mode 100644 (file)
index 0000000..67f20a3
--- /dev/null
@@ -0,0 +1,1191 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicore Ware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <ctime>
+
+#if PERF_TEST_OCL
+
+#ifdef HAVE_OPENCL
+
+// Tuning switches for the benchmark macros defined below.
+#define SHOW_CPU false // NOTE(review): no use of SHOW_CPU is visible in this part of the file -- confirm it is still needed
+#define REPEAT   1000 // number of iterations R_T runs for one timed measurement
+#define COUNT_U  0 // count the uploading execution time for ocl mat structures
+#define COUNT_D  0 // when non-zero, the download snippet is executed inside the timed loop as well
+
+
+// P_TEST_FULL measures the performance of one target function (kernel).
+//   upload      - code snippet converting cv::Mat to cv::ocl::oclMat
+//   kernel_call - the call being measured; its source text is also printed as the title
+//   download    - code snippet converting cv::ocl::oclMat back to cv::Mat
+// Sequence: run `upload` once, run `kernel_call` 15 times untimed (presumably as
+// warm-up), then time REPEAT iterations with cvGetTickCount and print the
+// elapsed time of the whole timed loop.
+// Change COUNT_U and COUNT_D to take uploading and downloading time into account.
+// NOTE: expands R_TEST, whose catch handler executes `return;`, so this macro
+// can only be used inside functions returning void.
+#define P_TEST_FULL( upload, kernel_call, download ) \
+{ \
+    std::cout<< "\n" #kernel_call "\n----------------------"; \
+    {upload;} \
+    R_TEST( kernel_call, 15 ); \
+    double t = (double)cvGetTickCount(); \
+    R_T( { \
+            if( COUNT_U ) {upload;} \
+            kernel_call; \
+            if( COUNT_D ) {download;} \
+            } ); \
+    t = (double)cvGetTickCount() - t; \
+    std::cout << "runtime is  " << t/((double)cvGetTickFrequency()* 1000.) << "ms" << std::endl; \
+}
+
+
+// R_T2 prints the source text of `test`, runs it 15 times untimed, then times
+// REPEAT further iterations with clock() and prints the elapsed time in ms.
+// clock() counts in units of 1/CLOCKS_PER_SEC (1000 per second on Windows,
+// 1000000 on POSIX), so the difference is scaled before it is labelled "ms".
+#define R_T2( test ) \
+{ \
+    std::cout<< "\n" #test "\n----------------------"; \
+    R_TEST( test, 15 ) \
+    clock_t st = clock(); \
+    R_T( test ) \
+    std::cout<< (double)(clock() - st) * 1000. / CLOCKS_PER_SEC << "ms\n"; \
+}
+// R_T runs `test` REPEAT times through R_TEST.
+#define R_T( test ) \
+    R_TEST( test, REPEAT )
+// R_TEST executes `test` `repeat` times inside a try block. Any exception is
+// caught, a notice is printed and the enclosing function is left through
+// `return;` -- so the macro is only usable in functions returning void.
+// The loop counter is named `i` and is visible to the code passed as `test`.
+#define R_TEST( test, repeat ) \
+    try{ \
+        for( int i = 0; i < repeat; i ++ ) { test; } \
+    } catch( ... ) { std::cout << "||||| Exception catched! |||||\n"; return; }
+
+// Image loaded by the filter benchmarks. The default is a Windows-only
+// location; define FILTER_TEST_IMAGE on the compiler command line to point
+// the benchmarks at a different file.
+#ifndef FILTER_TEST_IMAGE
+#define FILTER_TEST_IMAGE "C:/Windows/Web/Wallpaper/Landscapes/img9.jpg"
+#endif
+// Prints a warning that the benchmark `name` cannot be run.
+#define WARN_NRUN( name ) \
+    std::cout << "Warning: " #name " is not runnable!\n";
+
+
+void print_info();
+
+// performance base class
+// Run() is the public entry point of a benchmark; SetUp() prepares its input
+// data and is invoked by the Run() implementations of the derived classes.
+struct PerfTest
+{
+    // polymorphic base: a virtual destructor makes deletion through PerfTest* well defined
+    virtual ~PerfTest() {}
+    virtual void Run()   = 0;
+    protected:
+    virtual void SetUp() = 0;
+};
+///////////////////////////////////////
+// Arithm
+// Common fixture of the arithmetic benchmarks: owns the host matrices, their
+// OpenCL counterparts and a random scalar. Derived classes may assign `type`
+// before calling SetUp() to choose the element type of mat1 / mat2.
+struct ArithmTestP : PerfTest
+{
+    int type;           // element type of mat1 / mat2, CV_8UC4 unless changed
+    cv::Scalar val;     // random scalar operand, each component drawn from [-10, 10)
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // host-side inputs
+    cv::Mat mask;       // 8-bit single-channel mask
+    cv::Mat dst;        // host-side destination for downloads
+    cv::ocl::oclMat oclRes, oclmat1, oclmat2;
+    cv::ocl::oclMat oclmask;
+    std::vector<cv::Mat> dstv;
+    protected:
+    ArithmTestP() : type( CV_8UC4 ) {}
+    // Fills mat1, mat2, mask and val with random data and uploads the matrices.
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size( 3000, 3000 ); // big input image
+        mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        mat2 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+        // Binarise the mask to 0 / 255. The last argument is the threshold type;
+        // the matrix type constant CV_8UC1 passed here before only worked because
+        // it has the same numeric value (0) as cv::THRESH_BINARY.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+        oclmat2 = cv::ocl::oclMat(mat2);
+        oclmask = cv::ocl::oclMat(mask);
+    }
+};
+
+// Benchmarks cv::ocl::add on the two matrices prepared by ArithmTestP::SetUp.
+struct AddArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // arguments: upload snippet, measured call, download snippet
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::add(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::subtract on the two matrices prepared by ArithmTestP::SetUp.
+struct SubtractArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // arguments: upload snippet, measured call, download snippet
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::subtract(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::multiply on the two matrices prepared by ArithmTestP::SetUp.
+struct MultiplyArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // P_TEST_FULL does its own timing; the unused clock() sample that was
+        // taken here has been dropped.
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::multiply(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::divide on the two matrices prepared by ArithmTestP::SetUp.
+struct DivideArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // arguments: upload snippet, measured call, download snippet
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::divide(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::exp on a CV_32FC1 matrix.
+struct ExpP : ArithmTestP
+{
+    void Run()
+    {
+        type = CV_32FC1; // must be assigned before SetUp() creates mat1
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::exp(oclmat1, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::log on a CV_32FC1 matrix.
+struct LogP : ArithmTestP
+{
+    void Run()
+    {
+        type = CV_32FC1; // must be assigned before SetUp() creates mat1
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::log(oclmat1, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::compare (cv::CMP_EQ) on two CV_32FC1 matrices.
+struct CompareP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1; // must be assigned before SetUp() creates mat1 / mat2
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::compare(oclmat1, oclmat2, oclRes, cv::CMP_EQ),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::flip with flip code 0 on a random CV_8UC4 matrix.
+struct FlipP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::flip(oclmat1, oclRes, 0),
+                oclRes.download(dst);
+                );
+    }
+    protected:
+    // Only mat1 is needed here, so the base-class set-up is replaced.
+    virtual void SetUp()
+    {
+        cv::RNG& generator = cvtest::TS::ptr()->get_rng();
+
+        type = CV_8UC4;
+        size = cv::Size(3000, 3000);
+
+        mat1    = cvtest::randomMat(generator, size, type, 1, 255, false);
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::magnitude on two CV_32F matrices.
+struct MagnitudeP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32F; // must be assigned before SetUp() creates mat1 / mat2
+        SetUp();
+        // Both uploaded inputs are passed to the call. Previously oclmat1 was
+        // passed twice, leaving the uploaded oclmat2 unused.
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::magnitude(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst);
+                );
+    }
+};
+
+// Benchmarks cv::ocl::LUT: a random CV_8UC1 image mapped through a 256-entry table.
+struct LUTP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);ocllut  = cv::ocl::oclMat(lut),
+                cv::ocl::LUT(oclmat1, ocllut, oclRes),
+                oclRes.download(dst);
+                );
+    }
+    protected:
+    cv::Mat lut;            // host-side lookup table, 256 x 1, CV_8UC1
+    cv::ocl::oclMat ocllut; // device-side copy of `lut`
+    virtual void SetUp()
+    {
+        cv::RNG& generator = cvtest::TS::ptr()->get_rng();
+
+        type = CV_8UC1;
+        size = cv::Size(3000, 3000);
+
+        // the image is drawn before the table; the order of the random draws is kept
+        mat1    = cvtest::randomMat(generator, size, type, 1, 255, false);
+        oclmat1 = cv::ocl::oclMat(mat1);
+
+        const cv::Size tableSize(256, 1);
+        lut    = cvtest::randomMat(generator, tableSize, CV_8UC1, 100, 200, false);
+        ocllut = cv::ocl::oclMat(lut);
+    }
+};
+
+// Benchmarks cv::ocl::minMax on a CV_64F matrix, with an 8-bit matrix passed as mask.
+struct MinMaxP : ArithmTestP
+{
+    double minVal_gold, minVal;
+    double maxVal_gold, maxVal;
+
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::minMax(oclmat1, &minVal, &maxVal, oclmat2),
+                {};
+                );
+    }
+
+    protected:
+    // mat1 holds the values to scan, mat2 the 8-bit mask.
+    virtual void SetUp()
+    {
+        cv::RNG& generator = cvtest::TS::ptr()->get_rng();
+
+        type = CV_64F;
+        size = cv::Size(3000, 3000);
+
+        // values are drawn before the mask; the order of the random draws is kept
+        mat1    = cvtest::randomMat(generator, size, type, 0.0, 127.0, false);
+        oclmat1 = cv::ocl::oclMat(mat1);
+
+        mat2    = cvtest::randomMat(generator, size, CV_8UC1, 0, 2, false);
+        oclmat2 = cv::ocl::oclMat(mat2);
+    }
+};
+
+// Benchmarks cv::ocl::minMaxLoc on the data prepared by MinMaxP::SetUp.
+struct MinMaxLocP : MinMaxP
+{
+    // receive the locations reported by the measured call
+    cv::Point minLoc_gold;
+    cv::Point maxLoc_gold;
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::minMaxLoc(oclmat1, &minVal, &maxVal, &minLoc_gold, &maxLoc_gold, oclmat2),
+                {}
+                );
+    }
+};
+
+// Benchmarks cv::ocl::countNonZero on a CV_64F matrix.
+struct CountNonZeroP : ArithmTestP
+{
+    int n; // receives the count returned by the measured call
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                n = cv::ocl::countNonZero(oclmat1),
+                {}
+                );
+    }
+    protected:
+    // Builds mat1 by converting a random 8-bit matrix to `type`.
+    virtual void SetUp()
+    {
+        type = CV_64F; // was the bare literal 6, which is the value of CV_64F
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size( 3000, 3000 );
+
+        cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false);
+        matBase.convertTo(mat1, type);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::sum on mat1 as prepared by ArithmTestP::SetUp.
+struct SumP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        cv::Scalar n; // receives the result of the measured call
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                n = cv::ocl::sum(oclmat1),
+                {}
+                );
+    }
+};
+
+// Shared fixture of the bitwise benchmarks: two CV_8UC4 matrices whose raw
+// bytes are filled row by row with uniform random values.
+struct BitwiseP : ArithmTestP
+{
+    protected:
+        virtual void SetUp()
+        {
+            cv::RNG& generator = cvtest::TS::ptr()->get_rng();
+
+            type = CV_8UC4;
+            size = cv::Size( 3000, 3000 );
+
+            mat1.create(size, type);
+            mat2.create(size, type);
+
+            // every row is viewed as a flat run of bytes so all channels get filled alike
+            for (int r = 0; r < mat1.rows; ++r)
+            {
+                const int bytes1 = static_cast<int>(mat1.cols * mat1.elemSize());
+                cv::Mat view1(1, bytes1, CV_8U, static_cast<void*>(mat1.ptr(r)));
+                generator.fill(view1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
+
+                const int bytes2 = static_cast<int>(mat2.cols * mat2.elemSize());
+                cv::Mat view2(1, bytes2, CV_8U, static_cast<void*>(mat2.ptr(r)));
+                generator.fill(view2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
+            }
+
+            oclmat1 = cv::ocl::oclMat(mat1);
+            oclmat2 = cv::ocl::oclMat(mat2);
+        }
+};
+
+// Benchmarks cv::ocl::bitwise_not on mat1 as prepared by BitwiseP::SetUp.
+struct BitwiseNotP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::bitwise_not(oclmat1, oclRes),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_and twice: matrix with matrix, then matrix with scalar.
+// NOTE(review): BitwiseP::SetUp does not assign `val`, so the scalar variant
+// runs with whatever the default-constructed cv::Scalar holds -- confirm this is intended.
+struct BitwiseAndP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // matrix & matrix
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::bitwise_and(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst)
+                );
+        // matrix & scalar
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::bitwise_and(oclmat1, val, oclRes),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_xor twice: matrix with matrix, then matrix with scalar.
+// NOTE(review): BitwiseP::SetUp does not assign `val`, so the scalar variant
+// runs with whatever the default-constructed cv::Scalar holds -- confirm this is intended.
+struct BitwiseXorP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // matrix ^ matrix
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::bitwise_xor(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst)
+                );
+        // matrix ^ scalar
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::bitwise_xor(oclmat1, val, oclRes),
+                oclRes.download(dst)
+                );
+
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_or twice: matrix with matrix, then matrix with scalar.
+// NOTE(review): BitwiseP::SetUp does not assign `val`, so the scalar variant
+// runs with whatever the default-constructed cv::Scalar holds -- confirm this is intended.
+struct BitwiseOrP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // matrix | matrix
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::bitwise_or(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst)
+                );
+        // matrix | scalar
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::bitwise_or(oclmat1, val, oclRes),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::transpose on mat1 as prepared by ArithmTestP::SetUp.
+struct TransposeP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::transpose(oclmat1, oclRes),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::absdiff on two CV_32FC1 matrices.
+struct AbsdiffArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1; // must be assigned before SetUp() creates mat1 / mat2
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::absdiff(oclmat1, oclmat2, oclRes),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::phase on two CV_32F matrices.
+struct PhaseP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32F; // must be assigned before SetUp() creates mat1 / mat2
+        SetUp();
+        // NOTE(review): the trailing 1 presumably selects degrees as in cv::phase -- confirm
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+                cv::ocl::phase(oclmat1,oclmat2,oclRes,1),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks cv::ocl::cartToPolar on CV_64FC4 input: five iterations timed with clock().
+struct CartToPolarP : ArithmTestP
+{
+    cv::ocl::oclMat oclRes1; // second output of the measured call
+    virtual void Run()
+    {
+        type = CV_64FC4;
+        SetUp();
+        clock_t start = clock();
+        // R_TEST leaves Run() through `return;` if the call throws
+        R_TEST(
+                cv::ocl::cartToPolar(oclmat1,oclmat2,oclRes, oclRes1, 1);
+                if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
+                , 5);
+        // clock() counts in units of 1/CLOCKS_PER_SEC, so scale before labelling the value "ms"
+        std::cout<< "ocl::CartToPolar -- " << static_cast<double>(clock() - start) * 1000. / CLOCKS_PER_SEC << "ms\n";
+    }
+};
+
+// Benchmarks cv::ocl::polarToCart on CV_64FC4 input: two iterations timed with clock().
+struct PolarToCartP : ArithmTestP
+{
+    cv::ocl::oclMat oclRes1; // second output of the measured call
+    virtual void Run()
+    {
+        type = CV_64FC4;
+        SetUp();
+        clock_t start = clock();
+        // R_TEST leaves Run() through `return;` if the call throws
+        R_TEST(
+                cv::ocl::polarToCart(oclmat1,oclmat2,oclRes, oclRes1, 1);
+                if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
+                , 2);
+        // clock() counts in units of 1/CLOCKS_PER_SEC, so scale before labelling the value "ms"
+        std::cout<< "ocl::polarToCart -- " << static_cast<double>(clock() - start) * 1000. / CLOCKS_PER_SEC << "ms\n";
+    }
+};
+
+///////////////////////////////////////
+// split & merge
+// Benchmarks cv::ocl::split on a matrix filled with the constant (1, 2, 3, 4).
+struct SplitP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::split(oclmat1, dev_dst),
+                {
+                    const size_t planes = dev_dst.size();
+                    dstv.resize(planes);
+                    for (size_t k = 0; k < planes; ++k)
+                        dev_dst[k].download(dstv[k]);
+                }
+                );
+    }
+    protected:
+    std::vector<cv::ocl::oclMat> dev_dst; // one device matrix per split channel
+    // `type` is not assigned here, so the value chosen earlier (CV_8UC4 from
+    // the ArithmTestP constructor unless changed) is used.
+    virtual void SetUp()
+    {
+        const cv::Scalar fill(1.0, 2.0, 3.0, 4.0);
+
+        size = cv::Size( 3000, 3000 );
+        mat1.create(size, type);
+        mat1.setTo(fill);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::merge on the channels produced by splitting SplitP's matrix.
+struct MergeP : SplitP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // produce the per-channel inputs (device and host side) before timing starts
+        cv::ocl::split(oclmat1, dev_dst);
+        cv::split(mat1, dstv);
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                cv::ocl::merge(dev_dst, oclmat2),
+                oclmat2.download(dst)
+                );
+    }
+};
+
+// Benchmarks the masked oclMat::setTo on a CV_32FC4 matrix.
+struct SetToP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
+        P_TEST_FULL(
+                oclmat2 = cv::ocl::oclMat(mat2),
+                oclmat1.setTo( s, oclmat2 ),
+                oclmat1.download(dst);
+                );
+    }
+    protected:
+    // mat1 / oclmat1 are only allocated; mat2 / oclmat2 hold the random 8-bit mask.
+    virtual void SetUp()
+    {
+        cv::RNG& generator = cvtest::TS::ptr()->get_rng();
+
+        type = CV_32FC4;
+        size = cv::Size(3000, 3000);
+
+        mat1.create(size, type);
+        oclmat1.create(size, type);
+
+        mat2    = cvtest::randomMat(generator, size, CV_8UC1, 0.0, 1.5, false);
+        oclmat2 = cv::ocl::oclMat(mat2);
+    }
+};
+
+// Benchmarks the masked oclMat::copyTo using the matrices prepared by SetToP::SetUp.
+// NOTE(review): SetToP::SetUp only allocates mat1 without filling it, so the
+// data uploaded below is uninitialised -- fine for timing, confirm it is intended.
+struct CopyToP : SetToP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                oclmat1.copyTo( oclRes, oclmat2 ),
+                oclRes.download(dst)
+                );
+    }
+};
+
+// Benchmarks oclMat::convertTo from a CV_32FC1 matrix with target type CV_32FC4.
+struct ConvertToP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        // Scaling factors for convertTo. They are still drawn (keeping the random
+        // stream unchanged) but are not passed to the call yet, see below.
+        const double a = rng.uniform(0.0, 1.0);
+        const double b = rng.uniform(-10.0, 10.0);
+        (void)a; (void)b; // silence unused-variable warnings while the scaled variant is disabled
+
+        int type2 = CV_32FC4;
+
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat(mat1),
+                oclmat1.convertTo( oclRes, type2 /*, a, b */ ), // fails when scaling factors a and b are specified
+                oclRes.download(dst)
+                );
+    }
+};
+
+////////////////////////////////////////////
+// Filters
+
+// Common fixture of the filter benchmarks: loads FILTER_TEST_IMAGE and keeps
+// a BGRA and a grayscale version of it on the host and on the device.
+struct FilterTestP : PerfTest
+{
+    protected:
+        int ksize;  // kernel side length
+        int dx, dy; // derivative orders used by the derivative filters
+
+        cv::Mat img_rgba;
+        cv::Mat img_gray;
+
+        cv::ocl::oclMat ocl_img_rgba;
+        cv::ocl::oclMat ocl_img_gray;
+
+        cv::ocl::oclMat dev_dst_rgba;
+        cv::ocl::oclMat dev_dst_gray;
+
+        cv::Mat dst_rgba;
+        cv::Mat dst_gray;
+
+        cv::Mat kernel; // ksize x ksize structuring element of ones
+
+        int bordertype;
+
+        // Chooses the filter parameters, then loads and converts the test image.
+        // ASSERT_FALSE returns from SetUp() early when the image cannot be read.
+        virtual void SetUp()
+        {
+            ksize = 7;
+            dx = dy = ksize / 2;
+            bordertype = static_cast<int>(cv::BORDER_DEFAULT);
+            kernel = cv::Mat::ones(ksize, ksize, CV_8U);
+
+            cv::Mat img = readImage(FILTER_TEST_IMAGE);
+            ASSERT_FALSE(img.empty());
+
+            cv::cvtColor(img, img_rgba, CV_BGR2BGRA);
+            cv::cvtColor(img, img_gray, CV_BGR2GRAY);
+
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+            ocl_img_gray = cv::ocl::oclMat(img_gray);
+        }
+};
+
+// Benchmarks cv::ocl::blur with a ksize x ksize kernel on the BGRA and the grayscale image.
+struct BlurP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // arguments: upload snippet, measured calls, download snippet
+        P_TEST_FULL(
+                {
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                {
+                cv::ocl::blur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
+                cv::ocl::blur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
+                },
+                {
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+// Benchmarks cv::ocl::Sobel on the BGRA and the grayscale image, using the
+// ksize, dx and dy values chosen by FilterTestP::SetUp.
+struct SobelP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // arguments: upload snippet, measured calls, download snippet
+        P_TEST_FULL(
+                {
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                {
+                cv::ocl::Sobel(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, ksize, 1, 0, bordertype);
+                cv::ocl::Sobel(ocl_img_gray, dev_dst_gray, -1, dx, dy, ksize, 1, 0, bordertype);
+                },
+                {
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct ScharrP : FilterTestP // perf case for cv::ocl::Scharr on the RGBA and gray images held by FilterTestP
+{
+    virtual void Run() // re-runs SetUp(), overrides dx/dy, then hands three statement groups to P_TEST_FULL
+    {
+        SetUp();
+        dx = 0; dy = 1; // replaces the dx = dy = ksize/2 that SetUp() assigned
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each group - confirm
+                { // group 1: build the device matrices from the host images
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                { // group 2: the call takes no ksize argument; dx = 0 and dy = 1 come from the override above
+                cv::ocl::Scharr(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, 1, 0, bordertype);
+                cv::ocl::Scharr(ocl_img_gray, dev_dst_gray, -1, dx, dy, 1, 0, bordertype);
+                },
+                { // group 3: copy both device results into the host matrices
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct GaussianBlurP : FilterTestP // perf case for cv::ocl::GaussianBlur on the RGBA and gray test images
+{
+    virtual void Run() // re-runs SetUp(), then hands three statement groups to P_TEST_FULL
+    {
+        double sigma1 = 3, sigma2 = 3; // the two sigma arguments passed to GaussianBlur below
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each group - confirm
+                { // group 1: build the device matrices from the host images
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                { // group 2: ksize x ksize Gaussian (SetUp sets ksize = 7); no border argument is passed
+                cv::ocl::GaussianBlur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), sigma1, sigma2);
+                cv::ocl::GaussianBlur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), sigma1, sigma2);
+                },
+                { // group 3: copy both device results into the host matrices
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct DilateP : FilterTestP // perf case for cv::ocl::dilate on the RGBA and gray test images
+{
+    virtual void Run() // re-runs SetUp(), then hands three statement groups to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each group - confirm
+                { // group 1: build the device matrices from the host images
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                { // group 2: kernel is the host-side ksize x ksize CV_8U all-ones matrix created in SetUp()
+                cv::ocl::dilate(ocl_img_rgba, dev_dst_rgba, kernel);
+                cv::ocl::dilate(ocl_img_gray, dev_dst_gray, kernel);
+                },
+                { // group 3: copy both device results into the host matrices
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct ErodeP : FilterTestP // perf case for cv::ocl::erode on the RGBA and gray test images
+{
+    virtual void Run() // re-runs SetUp(), then hands three statement groups to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each group - confirm
+                { // group 1: build the device matrices from the host images
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                { // group 2: kernel is the host-side ksize x ksize CV_8U all-ones matrix created in SetUp()
+                cv::ocl::erode(ocl_img_rgba, dev_dst_rgba, kernel);
+                cv::ocl::erode(ocl_img_gray, dev_dst_gray, kernel);
+                },
+                { // group 3: copy both device results into the host matrices
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct MorphExP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        cv::ocl::oclMat okernel;
+        P_TEST_FULL(
+                {
+                okernel      = cv::ocl::oclMat(kernel);
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                {
+                cv::ocl::morphologyEx(ocl_img_rgba, dev_dst_rgba, 3, okernel);
+                cv::ocl::morphologyEx(ocl_img_gray, dev_dst_gray, 3, okernel);
+                },
+                {
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+struct LaplacianP : FilterTestP
+{
+    void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+                {
+                ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                {
+                cv::ocl::Laplacian(ocl_img_rgba, dev_dst_rgba, -1, 3 );
+                cv::ocl::Laplacian(ocl_img_gray, dev_dst_gray, -1, 3 );
+                },
+                {
+                dev_dst_rgba.download(dst_rgba);
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+////////////////////
+// histograms
+struct CalcHistP : PerfTest // perf case for cv::ocl::calcHist on a random 3000x3000 CV_8UC1 image
+{
+    virtual void Run() // regenerates the input via SetUp(), then passes three expressions to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each argument - confirm
+                oclmat = cv::ocl::oclMat( src ), // arg 1: device matrix built from src
+                cv::ocl::calcHist(oclmat, oclRes), // arg 2: operation under test
+                oclRes.download(hist) // arg 3: device result copied into hist
+                );
+    }
+    protected:
+    cv::Size size; // input size, assigned in SetUp()
+    cv::Mat src, hist; // host input and host copy of the result
+
+    cv::ocl::oclMat oclmat; // device input
+    cv::ocl::oclMat oclRes; // device result
+
+    virtual void SetUp() // fills src with random data and builds oclmat from it
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng(); // generator obtained from the cvtest::TS instance
+        size = cv::Size(3000, 3000);
+        src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false);
+        oclmat = cv::ocl::oclMat( src );
+    }
+};
+
+struct EqualizeHistP : CalcHistP // perf case for cv::ocl::equalizeHist; reuses CalcHistP's input and members
+{
+    virtual void Run() // regenerates the input via CalcHistP::SetUp(), then passes three expressions to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each argument - confirm
+                oclmat = cv::ocl::oclMat( src ), // arg 1: device matrix built from src
+                cv::ocl::equalizeHist(oclmat, oclRes), // arg 2: operation under test
+                oclRes.download(hist) // arg 3: the inherited hist member receives the downloaded result
+                );
+    }
+};
+
+struct ThresholdP : CalcHistP
+{
+    virtual void Run()
+    {
+        SetUp();
+        int threshOp = (int)cv::THRESH_TOZERO_INV;;
+        double maxVal = 200;
+        double thresh = 125;
+
+        clock_t start = clock();
+
+        P_TEST_FULL(
+                oclmat = cv::ocl::oclMat( src ),
+                cv::ocl::threshold(oclmat, oclRes, thresh, maxVal, threshOp ),
+                oclRes.download(hist)
+                );
+    }
+};
+
+struct ResizeP : ArithmTestP // perf case for cv::ocl::resize; mat1, oclmat1, oclRes and dst are used unqualified here
+{
+    virtual void Run() // calls SetUp(), then passes three expressions to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each argument - confirm
+                oclmat1 = cv::ocl::oclMat( mat1 ), // arg 1: device matrix built from mat1
+                cv::ocl::resize(oclmat1, oclRes, cv::Size(), 2.0, 2.0), // arg 2: empty Size plus factors 2.0 and 2.0
+                oclRes.download(dst) // arg 3: device result copied into dst
+                );
+    }
+};
+
+struct CvtColorP : PerfTest // perf case for cv::ocl::cvtColor (CV_BGR2GRAY) on the filter test image
+{
+    virtual void Run() // reloads the image via SetUp(), then passes three expressions to P_TEST_FULL
+    {
+        SetUp();
+        P_TEST_FULL( // NOTE(review): macro is defined outside this chunk; presumably it times each argument - confirm
+                oclmat = cv::ocl::oclMat( img ), // arg 1: device matrix built from img
+                cv::ocl::cvtColor(oclmat, ocldst, cvtcode), // arg 2: conversion under test
+                ocldst.download(dst) // arg 3: device result copied into dst
+                );
+    }
+    protected:
+    int type; // depth img is converted to; SetUp() assigns CV_8U
+    int cvtcode; // conversion code; SetUp() assigns CV_BGR2GRAY
+
+    cv::Mat img, dst; // host input and host copy of the result
+    cv::ocl::oclMat oclmat, ocldst; // device input and device result
+    virtual void SetUp() // reads FILTER_TEST_IMAGE, converts it to `type`, and builds oclmat from it
+    {
+        type = CV_8U;
+        cvtcode = CV_BGR2GRAY;
+        cv::Mat imgBase = readImage(FILTER_TEST_IMAGE);
+        ASSERT_FALSE(imgBase.empty());
+
+        imgBase.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0); // scale is 1.0 here because type is CV_8U
+        oclmat = cv::ocl::oclMat( img );
+    };
+};
+
+
+struct WarpAffineP : ArithmTestP
+{
+    void Run()
+    {
+        SetUp();
+        const double aplha = CV_PI / 4;
+        double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2},
+            {std::sin(aplha),  std::cos(aplha), 0}};
+        cv::Mat M(2, 3, CV_64F, (void*) mat);
+
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat( mat1 ),
+                cv::ocl::warpAffine( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
+                oclRes.download(dst)
+                );
+    }
+};
+
+struct WarpPerspectiveP : ArithmTestP
+{
+    void Run()
+    {
+        SetUp();
+        const double aplha = CV_PI / 4;
+        double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2},
+            {std::sin(aplha),  std::cos(aplha), 0},
+            {0.0,              0.0,             1.0}};
+        cv::Mat M(3, 3, CV_64F, (void*) mat);
+
+        P_TEST_FULL(
+                oclmat1 = cv::ocl::oclMat( mat1 ),
+                cv::ocl::warpPerspective( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
+                oclRes.download(dst)
+                );
+    }
+};
+
+
+struct CornerHarrisP : FilterTestP
+{
+    void Run()
+    {
+        SetUp();
+        bordertype = 2;
+        P_TEST_FULL(
+                {
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+                },
+                {
+                cv::ocl::cornerHarris(ocl_img_gray, dev_dst_gray, 3, ksize, 0.5, bordertype );
+                },
+                {
+                dev_dst_gray.download(dst_gray);
+                });
+    }
+};
+
+void test()
+{
+    clock_t start = clock();
+    std::cout << ">>>>>>>> Performance test started <<<<<<<<\n";
+    /*
+       {
+       AddArrayP AddArrayP;
+       AddArrayP.Run();
+       SubtractArrayP subarray;
+       subarray.Run();
+       MultiplyArrayP MultiplyArrayP;
+       MultiplyArrayP.Run();
+       DivideArrayP DivideArrayP;
+       DivideArrayP.Run();
+       }
+       std::cout.flush();
+       {
+       CompareP comp;
+       comp.Run();
+       MagnitudeP magnitude;
+       magnitude.Run();
+       LUTP lut;
+       lut.Run();
+       FlipP FlipP;
+       FlipP.Run();
+       MinMaxP minmax;
+       minmax.Run();
+       MinMaxLocP minmaxloc;
+       minmaxloc.Run();
+       CountNonZeroP cnz;
+       cnz.Run();
+       SumP sum;
+       sum.Run();
+       }*/
+      /* std::cout.flush();
+       {
+       BitwiseNotP bn;
+       bn.Run();
+       BitwiseOrP bo;
+       bo.Run();
+       BitwiseAndP ba;
+       ba.Run();
+       BitwiseXorP bx;
+       bx.Run();
+       }*/
+       
+    std::cout.flush();
+    {
+        //   TransposeP transpose;
+        //  transpose.Run();
+        // AbsdiffArrayP absdiff;
+        // absdiff.Run();
+        // SplitP split;
+        // split.Run();
+       // MergeP merge;
+       // merge.Run();
+        /*
+           SetToP setto;
+           setto.Run();
+           CopyToP copyto;
+           copyto.Run();
+           ConvertToP convertto;
+           convertto.Run();
+           */
+    }
+    /*
+       std::cout.flush();
+       {
+       BlurP blur;
+       blur.Run();
+       SobelP sobel;
+       sobel.Run();
+       ScharrP scharr;
+       scharr.Run();
+       GaussianBlurP gblur;
+       gblur.Run();
+       DilateP dilate;
+       dilate.Run();
+       ErodeP erode;
+       erode.Run();
+       }
+       std::cout.flush();
+       {
+       MorphExP morphex;
+       morphex.Run();
+       CalcHistP calchist;
+       calchist.Run();
+       EqualizeHistP eqhist;
+       eqhist.Run();
+       ThresholdP threshold;
+       threshold.Run();
+       ResizeP resize;
+       resize.Run();
+       CvtColorP cvtcolor;
+       cvtcolor.Run();
+       }
+
+       {
+       LogP log;
+       log.Run();
+       ExpP exp;
+       exp.Run();
+       }
+
+       std::cout.flush();
+       {
+    //PhaseP phase;
+    //phase.Run();
+    }
+    std::cout.flush();
+    {
+    CartToPolarP ctop;
+    ctop.Run();
+    }
+    std::cout.flush();
+    {
+    PolarToCartP ptoc;
+    ptoc.Run();
+    }
+    {
+    WarpAffineP warpA;
+    warpA.Run();
+    WarpPerspectiveP warpP;
+    warpP.Run();       
+    }
+
+    {
+    CornerHarrisP ch;
+    ch.Run();
+    }
+
+    {
+    LaplacianP laplacian;
+    laplacian.Run();
+    }
+
+
+    */
+        std::cout << ">>>>>>>> Performance test ended <<<<<<<<\ntotal - " << clock() - start << "ms\n";
+    std::cout.flush();
+}
+
+void  run_perf_test() // entry point: calls print_info(), initialises the cvtest::TS instance, then runs test()
+{
+    print_info(); // NOTE(review): defined outside this chunk; presumably prints environment details - confirm
+    cvtest::TS::ptr()->init("ocl"); // init() receives the string "ocl"
+    test();
+}
+
+#endif // WITH_OPENCL
+
+#endif // PREF_TEST_OCL
diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp
new file mode 100644 (file)
index 0000000..f505dac
--- /dev/null
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+
+       
\ No newline at end of file
diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp
new file mode 100644 (file)
index 0000000..cad26fc
--- /dev/null
@@ -0,0 +1,72 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <cmath>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <cstdarg>
+#include "cvconfig.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/video/video.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
+
+#include "utility.hpp"
+#include "interpolation.hpp"
+//#include "add_test_info.h"
+//#define  PERF_TEST_OCL 1
+
+#endif
+
diff --git a/modules/ocl/perf/test_arithm.cpp b/modules/ocl/perf/test_arithm.cpp
new file mode 100644 (file)
index 0000000..0e6cf6e
--- /dev/null
@@ -0,0 +1,3658 @@
+///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Niko Li, newlife20080214@gmail.com
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Shengen Yan, yanshengen@gmail.com
+//    Jiang Liyuan,jlyuan001.good@163.com
+//    Rock Li, Rock.Li@amd.com
+//    Zailong Wu, bullet@yeah.net
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+#include <iomanip>
+
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
+{
+       int type;
+       cv::Scalar val;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat mat2;
+       cv::Mat mask;
+       cv::Mat dst;
+       cv::Mat dst1; //bak, for two outputs
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int src2x;
+       int src2y;
+       int dstx;
+       int dsty;
+       int maskx;
+       int masky;
+
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat mat2_roi;
+       cv::Mat mask_roi;
+       cv::Mat dst_roi;
+       cv::Mat dst1_roi; //bak
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+       cv::ocl::oclMat gdst1_whole; //bak
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gmat2;
+       cv::ocl::oclMat gdst;
+       cv::ocl::oclMat gdst1;   //bak
+       cv::ocl::oclMat gmask;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+               mat2 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               dst1  = randomMat(rng, size, type, 5, 16, false);
+               mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+               cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+
+               val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums>0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src2x   = 1;
+                       src1y   = 1;
+                       src2y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+                       maskx    =1;
+                       masky   =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src2x = 0;
+                       src1y = 0;
+                       src2y = 0;
+                       dstx = 0;
+                       dsty = 0;
+                       maskx    =0;
+                       masky   =0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               //mat2_roi = mat2(Rect(src2x,src2y,256,1));
+               mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+               mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+               dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst_whole = dst;
+               //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst1_whole = dst1;
+               //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gmat1 = mat1_roi;
+               //gmat2 = mat2_roi;
+               //gmask = mask_roi; 
+       }
+
+};
+////////////////////////////////lut/////////////////////////////////////////////////
+
+struct Lut : ArithmTestBase {};
+
+TEST_P(Lut, Mat)
+{       
+
+       cv::Mat mat2(3, 512, CV_8UC1);
+       cv::RNG& rng = TS::ptr()->get_rng();
+       rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);  
+                       mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+                       mat2_roi = mat2(Rect(src2x,src2y,256,1));
+
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::LUT(mat1_roi, mat2_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::LUT(gmat1, gmat2, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               // s=GetParam();
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               //  src2x = rng.uniform( 0,mat2.cols - 256);
+               // src2y = rng.uniform (0,mat2.rows - 1);
+
+               // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1));
+               mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+               mat2_roi = mat2(Rect(src2x,src2y,256,1));
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               //   gdst1_whole = dst1;
+               //     gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               //     gmask = mask_roi; 
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::LUT(gmat1, gmat2, gdst);
+       };
+#endif
+
+}
+
+
+
+////////////////////////////////exp/////////////////////////////////////////////////
+
+struct Exp : ArithmTestBase {};
+
+TEST_P(Exp, Mat) 
+{  
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::exp(mat1_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       gmat1 = mat1_roi;
+
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::exp(gmat1, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download(cpu_dst);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+                       //EXPECT_MAT_NEAR(dst, cpu_dst, 0,"");
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::exp(gmat1, gdst);
+       };
+#endif
+
+}
+
+
+////////////////////////////////log/////////////////////////////////////////////////
+
+struct Log : ArithmTestBase {};
+
+TEST_P(Log, Mat) 
+{  
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::log(mat1_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::log(gmat1, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::log(gmat1, gdst);
+       };
+#endif
+
+}
+
+
+
+
+////////////////////////////////add/////////////////////////////////////////////////
+
+struct Add : ArithmTestBase {}; // fixture alias: adds no members of its own to ArithmTestBase
+
+// Times cv::add against cv::ocl::add on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Add, Mat)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::add(gmat1, gmat2, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::add(mat1_roi, mat2_roi, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, gmat2, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times the masked cv::add against the masked cv::ocl::add on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Add, Mat_Mask)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::add(gmat1, gmat2, gdst, gmask);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, gmat2, gdst, gmask);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+// Times cv::add against cv::ocl::add for a matrix plus the fixture scalar `val`.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Add, Scalar)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::add(gmat1, val, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::add(mat1_roi, val, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, val, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times the masked cv::add against the masked cv::ocl::add for a matrix plus the
+// fixture scalar `val`.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Add, Scalar_Mask)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::add(gmat1, val, gdst, gmask);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::add(mat1_roi, val, dst_roi, mask_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, val, gdst, gmask);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+
+////////////////////////////////sub/////////////////////////////////////////////////
+// Fixture type for the TEST_P(Sub, ...) cases; it adds no members to ArithmTestBase.
+struct Sub : public ArithmTestBase
+{
+};
+
+// Times cv::subtract against cv::ocl::subtract on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Sub, Mat)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::subtract(gmat1, gmat2, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, mat2_roi, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, gmat2, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times the masked cv::subtract against the masked cv::ocl::subtract on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Sub, Mat_Mask)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+// Times cv::subtract against cv::ocl::subtract for a matrix and the fixture scalar `val`.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Sub, Scalar)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::subtract(gmat1, val, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, val, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, val, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times the masked cv::subtract against the masked cv::ocl::subtract for a matrix and
+// the fixture scalar `val`.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Sub, Scalar_Mask)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::subtract(gmat1, val, gdst, gmask);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, val, dst_roi, mask_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, val, gdst, gmask);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+
+////////////////////////////////Mul/////////////////////////////////////////////////
+// Fixture type for the TEST_P(Mul, ...) cases; it adds no members to ArithmTestBase.
+struct Mul : public ArithmTestBase
+{
+};
+
+// Times cv::multiply against cv::ocl::multiply on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Mul, Mat)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::multiply(gmat1, gmat2, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::multiply(mat1_roi, mat2_roi, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::multiply(gmat1, gmat2, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times cv::multiply against cv::ocl::multiply on two matrices with an extra scale
+// factor, drawn from the test suite RNG in [-10, 10) before every measured call.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Mul, Mat_Scalar)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        const double factor = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::multiply(gmat1, gmat2, gdst, factor);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+            // Same factor is handed to the host and the ocl call of this run.
+            const double factor = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::multiply(mat1_roi, mat2_roi, dst_roi, factor);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::multiply(gmat1, gmat2, gdst, factor);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+
+// Fixture type for the TEST_P(Div, ...) cases; it adds no members to ArithmTestBase.
+struct Div : public ArithmTestBase
+{
+};
+
+// Times cv::divide against cv::ocl::divide on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Div, Mat)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::divide(gmat1, gmat2, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::divide(mat1_roi, mat2_roi, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::divide(gmat1, gmat2, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+// Times cv::divide against cv::ocl::divide on two matrices with an extra scale
+// factor, drawn from the test suite RNG in [-10, 10) before every measured call.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Div, Mat_Scalar)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        const double factor = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::divide(gmat1, gmat2, gdst, factor);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+            // Same factor is handed to the host and the ocl call of this run.
+            const double factor = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::divide(mat1_roi, mat2_roi, dst_roi, factor);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::divide(gmat1, gmat2, gdst, factor);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+
+// Fixture type for the TEST_P(Absdiff, ...) cases; it adds no members to ArithmTestBase.
+struct Absdiff : public ArithmTestBase
+{
+};
+
+// Times cv::absdiff against cv::ocl::absdiff on two matrices.
+// Pass 0 runs after Has_roi(0) (labelled "no roi"), pass 1 after Has_roi(1) ("with roi").
+// Default build: each pass performs LOOP_TIMES+1 runs, leaves run 0 out of the sums
+// (presumably a warm-up run) and prints three averages with an "ms" suffix.
+// With PRINT_KERNEL_RUN_TIME defined: the ocl call is issued once per pass and this
+// test only prints the pass label.
+TEST_P(Absdiff, Mat)
+{
+#ifdef PRINT_KERNEL_RUN_TIME
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        Has_roi(pass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if (pass == 0)
+            cout << "no roi:";
+        else
+            cout << "\nwith roi:";
+        cv::ocl::absdiff(gmat1, gmat2, gdst);
+    }
+#else
+    // Divisor shared by all three printed averages.
+    const double tickScale = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+    for (int pass = 0; pass < 2; ++pass)
+    {
+        double cpuSum = 0;
+        double gpuSum = 0;
+        double kernelSum = 0;
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(pass);
+
+            // Host implementation.
+            double cpuTicks = (double)cvGetTickCount();
+            cv::absdiff(mat1_roi, mat2_roi, dst_roi);
+            cpuTicks = (double)cvGetTickCount() - cpuTicks;
+
+            // Whole device path: operand setup, ocl call and download.
+            double gpuTicks = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // The ocl call on its own.
+            double kernelTicks = (double)cvGetTickCount();
+            cv::ocl::absdiff(gmat1, gmat2, gdst);
+            kernelTicks = (double)cvGetTickCount() - kernelTicks;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuTicks = (double)cvGetTickCount() - gpuTicks;
+
+            if (run != 0)
+            {
+                gpuSum += gpuTicks;
+                cpuSum += cpuTicks;
+                kernelSum += kernelTicks;
+            }
+        }
+        cout << (pass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuSum / tickScale << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelSum / tickScale << "ms" << endl;
+    }
+#endif
+}
+
+TEST_P(Absdiff, Mat_Scalar) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::absdiff(mat1_roi, val, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::absdiff(gmat1, val, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::absdiff(gmat1, val, gdst);
+       };
+#endif
+}
+
+
+
+// Fixture type for the CartToPolar performance cases; it adds nothing to ArithmTestBase.
+struct CartToPolar : public ArithmTestBase
+{
+};
+
+TEST_P(CartToPolar, angleInDegree) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       gdst1_whole = dst1;
+                       gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       cv::Mat cpu_dst1;
+                       gdst1_whole.download(cpu_dst1);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+       };
+#endif
+}
+
+TEST_P(CartToPolar, angleInRadians) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       gdst1_whole = dst1;
+                       gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       cv::Mat cpu_dst1;
+                       gdst1_whole.download(cpu_dst1);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+       };
+#endif
+}
+
+
+// Fixture type for the PolarToCart performance cases; it adds nothing to ArithmTestBase.
+struct PolarToCart : public ArithmTestBase
+{
+};
+
+TEST_P(PolarToCart, angleInDegree) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       gdst1_whole = dst1;
+                       gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       cv::Mat cpu_dst1;
+                       gdst1_whole.download(cpu_dst1);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+       };
+#endif
+}
+
+TEST_P(PolarToCart, angleInRadians) 
+{   
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       gdst1_whole = dst1;
+                       gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       cv::Mat cpu_dst1;
+                       gdst1_whole.download(cpu_dst1);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+       };
+#endif
+}
+
+
+
+// Fixture type for the Magnitude performance case; it adds nothing to ArithmTestBase.
+struct Magnitude : public ArithmTestBase
+{
+};
+
+TEST_P(Magnitude, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::magnitude(mat1_roi, mat2_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::magnitude(gmat1, gmat2, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::magnitude(gmat1, gmat2, gdst);
+       };
+#endif
+}
+
+// Fixture type for the Transpose performance case; it adds nothing to ArithmTestBase.
+struct Transpose : public ArithmTestBase
+{
+};
+
+TEST_P(Transpose, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::transpose(mat1_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::transpose(gmat1, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::transpose(gmat1, gdst);
+       };
+#endif
+}
+
+
+// Fixture type for the Flip performance cases; it adds nothing to ArithmTestBase.
+struct Flip : public ArithmTestBase
+{
+};
+
+TEST_P(Flip, X) 
+{   
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::flip(mat1_roi, dst_roi, 0);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::flip(gmat1, gdst, 0);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::flip(gmat1, gdst, 0);
+       };
+#endif
+}
+
+TEST_P(Flip, Y) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::flip(mat1_roi, dst_roi, 1);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::flip(gmat1, gdst, 1);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::flip(gmat1, gdst, 1);
+       };
+#endif
+}
+
+TEST_P(Flip, BOTH) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::flip(mat1_roi, dst_roi, -1);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::flip(gmat1, gdst, -1);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::flip(gmat1, gdst, -1);
+       };
+#endif
+}
+
+
+
+// Fixture type for the MinMax performance cases; it adds nothing to ArithmTestBase.
+struct MinMax : public ArithmTestBase
+{
+};
+
+TEST_P(MinMax, MAT) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+                       double minVal, maxVal;
+                       cv::Point minLoc, maxLoc;
+                       t0 = (double)cvGetTickCount();//cpu start
+                       if (mat1.depth() != CV_8S)
+                       {
+                               cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+                       }
+                       else 
+                       {
+                               minVal = std::numeric_limits<double>::max();
+                               maxVal = -std::numeric_limits<double>::max();
+                               for (int i = 0; i < mat1_roi.rows; ++i)
+                                       for (int j = 0; j < mat1_roi.cols; ++j)
+                                       {
+                                               signed char val = mat1_roi.at<signed char>(i, j);
+                                               if (val < minVal) minVal = val;
+                                               if (val > maxVal) maxVal = val;
+                                       }
+                       }
+
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gmat1 = mat1_roi;
+                       double minVal_, maxVal_;  
+                       t2=(double)cvGetTickCount();//kernel        
+                       cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gmat1 = mat1_roi;
+               double minVal_, maxVal_;  
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+       };
+#endif
+}
+
+// Benchmarks masked cv::ocl::minMax against a CPU reference.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(MinMax, MASK)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+
+            // CPU reference; the depth check itself sits inside the timed span.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            if (mat1.depth() == CV_8S)
+            {
+                // CV_8S input is scanned by hand instead of going through cv::minMaxLoc.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char val = mat1_roi.at<signed char>(row, col);
+                        const unsigned char m = mask_roi.at<unsigned char>(row, col);
+                        if (m && val < minVal)
+                            minVal = val;
+                        if (m && val > maxVal)
+                            maxVal = val;
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi);
+            }
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span also covers the assignments to gmat1/gmask,
+            // the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            double minVal_, maxVal_;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        double minVal_, maxVal_;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask);
+    }
+#endif
+}
+
+
+// Fixture type for the TEST_P(MinMaxLoc, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct MinMaxLoc : ArithmTestBase {};
+
+// Benchmarks unmasked cv::ocl::minMaxLoc against a CPU reference.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(MinMaxLoc, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            const int depth = mat1.depth();
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            if (depth == CV_8S)
+            {
+                // CV_8S input is scanned by hand instead of going through cv::minMaxLoc.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char val = mat1_roi.at<signed char>(row, col);
+                        if (val < minVal)
+                        {
+                            minVal = val;
+                            minLoc = cv::Point(col, row);
+                        }
+                        if (val > maxVal)
+                        {
+                            maxVal = val;
+                            maxLoc = cv::Point(col, row);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+            }
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span also covers the assignment to gmat1,
+            // the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gmat1 = mat1_roi;
+            double minVal_, maxVal_;
+            cv::Point minLoc_, maxLoc_;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat());
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gmat1 = mat1_roi;
+        double minVal_, maxVal_;
+        cv::Point minLoc_, maxLoc_;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat());
+    }
+#endif
+}
+
+
+// Benchmarks masked cv::ocl::minMaxLoc against a CPU reference.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(MinMaxLoc, MASK)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            const int depth = mat1.depth();
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            if (depth == CV_8S)
+            {
+                // CV_8S input is scanned by hand; only pixels with a non-zero mask byte count.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char val = mat1_roi.at<signed char>(row, col);
+                        const unsigned char m = mask_roi.at<unsigned char>(row ,col);
+                        if (m && val < minVal)
+                        {
+                            minVal = val;
+                            minLoc = cv::Point(col, row);
+                        }
+                        if (m && val > maxVal)
+                        {
+                            maxVal = val;
+                            maxLoc = cv::Point(col, row);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi);
+            }
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span also covers the assignments to gmat1/gmask,
+            // the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            double minVal_, maxVal_;
+            cv::Point minLoc_, maxLoc_;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        double minVal_, maxVal_;
+        cv::Point minLoc_, maxLoc_;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask);
+    }
+#endif
+}
+
+
+// Fixture type for the TEST_P(Sum, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct Sum : ArithmTestBase {};
+
+// Benchmarks cv::ocl::sum against cv::sum.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Sum, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference; the result is only computed, never compared.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            Scalar cpuSum = cv::sum(mat1_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span also covers the assignment to gmat1,
+            // the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            Scalar gpuSum = cv::ocl::sum(gmat1);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gmat1 = mat1_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        Scalar gpuSum = cv::ocl::sum(gmat1);
+    }
+#endif
+}
+
+//TEST_P(Sum, MASK) 
+//{    
+//    for(int j=0; j<LOOP_TIMES; j++)
+//    {
+//       
+//    }
+//}
+
+// Fixture type for the TEST_P(CountNonZero, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct CountNonZero : ArithmTestBase {};
+
+// Benchmarks cv::ocl::countNonZero against cv::countNonZero.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(CountNonZero, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference; the count is only computed, never compared.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            int cpuCount = cv::countNonZero(mat1_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span also covers the assignment to gmat1,
+            // the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            int gpuCount = cv::ocl::countNonZero(gmat1);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gmat1 = mat1_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        int gpuCount = cv::ocl::countNonZero(gmat1);
+    }
+#endif
+}
+
+
+
+////////////////////////////////phase/////////////////////////////////////////////////
+// Fixture type for the TEST_P(Phase, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct Phase : ArithmTestBase {};
+
+// Benchmarks cv::ocl::phase against cv::phase.
+// Only CV_32F and CV_64F inputs are benchmarked; any other depth is reported
+// as unsupported and the test returns without running either implementation.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Phase, Mat)
+{
+       if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F)
+       {
+               cout<<"\tUnsupported type\t\n";
+               // Stop here: previously execution fell through and still called
+               // cv::phase on data the message had just declared unsupported.
+               return;
+       }
+
+#ifndef PRINT_KERNEL_RUN_TIME
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::phase(mat1_roi,mat2_roi,dst_roi,0);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       // t1 spans the g* assignments, the kernel call and the download;
+                       // t2 spans only the cv::ocl::phase call.
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::phase(gmat1,gmat2,gdst,0);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       // Pass 0 is executed but not added to the totals.
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::phase(gmat1,gmat2,gdst,0);
+       };
+#endif
+
+}
+
+
+////////////////////////////////bitwise_and/////////////////////////////////////////////////
+// Fixture type for the TEST_P(Bitwise_and, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct Bitwise_and : ArithmTestBase {};
+
+// Benchmarks matrix-matrix cv::ocl::bitwise_and against cv::bitwise_and.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_and, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::bitwise_and(mat1_roi, mat2_roi, dst_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span covers the g* assignments, the kernel call
+            // and the download; the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Benchmarks masked matrix-matrix cv::ocl::bitwise_and against cv::bitwise_and.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_and, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span covers the g* assignments, the kernel call
+            // and the download; the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+
+// Benchmarks matrix-scalar cv::ocl::bitwise_and against cv::bitwise_and.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_and, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::bitwise_and(mat1_roi, val, dst_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span covers the g* assignments, the kernel call
+            // and the download; the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::bitwise_and(gmat1, val, gdst);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, val, gdst);
+    }
+#endif
+}
+
+// Benchmarks masked matrix-scalar cv::ocl::bitwise_and against cv::bitwise_and.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_and, Scalar_Mask)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       // t1 spans the g* assignments, the kernel call and the download;
+                       // t2 spans only the cv::ocl::bitwise_and call.
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       // Assign the mask that the call below actually consumes. This
+                       // used to assign gmat2 (never read by the scalar overload) and
+                       // left gmask unset, unlike the kernel-time branch further down.
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       // Pass 0 is executed but not added to the totals.
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+       };
+#endif
+}
+
+
+
+////////////////////////////////bitwise_or/////////////////////////////////////////////////
+
+// Fixture type for the TEST_P(Bitwise_or, ...) benchmarks; adds nothing of its own to ArithmTestBase.
+struct Bitwise_or : ArithmTestBase {};
+
+// Benchmarks matrix-matrix cv::ocl::bitwise_or against cv::bitwise_or.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_or, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::bitwise_or(mat1_roi, mat2_roi, dst_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span covers the g* assignments, the kernel call
+            // and the download; the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Benchmarks masked matrix-matrix cv::ocl::bitwise_or against cv::bitwise_or.
+// Two configurations are run (reported as "no roi" and "with roi"); each one
+// executes LOOP_TIMES+1 passes, and pass 0 is run but kept out of the totals.
+TEST_P(Bitwise_or, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int k = 0; k < 2; ++k)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES+1; ++pass)
+        {
+            Has_roi(k);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            const double cpuSpan = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // GPU path: the outer span covers the g* assignments, the kernel call
+            // and the download; the inner span covers only the cv::ocl call.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+            const double kernelSpan = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);
+            const double gpuSpan = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            if (pass != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+        // Divisor that turns an accumulated tick total into the per-pass figure printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << (k == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is  " << cpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is  " << gpuTicks/avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is  " << kernelTicks/avgDivisor << "ms" << endl;
+    }
+#else
+    // Kernel-time mode: run the ocl call once per configuration, printing only a label here.
+    for (int k = 0; k < 2; ++k)
+    {
+        Has_roi(k);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+        cout << (k == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+TEST_P(Bitwise_or, Scalar) 
+{   
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_or(mat1_roi, val, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_or(gmat1, val, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_or(gmat1, val, gdst);
+       };
+#endif
+}
+
+TEST_P(Bitwise_or, Scalar_Mask) 
+{   
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+       };
+#endif
+}
+
+
+////////////////////////////////bitwise_xor/////////////////////////////////////////////////
+
+// Fixture for the bitwise_xor performance cases; derives from ArithmTestBase and adds no members.
+struct Bitwise_xor : ArithmTestBase {};
+
+TEST_P(Bitwise_xor, Mat) 
+{   
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+       };
+#endif
+}
+
+TEST_P(Bitwise_xor, Mat_Mask) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+       };
+#endif
+}
+
+TEST_P(Bitwise_xor, Scalar) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_xor(mat1_roi, val, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_xor(gmat1, val, gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_xor(gmat1, val, gdst);
+       };
+#endif
+}
+
+TEST_P(Bitwise_xor, Scalar_Mask) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+       };
+#endif
+}
+
+
+////////////////////////////////bitwise_not/////////////////////////////////////////////////
+
+// Fixture for the bitwise_not performance case; derives from ArithmTestBase and adds no members.
+struct Bitwise_not : ArithmTestBase {};
+
+TEST_P(Bitwise_not, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::bitwise_not(mat1_roi,dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::bitwise_not(gmat1,gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::bitwise_not(gmat1,gdst);
+       };
+#endif
+}
+
+////////////////////////////////compare/////////////////////////////////////////////////
+PARAM_TEST_CASE ( CompareTestBase, MatType, bool)
+{
+       int type;
+       cv::Scalar val;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat mat2;
+       cv::Mat mask;
+       cv::Mat dst;
+       cv::Mat dst1; //bak, for two outputs
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int src2x;
+       int src2y;
+       int dstx;
+       int dsty;
+       int maskx;
+       int masky;
+
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat mat2_roi;
+       cv::Mat mask_roi;
+       cv::Mat dst_roi;
+       cv::Mat dst1_roi; //bak
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+       cv::ocl::oclMat gdst1_whole; //bak
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gmat2;
+       cv::ocl::oclMat gdst;
+       cv::ocl::oclMat gdst1;   //bak
+       cv::ocl::oclMat gmask;
+
+       virtual void SetUp()
+       {
+               //type = GET_PARAM(0);
+               type = CV_8UC1;
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+               mat2 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               dst1  = randomMat(rng, size, type, 5, 16, false);
+               mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+               cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+
+               val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums>0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src2x   = 1;
+                       src1y   = 1;
+                       src2y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+                       maskx    =1;
+                       masky   =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src2x = 0;
+                       src1y = 0;
+                       src2y = 0;
+                       dstx = 0;
+                       dsty = 0;
+                       maskx    =0;
+                       masky   =0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               //mat2_roi = mat2(Rect(src2x,src2y,256,1));
+               mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+               mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+               dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst_whole = dst;
+               //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst1_whole = dst1;
+               //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gmat1 = mat1_roi;
+               //gmat2 = mat2_roi;
+               //gmask = mask_roi; 
+       }
+
+};
+// Fixture for the compare performance case; derives from CompareTestBase and adds no members.
+struct Compare : CompareTestBase {};
+
+TEST_P(Compare, Mat) 
+{   
+       if(mat1.type()==CV_8SC1)
+       {
+               cout << "\tUnsupported type\t\n";
+       }       
+
+       int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
+       //const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
+       int cmp_num = sizeof(cmp_codes) / sizeof(int);
+       for (int i = 0; i < cmp_num; ++i)
+       {
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+               double totalcputick=0;
+               double totalgputick=0;
+               double totalgputick_kernel=0;
+               double t0=0;
+               double t1=0;
+               double t2=0;    
+               for(int k=1;k<2;k++){
+                       totalcputick=0;
+                       totalgputick=0;
+                       totalgputick_kernel=0;
+                       for(int j = 0; j < LOOP_TIMES+1; j ++)
+                       {
+                               Has_roi(k);       
+
+                               t0 = (double)cvGetTickCount();//cpu start
+                               cv::compare(mat1_roi,mat2_roi,dst_roi,cmp_codes[i]);
+                               t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                               t1 = (double)cvGetTickCount();//gpu start1              
+                               gdst_whole = dst;
+                               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                               gmat1 = mat1_roi;
+                               gmat2 = mat2_roi;
+                               t2=(double)cvGetTickCount();//kernel
+                               cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
+                               t2 = (double)cvGetTickCount() - t2;//kernel
+                               cv::Mat cpu_dst;
+                               gdst_whole.download (cpu_dst);//download
+                               t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                               if(j == 0)
+                                       continue;
+                               totalgputick=t1+totalgputick;
+                               totalcputick=t0+totalcputick;   
+                               totalgputick_kernel=t2+totalgputick_kernel;     
+
+                       }
+                       if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                       cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                       cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                       cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               }
+#else
+               for(int j = 0; j < 2; j ++)
+               {
+                       Has_roi(j);
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                       cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
+               };
+#endif
+       }
+
+}
+
+// Fixture for the pow performance case; derives from ArithmTestBase and adds no members.
+struct Pow : ArithmTestBase {};
+
+TEST_P(Pow, Mat)
+{
+       if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F)
+       {
+               cout<<"\tUnsupported type\t\n";
+       }
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+                       double p=4.5;
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::pow(mat1_roi,p,dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::pow(gmat1,p,gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               double p=4.5;
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::pow(gmat1,p,gdst);
+       };
+#endif
+}
+
+
+// Fixture for the magnitudeSqr performance case; derives from ArithmTestBase and adds no members.
+struct MagnitudeSqr : ArithmTestBase {};
+
+TEST_P(MagnitudeSqr, Mat) 
+{    
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       for(int i = 0;i < mat1.rows;++i)
+                               for(int j = 0;j < mat1.cols;++j)
+                               {
+                                       float val1 = mat1.at<float>(i,j);
+                                       float val2 = mat2.at<float>(i,j);
+
+                                       ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2;
+
+                               }
+                               t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                               t1 = (double)cvGetTickCount();//gpu start1              
+                               cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+                               t2=(double)cvGetTickCount();//kernel
+                               cv::ocl::magnitudeSqr(clmat1,clmat2, cldst);
+                               t2 = (double)cvGetTickCount() - t2;//kernel
+                               cv::Mat cpu_dst;
+                               cldst.download(cpu_dst);//download
+                               t1 = (double)cvGetTickCount() - t1;//gpu end1   
+                               if(j == 0)
+                                       continue;
+                               totalgputick=t1+totalgputick;
+                               totalcputick=t0+totalcputick;   
+                               totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::magnitudeSqr(clmat1,clmat2, cldst);
+       };
+#endif
+
+}
+
+
+struct AddWeighted : ArithmTestBase {};
+
+TEST_P(AddWeighted, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+    double totalcputick=0;
+    double totalgputick=0;
+    double totalgputick_kernel=0;
+    double t0=0;
+    double t1=0;
+    double t2=0;
+    for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+    for(int j = 0; j < LOOP_TIMES+1; j ++)
+    {
+        Has_roi(k);
+        double alpha=2.0,beta=1.0,gama=3.0;      
+
+        t0 = (double)cvGetTickCount();//cpu start
+        cv::addWeighted(mat1_roi,alpha,mat2_roi,beta,gama,dst_roi);
+        t0 = (double)cvGetTickCount() - t0;//cpu end
+
+        t1 = (double)cvGetTickCount();//gpu start1
+
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+        t2=(double)cvGetTickCount();//kernel
+        cv::ocl::addWeighted(gmat1,alpha,gmat2,beta,gama, gdst);
+        t2 = (double)cvGetTickCount() - t2;//kernel
+        cv::Mat cpu_dst;
+        gdst_whole.download(cpu_dst);
+        t1 = (double)cvGetTickCount() - t1;//gpu end1
+        if(j == 0)
+            continue;
+        totalgputick=t1+totalgputick;
+        totalcputick=t0+totalcputick;  
+        totalgputick_kernel=t2+totalgputick_kernel;    
+
+    }
+
+        if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+    cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+}
+#else
+    for(int j = 0; j < 2; j ++)
+       {
+          Has_roi(j);
+    double alpha=2.0,beta=1.0,gama=3.0;   
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+        cv::ocl::addWeighted(gmat1,alpha, gmat2,beta,gama, gdst);
+   // double alpha=2.0,beta=1.0,gama=3.0;   
+   // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+   // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+   // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+       };
+#endif
+
+}
+/*
+struct AddWeighted : ArithmTestBase {};
+
+TEST_P(AddWeighted, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int j = 0; j < LOOP_TIMES+1; j ++)
+       {
+               double alpha=2.0,beta=1.0,gama=3.0;      
+
+               t0 = (double)cvGetTickCount();//cpu start
+               cv::addWeighted(mat1,alpha,mat2,beta,gama,dst);
+               t0 = (double)cvGetTickCount() - t0;//cpu end
+
+               t1 = (double)cvGetTickCount();//gpu start1              
+               cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+
+               t2=(double)cvGetTickCount();//kernel
+               cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+               t2 = (double)cvGetTickCount() - t2;//kernel
+               cv::Mat cpu_dst;
+               cldst.download(cpu_dst);
+               t1 = (double)cvGetTickCount() - t1;//gpu end1
+               if(j == 0)
+                       continue;
+               totalgputick=t1+totalgputick;
+               totalcputick=t0+totalcputick;   
+               totalgputick_kernel=t2+totalgputick_kernel;     
+
+       }
+       cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+
+#else
+       //for(int j = 0; j < 2; j ++)
+       //      {
+       double alpha=2.0,beta=1.0,gama=3.0;   
+       cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+       //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+       cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+       //      };
+#endif
+
+}
+
+*/
+// Value-parameterized instantiations for the arithmetic performance tests.
+// Each entry registers one fixture under the "Arithm" prefix; the first
+// Combine() argument lists the matrix types to run and the second is a
+// reserved bool that is always false.
+
+INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(
+                                               Values(CV_8UC1, CV_8UC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(
+                                               Values(CV_32FC1, CV_64FC1),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
+                                               Values(CV_32FC1, CV_64FC1),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1,  CV_32FC4),
+                                               Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+
+INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
+                                               Values(CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
+                                               Values(CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
+                                               Values(CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
+                                               Values(CV_8UC1, CV_32FC1),
+                                               Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(
+                                               Values(CV_8UC1, CV_32FC1),
+                                               Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(
+                                               Values(CV_8U, CV_32S, CV_32F),
+                                               Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(
+                                               Values(CV_8U, CV_32S, CV_32F),
+                                               Values(false)));
+
+
+INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
+                                               Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
+                                               Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
+                                               Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1,CV_16UC1,CV_16SC1,CV_32SC1,CV_32FC1,CV_64FC1), Values(false)));
+//Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false)));
+//Values(false) is the reserved parameter
+
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(
+                                               Values(CV_8UC1, CV_32SC1, CV_32FC1),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_filters.cpp b/modules/ocl/perf/test_filters.cpp
new file mode 100644 (file)
index 0000000..ac9a865
--- /dev/null
@@ -0,0 +1,1096 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Niko Li, newlife20080214@gmail.com
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Zero Lin, Zero.Lin@amd.com
+//    Zhang Ying, zhangying913@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+//using namespace cv::ocl;
+
+PARAM_TEST_CASE(FilterTestBase, MatType, bool)
+{
+       int type;
+       cv::Scalar val;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat mat2;
+       cv::Mat mask;
+       cv::Mat dst;
+       cv::Mat dst1; //bak, for two outputs
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int src2x;
+       int src2y;
+       int dstx;
+       int dsty;
+       int maskx;
+       int masky;
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat mat2_roi;
+       cv::Mat mask_roi;
+       cv::Mat dst_roi;
+       cv::Mat dst1_roi; //bak
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+       cv::ocl::oclMat gdst1_whole; //bak
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gmat2;
+       cv::ocl::oclMat gdst;
+       cv::ocl::oclMat gdst1;   //bak
+       cv::ocl::oclMat gmask;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               mat2 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               dst1  = randomMat(rng, size, type, 5, 16, false);
+               mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+               cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+
+               val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+       }
+
+       void random_roi()
+       {
+               cv::RNG& rng = TS::ptr()->get_rng();
+
+               //randomize ROI
+               roicols = rng.uniform(1, mat1.cols);
+               roirows = rng.uniform(1, mat1.rows);
+               src1x   = rng.uniform(0, mat1.cols - roicols);
+               src1y   = rng.uniform(0, mat1.rows - roirows);
+               src2x   = rng.uniform(0, mat2.cols - roicols);
+               src2y   = rng.uniform(0, mat2.rows - roirows);
+               dstx    = rng.uniform(0, dst.cols  - roicols);
+               dsty    = rng.uniform(0, dst.rows  - roirows);
+               maskx   = rng.uniform(0, mask.cols - roicols);
+               masky   = rng.uniform(0, mask.rows - roirows);
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+               mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+               dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
+
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               gmask = mask_roi;
+       }
+
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// blur
+
+PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
+{
+       int type;
+       cv::Size ksize;
+       int bordertype;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               ksize = GET_PARAM(1);
+               bordertype = GET_PARAM(2);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+
+       void Has_roi(int b)
+       {
+               if(b)
+               {
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+
+};
+
+TEST_P(Blur, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::blur(mat1_roi, dst_roi, ksize, Point(-1,-1), bordertype);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
+       };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+//Laplacian 
+
+PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
+{
+       int type;
+       int ksize;
+
+       //src mat
+       cv::Mat mat; 
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               ksize = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size = cv::Size(2560, 2560);
+
+               mat  = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               if(b)
+               {
+                       roicols =  mat.cols-1; 
+                       roirows = mat.rows-1;
+                       srcx   = 1;
+                       srcy   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat.cols;
+                       roirows = mat.rows;
+                       srcx = 0;
+                       srcy = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+
+};
+
+struct Laplacian : LaplacianTestBase {};
+
+TEST_P(Laplacian, Accuracy) 
+{    
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat = mat_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat = mat_roi;
+
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+       };
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// erode & dilate 
+
+PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
+{
+       int type;
+       //int iterations;
+
+       //erode or dilate kernel
+       cv::Mat kernel;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               //  iterations = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size = cv::Size(2560, 2560);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               //              rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
+               kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               if(b)
+               {
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+
+};
+
+// erode 
+
+struct Erode : ErodeDilateBase{};
+
+TEST_P(Erode, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::erode(mat1_roi, dst_roi, kernel);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat1 = mat1_roi;
+
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::erode(gmat1, gdst, kernel);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1   
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::erode(gmat1, gdst, kernel);
+       };
+#endif
+
+}
+
+// dilate
+
+struct Dilate : ErodeDilateBase{};
+
+TEST_P(Dilate, Mat) // Benchmarks cv::dilate against cv::ocl::dilate, first with Has_roi(0) ("no roi") and then with Has_roi(1) ("with roi").
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME   // timing mode: measure with host tick counters and print averages
+       double totalcputick=0; // accumulated ticks of the cv::dilate calls
+       double totalgputick=0; // accumulated ticks of the whole OpenCL path (oclMat assignments + call + download)
+       double totalgputick_kernel=0; // accumulated ticks of the cv::ocl::dilate call alone
+       double t0=0; // per-iteration ticks of the CPU call
+       double t1=0; // per-iteration ticks of the whole OpenCL path
+       double t2=0;    // per-iteration ticks of the OpenCL call alone
+       for(int k=0;k<2;k++){ // k is passed to Has_roi; results are labelled "no roi" for k == 0, "with roi" otherwise
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // LOOP_TIMES measured iterations plus iteration 0, which is not counted
+               {
+                       Has_roi(k);       
+                       t0 = (double)cvGetTickCount();// CPU window start
+                       cv::dilate(mat1_roi, dst_roi, kernel);
+                       t0 = (double)cvGetTickCount() - t0;// CPU window end
+
+                       t1 = (double)cvGetTickCount();// OpenCL window start (includes the oclMat assignments below)
+                       gdst_whole = dst; // assignment from a cv::Mat — presumably the host-to-device upload
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); // destination ROI on the OpenCL matrix
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();// start of the window around the OpenCL call only
+                       cv::ocl::dilate(gmat1, gdst, kernel);
+                       t2 = (double)cvGetTickCount() - t2;// NOTE(review): host-side ticks; under-reports if the call returns before the kernel finishes — confirm
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);// copy the whole result back to the host
+                       t1 = (double)cvGetTickCount() - t1;// OpenCL window end
+
+                       if(j == 0)
+                               continue; // iteration 0 is excluded from the totals
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // total ticks / (tick frequency * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL call once per ROI mode with no host-side timing (presumably the ocl module prints kernel times itself — confirm)
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::dilate(gmat1, gdst, kernel);
+       };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Sobel 
+
+PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) // fixture for the Sobel benchmark; parameters: matrix type, dx, dy, ksize, border type
+{
+       int type; // matrix type, GET_PARAM(0)
+       int dx, dy, ksize, bordertype; // derivative orders, kernel size and border mode passed to Sobel
+
+       // host source and destination matrices (full size)
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // ROI geometry, filled in by Has_roi()
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       // host ROI views into mat1 / dst, set by Has_roi()
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo; // filled by getDevice() in SetUp
+       // OpenCL counterpart of the whole dst matrix (assigned in the test body)
+       cv::ocl::oclMat gdst_whole;
+
+       // OpenCL source matrix and destination ROI (assigned in the test body)
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp() // reads the test parameters, creates random input data and queries the OpenCL devices
+       {
+               type = GET_PARAM(0);
+               dx = GET_PARAM(1);
+               dy = GET_PARAM(2);
+               ksize = GET_PARAM(3);
+               bordertype = GET_PARAM(4);
+               dx = 2; dy=0; // NOTE(review): overwrites the dx/dy parameters read above, so every instantiation runs with dx=2, dy=0
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size = cv::Size(2560, 2560); // fixed benchmark image size
+
+               mat1 = randomMat(rng, size, type, 5, 16, false); // random input of the parameterised type
+               dst  = randomMat(rng, size, type, 5, 16, false); // destination is pre-filled with random data as well
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0); // at least one OpenCL device is required
+               // to use a non-default device, select it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b) // b != 0: ROI one row/column smaller than the matrix, offset by (1,1); b == 0: the whole matrix
+       {
+               if(b)
+               {
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); // refresh the host ROI views for the chosen geometry
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+
+};
+
+TEST_P(Sobel, Mat) // Benchmarks cv::Sobel against cv::ocl::Sobel, first on the whole matrix (Has_roi(0)) and then on the ROI (Has_roi(1)).
+{
+#ifndef PRINT_KERNEL_RUN_TIME   // timing mode: measure with host tick counters and print averages
+       double totalcputick=0; // accumulated ticks of the cv::Sobel calls
+       double totalgputick=0; // accumulated ticks of the whole OpenCL path (oclMat assignments + call + download)
+       double totalgputick_kernel=0; // accumulated ticks of the cv::ocl::Sobel call alone
+       double t0=0; // per-iteration ticks of the CPU call
+       double t1=0; // per-iteration ticks of the whole OpenCL path
+       double t2=0;    // per-iteration ticks of the OpenCL call alone
+       for(int k=0;k<2;k++){ // k == 0: whole matrix, k == 1: ROI
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // LOOP_TIMES measured iterations plus iteration 0, which is not counted
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();// CPU window start
+                       cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype);
+                       t0 = (double)cvGetTickCount() - t0;// CPU window end
+
+                       t1 = (double)cvGetTickCount();// OpenCL window start (includes the oclMat assignments below)
+                       gdst_whole = dst; // assignment from a cv::Mat — presumably the host-to-device upload
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); // destination ROI on the OpenCL matrix
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();// start of the window around the OpenCL call only
+                       cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype);
+                       t2 = (double)cvGetTickCount() - t2;// NOTE(review): host-side ticks; under-reports if the call returns before the kernel finishes — confirm
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);// copy the whole result back to the host
+                       t1 = (double)cvGetTickCount() - t1;// OpenCL window end
+
+                       if(j == 0)
+                               continue; // iteration 0 is excluded from the totals
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // total ticks / (tick frequency * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL call once per ROI mode with no host-side timing (presumably the ocl module prints kernel times itself — confirm)
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype);
+       };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Scharr 
+
+PARAM_TEST_CASE(Scharr, MatType, int, int, int) // fixture for the Scharr benchmark; parameters: matrix type, dx, dy, border type
+{
+       int type; // matrix type, GET_PARAM(0)
+       int dx, dy, bordertype; // derivative orders and border mode passed to Scharr
+
+       // host source and destination matrices (full size)
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // ROI geometry, filled in by Has_roi()
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       // host ROI views into mat1 / dst, set by Has_roi()
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo; // filled by getDevice() in SetUp
+       // OpenCL counterpart of the whole dst matrix (assigned in the test body)
+       cv::ocl::oclMat gdst_whole;
+
+       // OpenCL source matrix and destination ROI (assigned in the test body)
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp() // reads the test parameters, creates random input data and queries the OpenCL devices
+       {
+               type = GET_PARAM(0);
+               dx = GET_PARAM(1);
+               dy = GET_PARAM(2);
+               bordertype = GET_PARAM(3);
+               dx = 1; dy=0; // NOTE(review): overwrites the dx/dy parameters read above, so every instantiation runs with dx=1, dy=0
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size = cv::Size(2560, 2560); // fixed benchmark image size
+
+               mat1 = randomMat(rng, size, type, 5, 16, false); // random input of the parameterised type
+               dst  = randomMat(rng, size, type, 5, 16, false); // destination is pre-filled with random data as well
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0); // at least one OpenCL device is required
+               // to use a non-default device, select it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b) // b != 0: ROI one row/column smaller than the matrix, offset by (1,1); b == 0: the whole matrix
+       {
+               if(b)
+               {
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); // refresh the host ROI views for the chosen geometry
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+};
+
+TEST_P(Scharr, Mat) // Benchmarks cv::Scharr against cv::ocl::Scharr, first on the whole matrix (Has_roi(0)) and then on the ROI (Has_roi(1)).
+{
+#ifndef PRINT_KERNEL_RUN_TIME   // timing mode: measure with host tick counters and print averages
+       double totalcputick=0; // accumulated ticks of the cv::Scharr calls
+       double totalgputick=0; // accumulated ticks of the whole OpenCL path (oclMat assignments + call + download)
+       double totalgputick_kernel=0; // accumulated ticks of the cv::ocl::Scharr call alone
+       double t0=0; // per-iteration ticks of the CPU call
+       double t1=0; // per-iteration ticks of the whole OpenCL path
+       double t2=0;    // per-iteration ticks of the OpenCL call alone
+       for(int k=0;k<2;k++){ // k == 0: whole matrix, k == 1: ROI
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // LOOP_TIMES measured iterations plus iteration 0, which is not counted
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();// CPU window start
+                       cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype);
+                       t0 = (double)cvGetTickCount() - t0;// CPU window end
+
+                       t1 = (double)cvGetTickCount();// OpenCL window start (includes the oclMat assignments below)
+                       gdst_whole = dst; // assignment from a cv::Mat — presumably the host-to-device upload
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); // destination ROI on the OpenCL matrix
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();// start of the window around the OpenCL call only
+                       cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype);
+                       t2 = (double)cvGetTickCount() - t2;// NOTE(review): host-side ticks; under-reports if the call returns before the kernel finishes — confirm
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);// copy the whole result back to the host
+                       t1 = (double)cvGetTickCount() - t1;// OpenCL window end
+
+                       if(j == 0)
+                               continue; // iteration 0 is excluded from the totals
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // total ticks / (tick frequency * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL call once per ROI mode with no host-side timing (presumably the ocl module prints kernel times itself — confirm)
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype);
+       };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// GaussianBlur
+
+PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) // fixture for the GaussianBlur benchmark; parameters: matrix type, kernel size, border type
+{
+       int type; // matrix type, GET_PARAM(0)
+       cv::Size ksize; // kernel size, GET_PARAM(1)
+       int bordertype; // border mode, GET_PARAM(2)
+
+       double sigma1, sigma2; // sigmas passed to GaussianBlur, drawn at random in SetUp
+
+       // host source and destination matrices (full size)
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // ROI geometry, filled in by Has_roi()
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       // host ROI views into mat1 / dst, set by Has_roi()
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo; // filled by getDevice() in SetUp
+       // OpenCL counterpart of the whole dst matrix (assigned in the test body)
+       cv::ocl::oclMat gdst_whole;
+
+       // OpenCL source matrix and destination ROI (assigned in the test body)
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp() // reads the test parameters, draws sigmas and random input data, queries the OpenCL devices
+       {
+               type = GET_PARAM(0);
+               ksize = GET_PARAM(1);
+               bordertype = GET_PARAM(2);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size = cv::Size(2560, 2560); // fixed benchmark image size
+
+               sigma1 = rng.uniform(0.1, 1.0); 
+               sigma2 = rng.uniform(0.1, 1.0);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false); // random input of the parameterised type
+               dst  = randomMat(rng, size, type, 5, 16, false); // destination is pre-filled with random data as well
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0); // at least one OpenCL device is required
+               // to use a non-default device, select it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b) // b != 0: ROI one row/column smaller than the matrix, offset by (1,1); b == 0: the whole matrix
+       {
+               if(b)
+               {
+                       roicols =  mat1.cols-1; 
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); // refresh the host ROI views for the chosen geometry
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+       }
+
+};
+
+TEST_P(GaussianBlur, Mat) // Benchmarks cv::GaussianBlur against cv::ocl::GaussianBlur, first on the whole matrix (Has_roi(0)) and then on the ROI (Has_roi(1)).
+{
+#ifndef PRINT_KERNEL_RUN_TIME   // timing mode: measure with host tick counters and print averages
+       double totalcputick=0; // accumulated ticks of the cv::GaussianBlur calls
+       double totalgputick=0; // accumulated ticks of the whole OpenCL path (oclMat assignments + call + download)
+       double totalgputick_kernel=0; // accumulated ticks of the cv::ocl::GaussianBlur call alone
+       double t0=0; // per-iteration ticks of the CPU call
+       double t1=0; // per-iteration ticks of the whole OpenCL path
+       double t2=0;    // per-iteration ticks of the OpenCL call alone
+       for(int k=0;k<2;k++){ // k == 0: whole matrix, k == 1: ROI
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // LOOP_TIMES measured iterations plus iteration 0, which is not counted
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();// CPU window start
+                       cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype);
+                       t0 = (double)cvGetTickCount() - t0;// CPU window end
+
+                       t1 = (double)cvGetTickCount();// OpenCL window start (includes the oclMat assignments below)
+                       gdst_whole = dst; // assignment from a cv::Mat — presumably the host-to-device upload
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); // destination ROI on the OpenCL matrix
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();// start of the window around the OpenCL call only
+                       cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+                       t2 = (double)cvGetTickCount() - t2;// NOTE(review): host-side ticks; under-reports if the call returns before the kernel finishes — confirm
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);// copy the whole result back to the host
+                       t1 = (double)cvGetTickCount() - t1;// OpenCL window end
+
+                       if(j == 0)
+                               continue; // iteration 0 is excluded from the totals
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // total ticks / (tick frequency * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL call once per ROI mode with no host-side timing (presumably the ocl module prints kernel times itself — confirm)
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+       };
+#endif
+
+}
+
+//************test**********
+
+INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), // Blur: 4 matrix types x 1 kernel size x 4 border modes
+                                               Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/),
+                                               Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+
+INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( // NOTE(review): prefix is "Filters" here but "Filter" for every other instantiation in this run
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(1/*, 3*/)));
+
+//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
+
+INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); // Erode: 4 matrix types, second parameter fixed to false
+
+//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
+
+INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); // Dilate: 4 matrix types, second parameter fixed to false
+
+
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), // Sobel: type x dx x dy x ksize x border mode
+                                               Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT, // NOTE(review): Sobel::SetUp overwrites dx/dy, so the dx/dy values only repeat identical runs
+                                               (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+
+INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( // Scharr: type x dx x dy x border mode
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), // NOTE(review): Scharr::SetUp overwrites dx/dy, so the dx/dy values only repeat identical runs
+                                               Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( // GaussianBlur: 4 matrix types x 3 kernel sizes x 4 border modes
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
+                                               Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_haar.cpp b/modules/ocl/perf/test_haar.cpp
new file mode 100644 (file)
index 0000000..8aabd67
--- /dev/null
@@ -0,0 +1,198 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/objdetect/objdetect.hpp"
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv;
+
+struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; // functor returning the rect member of a CvAvgComp (used with std::transform in the test below)
+
+PARAM_TEST_CASE(HaarTestBase, int, int)
+{
+       std::vector<cv::ocl::Info> oclinfo;
+    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+       cv::CascadeClassifier cpucascade, cpunestedCascade;
+//    Mat img;
+
+    double scale;
+    int index;
+
+    virtual void SetUp()
+    {
+        scale = 1.1;
+
+#if WIN32
+        string cascadeName="E:\\opencvbuffer\\trunk\\data\\haarcascades\\haarcascade_frontalface_alt.xml";
+#else
+        string cascadeName="../data/haarcascades/haarcascade_frontalface_alt.xml";
+#endif
+
+        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+        {
+            cout << "ERROR: Could not load classifier cascade" << endl;
+            cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
+                "   [--nested-cascade[=nested_cascade_path]]\n"
+                "   [--scale[=<image scale>\n"
+                "   [filename|camera_index]\n" << endl ;
+
+            return;
+        }
+       int devnums = getDevice(oclinfo);
+       CV_Assert(devnums>0);
+       //if you want to use undefault device, set it here
+       //setDevice(oclinfo[0]);
+       cv::ocl::setBinpath("E:\\");
+    }
+};
+
+////////////////////////////////faceDetect/////////////////////////////////////////////////
+
+struct Haar : HaarTestBase {}; // fixture for the face-detection test; adds nothing to HaarTestBase
+
+TEST_P(Haar, FaceDetect) // Times the OpenCL Haar detector on one input image and writes the image with the detections drawn on it.
+{    
+    for(int index = 1;index < 2; index++) // single pass (index == 1); this local shadows the fixture member `index`
+    {
+        Mat img;
+        char buff[256]; // file-name buffer for the sprintf calls below
+#if WIN32
+        sprintf(buff,"E:\\myDataBase\\%d.jpg",index);
+        img = imread( buff, 1 );
+#else 
+        sprintf(buff,"%d.jpg",index); // relative file name, so it is resolved against the working directory
+        img = imread( buff, 1 );
+        std::cout << "Now test " << index << ".jpg" <<std::endl;
+#endif
+        if(img.empty())
+        { 
+            std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
+            continue; // missing image: skipped without recording a test failure
+        }
+
+        int i = 0; // running index of the detection being drawn, used to pick a colour
+        double t = 0; // tick count of one detection call
+        vector<Rect> faces;
+
+        const static Scalar colors[] =  { CV_RGB(0,0,255), // palette of 8 colours, cycled with i%8 when drawing
+            CV_RGB(0,128,255),
+            CV_RGB(0,255,255),
+            CV_RGB(0,255,0),
+            CV_RGB(255,128,0),
+            CV_RGB(255,255,0),
+            CV_RGB(255,0,0),
+            CV_RGB(255,0,255)} ;
+
+        Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); // smallImg: 8-bit single-channel image sized img / scale
+        MemStorage storage(cvCreateMemStorage(0)); // storage handed to oclHaarDetectObjects
+        cvtColor( img, gray, CV_BGR2GRAY );
+        resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+        equalizeHist( smallImg, smallImg );
+        CvMat _image = smallImg;
+
+        Mat tempimg(&_image, false); // Mat built from the CvMat header (false: presumably no data copy — confirm)
+
+        cv::ocl::oclMat image(tempimg); // OpenCL matrix constructed from the host image
+        CvSeq* _objects; // NOTE(review): only assigned in the "#if 1" branch; would be uninitialised if the #else branch were enabled
+
+#if 1
+        for(int k= 0; k<10; k++) // run the detection 10 times, printing each run's time; _objects keeps the last result
+        {
+            t = (double)cvGetTickCount();
+            _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+                    2, 0
+                    |CV_HAAR_SCALE_IMAGE
+                    , Size(30,30), Size(0, 0) );
+
+            t = (double)cvGetTickCount() - t ;
+            printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+        }
+
+#else // disabled CPU path
+        cpucascade.detectMultiScale( image, faces,  1.1,
+                2, 0
+                |CV_HAAR_SCALE_IMAGE
+                , Size(30,30), Size(0, 0) );
+
+#endif
+        vector<CvAvgComp> vecAvgComp;
+        Seq<CvAvgComp>(_objects).copyTo(vecAvgComp); // copy the detected sequence into a vector
+        faces.resize(vecAvgComp.size());
+        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); // keep only the rectangles
+
+        for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) // draw one circle per detection on the original image
+        { 
+            Mat smallImgROI; // unused
+            vector<Rect> nestedObjects; // unused
+            Point center;
+            Scalar color = colors[i%8];
+            int radius;
+            center.x = cvRound((r->x + r->width*0.5)*scale); // rectangle centre multiplied by scale (detections were found on the shrunk image)
+            center.y = cvRound((r->y + r->height*0.5)*scale);
+            radius = cvRound((r->width + r->height)*0.25*scale);
+            circle( img, center, radius, color, 3, 8, 0 );
+        }  
+
+#if WIN32
+        sprintf(buff,"E:\\result1\\%d.jpg",index);
+        imwrite(buff,img);
+#else 
+        sprintf(buff,"testdet_%d.jpg",index); // annotated result is written next to the input
+        imwrite(buff,img);
+#endif
+    }
+}
+
+
+//INSTANTIATE_TEST_CASE_P(HaarTestBase, Haar, Combine(Values(1),
+//            Values(1)));
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_imgproc.cpp b/modules/ocl/perf/test_imgproc.cpp
new file mode 100644 (file)
index 0000000..e01e976
--- /dev/null
@@ -0,0 +1,1551 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Niko Li, newlife20080214@gmail.com
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//    Shengen Yan, yanshengen@gmail.com
+//    Jiang Liyuan, lyuan001.good@163.com
+//    Rock Li, Rock.Li@amd.com
+//    Zailong Wu, bullet@yeah.net
+//    Xu Pang, pangxu010@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+
+MatType nulltype = -1; // sentinel type: ImgprocTestBase skips creating a matrix whose type parameter equals nulltype
+
+#define ONE_TYPE(type)  testing::ValuesIn(typeVector(type)) // gtest parameter generator yielding exactly one matrix type
+#define NULL_TYPE  testing::ValuesIn(typeVector(nulltype)) // gtest parameter generator yielding only the nulltype sentinel
+
+
+vector<MatType> typeVector(MatType type) // wraps one MatType in a single-element vector, as consumed by testing::ValuesIn in ONE_TYPE / NULL_TYPE
+{
+       // build the size-1 vector directly; its only element is a copy of `type`
+       vector<MatType> single(1, type);
+       return single;
+}
+
+
+PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool)
+{
+       int type1,type2,type3,type4,type5;
+       cv::Scalar val;
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int src2x;
+       int src2y;
+       int dstx;
+       int dsty;
+       int dst1x;
+       int dst1y;
+       int maskx;
+       int masky;
+
+       //mat
+       cv::Mat mat1; 
+       cv::Mat mat2;
+       cv::Mat mask;
+       cv::Mat dst;
+       cv::Mat dst1; //bak, for two outputs
+
+       //mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat mat2_roi;
+       cv::Mat mask_roi;
+       cv::Mat dst_roi;
+       cv::Mat dst1_roi; //bak
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl mat
+       cv::ocl::oclMat clmat1;
+       cv::ocl::oclMat clmat2;
+       cv::ocl::oclMat clmask;
+       cv::ocl::oclMat cldst;
+       cv::ocl::oclMat cldst1; //bak
+
+       //ocl mat with roi
+       cv::ocl::oclMat clmat1_roi;
+       cv::ocl::oclMat clmat2_roi;
+       cv::ocl::oclMat clmask_roi;
+       cv::ocl::oclMat cldst_roi;
+       cv::ocl::oclMat cldst1_roi;
+
+       virtual void SetUp()
+       {
+               type1 = GET_PARAM(0);
+               type2 = GET_PARAM(1);
+               type3 = GET_PARAM(2);
+               type4 = GET_PARAM(3);
+               type5 = GET_PARAM(4);
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+               double min = 1,max = 20; 
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums>0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+               if(type1!=nulltype)
+               {
+                       mat1 = randomMat(rng, size, type1, min, max, false);
+                       clmat1 = mat1;
+               }
+               if(type2!=nulltype)
+               {
+                       mat2 = randomMat(rng, size, type2, min, max, false);
+                       clmat2 = mat2;
+               }
+               if(type3!=nulltype)
+               {
+                       dst  = randomMat(rng, size, type3, min, max, false);
+                       cldst = dst;
+               }
+               if(type4!=nulltype)
+               {
+                       dst1 = randomMat(rng, size, type4, min, max, false);
+                       cldst1 = dst1;
+               }
+               if(type5!=nulltype)
+               {
+                       mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+                       cv::threshold(mask, mask, 0.5, 255., type5);
+                       clmask = mask;
+               }
+               val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+       }
+
+
+       void Has_roi(int b) // b != 0: fixed ROI inset by one row/column; b == 0: whole matrices. Rebuilds the host-side *_roi views.
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //fixed (not randomized) ROI: one column/row smaller than mat1, every offset set to 1
+                       roicols =  mat1.cols-1; //ROI width
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src2x   = 1;
+                       src1y   = 1;
+                       src2y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+                       dst1x    = 1;
+                       dst1y    =1;
+                       maskx    =1;
+                       masky   =1;
+               }else
+               { // no ROI: the view covers all of mat1's extent and every offset is 0
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src2x = 0;
+                       src1y = 0;
+                       src2y = 0;
+                       dstx = 0;
+                       dsty = 0;
+                       dst1x  =0;
+                       dst1y  =0;
+                       maskx    =0;
+                       masky   =0;
+               };
+               // Slice a host view for each matrix whose type slot is in use. The same roicols x roirows (taken from mat1) is applied to all of them.
+               if(type1!=nulltype)
+               {
+                       mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+                       //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+               }
+               if(type2!=nulltype)
+               {
+                       mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+                       //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
+               }
+               if(type3!=nulltype)
+               {
+                       dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+                       //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+               }
+               if(type4!=nulltype)
+               {
+                       dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
+                       //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
+               }
+               if(type5!=nulltype)
+               {
+                       mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+                       //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
+               }
+       } // the cl*_roi counterparts stay commented out above; tests that need them slice them themselves
+
+       void random_roi() // Draw a random ROI size from mat1's extent plus per-matrix offsets, then rebuild the host-side *_roi views.
+       {
+               cv::RNG& rng = TS::ptr()->get_rng();
+               // NOTE(review): offsets below are drawn for all five matrices even when a type slot is nulltype; confirm the unused draws are harmless.
+               //randomize ROI size (from mat1) and the top-left offset inside each matrix
+               roicols = rng.uniform(1, mat1.cols);
+               roirows = rng.uniform(1, mat1.rows);
+               src1x   = rng.uniform(0, mat1.cols - roicols);
+               src1y   = rng.uniform(0, mat1.rows - roirows);
+               src2x   = rng.uniform(0, mat2.cols - roicols);
+               src2y   = rng.uniform(0, mat2.rows - roirows);
+               dstx    = rng.uniform(0, dst.cols  - roicols);
+               dsty    = rng.uniform(0, dst.rows  - roirows);
+               dst1x    = rng.uniform(0, dst1.cols  - roicols);
+               dst1y    = rng.uniform(0, dst1.rows  - roirows);
+               maskx   = rng.uniform(0, mask.cols - roicols);
+               masky   = rng.uniform(0, mask.rows - roirows);
+               // Slice a host view for each matrix whose type slot is in use; the cl*_roi counterparts are commented out.
+               if(type1!=nulltype)
+               {
+                       mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+                       //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+               }
+               if(type2!=nulltype)
+               {
+                       mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+                       //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
+               }
+               if(type3!=nulltype)
+               {
+                       dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+                       //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+               }
+               if(type4!=nulltype)
+               {
+                       dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
+                       //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
+               }
+               if(type5!=nulltype)
+               {
+                       mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+                       //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
+               }
+       }
+};
+////////////////////////////////equalizeHist//////////////////////////////////////////
+
+struct equalizeHist : ImgprocTestBase {};
+// Timing benchmark: cv::equalizeHist on the host vs cv::ocl::equalizeHist, run once on the full matrices and once on a ROI.
+TEST_P(equalizeHist, MatType) 
+{ 
+       if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) // only CV_8UC1 input with a same-typed dst is benchmarked
+       {
+               cout<<"Unsupported type"<<endl;
+               EXPECT_DOUBLE_EQ(0.0, 0.0); // trivially true: the unsupported case is reported, not failed
+       }
+       else
+       {
+#ifndef PRINT_KERNEL_RUN_TIME   
+               double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+               double totalgputick=0;
+               double totalgputick_kernel=0;
+               double t0=0; // per-iteration ticks: t0 = host call
+               double t1=0; // t1 = OpenCL path including ROI slicing
+               double t2=0; // t2 = the cv::ocl call only
+               for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+                       totalcputick=0;
+                       totalgputick=0;
+                       totalgputick_kernel=0;
+                       for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+                       {
+                               Has_roi(k);       
+
+                               t0 = (double)cvGetTickCount();//cpu start
+                               cv::equalizeHist(mat1_roi, dst_roi);
+                               t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                               t1 = (double)cvGetTickCount();//gpu start1
+                               if(type1!=nulltype)
+                               {
+                                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                               }
+                               cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+                               t2=(double)cvGetTickCount();//kernel
+                               cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
+                               t2 = (double)cvGetTickCount() - t2;//kernel
+                               cv::Mat cpu_cldst;
+                               //cldst.download(cpu_cldst);//download (disabled here, so t1 does not include a read-back)
+                               t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                               if(j == 0)
+                                       continue;
+
+                               totalgputick=t1+totalgputick;
+                               totalcputick=t0+totalcputick;   
+                               totalgputick_kernel=t2+totalgputick_kernel;     
+
+                       }
+                       if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                       cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+                       cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                       cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               }
+#else
+               for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+               {
+                       Has_roi(j);
+                       if(type1!=nulltype)
+                       {
+                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                       }
+                       if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                       cv::ocl::equalizeHist(clmat1_roi, cldst_roi); // NOTE(review): unlike the timing branch, cldst_roi is not re-sliced from cldst here
+               };
+#endif
+       }
+}
+
+
+////////////////////////////////bilateralFilter////////////////////////////////////////////
+
+struct bilateralFilter : ImgprocTestBase {};
+
+TEST_P(bilateralFilter, Mat) 
+{    
+       double sigmacolor = 50.0;
+       int radius = 9;
+       int d = 2*radius+1;
+       double sigmaspace = 20.0;
+       int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+       //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+       if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
+       {
+               cout<<"Unsupported type"<<endl;
+               EXPECT_DOUBLE_EQ(0.0, 0.0);
+       }
+       else
+       {
+               for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
+#ifndef PRINT_KERNEL_RUN_TIME   
+                       double totalcputick=0;
+                       double totalgputick=0;
+                       double totalgputick_kernel=0;
+                       double t0=0;
+                       double t1=0;
+                       double t2=0;    
+                       for(int k=0;k<2;k++){
+                               totalcputick=0;
+                               totalgputick=0;
+                               totalgputick_kernel=0;
+                               for(int j = 0; j < LOOP_TIMES+1; j ++)
+                               {
+                                       Has_roi(k);       
+
+                                       t0 = (double)cvGetTickCount();//cpu start
+                                       cv::bilateralFilter(mat1_roi, dst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+                                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                                       t1 = (double)cvGetTickCount();//gpu start1              
+                                       if(type1!=nulltype)
+                                       {
+                                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                                       }
+                                       t2=(double)cvGetTickCount();//kernel
+                                       cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+                                       t2 = (double)cvGetTickCount() - t2;//kernel
+                                       cv::Mat cpu_cldst;
+                                       cldst.download(cpu_cldst);//download
+                                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+
+                                       if(j == 0)
+                                               continue;
+
+                                       totalgputick=t1+totalgputick;
+                                       totalcputick=t0+totalcputick;   
+                                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+                               }
+                               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                       }
+
+#else
+                       for(int j = 0; j < 2; j ++)
+                       {
+                               Has_roi(j);
+                               if(type1!=nulltype)
+                               {
+                                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                               };
+                               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                               cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+                       };
+
+#endif
+               };
+
+       }
+}
+
+////////////////////////////////copyMakeBorder////////////////////////////////////////////
+
+struct CopyMakeBorder : ImgprocTestBase {};
+
+TEST_P(CopyMakeBorder, Mat) 
+{    
+       int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+       //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+
+       if ((mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1) || mat1.type() != dst.type())
+       {
+               cout<<"Unsupported type"<<endl;
+               EXPECT_DOUBLE_EQ(0.0, 0.0);
+       }
+       else
+       {
+               for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
+#ifndef PRINT_KERNEL_RUN_TIME   
+                       double totalcputick=0;
+                       double totalgputick=0;
+                       double totalgputick_kernel=0;
+                       double t0=0;
+                       double t1=0;
+                       double t2=0;    
+                       for(int k=0;k<2;k++){
+                               totalcputick=0;
+                               totalgputick=0;
+                               totalgputick_kernel=0;
+                               for(int j = 0; j < LOOP_TIMES+1; j ++)
+                               {
+                                       Has_roi(k);       
+
+                                       t0 = (double)cvGetTickCount();//cpu start
+                                       cv::copyMakeBorder(mat1_roi, dst_roi, 7,5,5,7, bordertype[i],cv::Scalar(1.0));
+                                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                                       t1 = (double)cvGetTickCount();//gpu start1              
+                                       if(type1!=nulltype)
+                                       {
+                                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                                       }
+                                       t2=(double)cvGetTickCount();//kernel
+                                       cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,7,5,5,7,  bordertype[i],cv::Scalar(1.0));
+                                       t2 = (double)cvGetTickCount() - t2;//kernel
+                                       cv::Mat cpu_cldst;
+                                       cldst.download(cpu_cldst);//download
+                                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+
+                                       if(j == 0)
+                                               continue;
+
+                                       totalgputick=t1+totalgputick;
+                                       totalcputick=t0+totalcputick;   
+                                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+                               }
+                               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                       }
+#else
+                       for(int j = 0; j < 2; j ++)
+                       {
+                               Has_roi(j);
+                               if(type1!=nulltype)
+                               {
+                                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                               };
+                               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                               cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,7,5,5,7,  bordertype[i],cv::Scalar(1.0));
+                       };
+#endif
+               };
+       }
+}
+
+////////////////////////////////cornerMinEigenVal//////////////////////////////////////////
+
+struct cornerMinEigenVal : ImgprocTestBase {};
+// Timing benchmark: cv::cornerMinEigenVal on the host vs cv::ocl::cornerMinEigenVal, with and without a ROI.
+TEST_P(cornerMinEigenVal, Mat) 
+{      
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0; // per-iteration ticks: t0 = host call
+       double t1=0; // t1 = OpenCL path including ROI slicing and download
+       double t2=0; // t2 = the cv::ocl call only
+       for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+               {
+                       Has_roi(k);       
+                       int blockSize = 7, apertureSize= 1 + 2 * (rand() % 4); // aperture is 1, 3, 5 or 7; NOTE(review): it changes per iteration, so the averages mix aperture sizes
+                       int borderType = cv::BORDER_REFLECT;
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType); 
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       if(type1!=nulltype)
+                       {
+                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                       }
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_cldst;
+                       cldst.download(cpu_cldst);//download; NOTE(review): cldst_roi is not sliced from cldst in this test - confirm this reads back the result
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+       {
+               Has_roi(j);
+               int blockSize = 7, apertureSize= 1 + 2 * (rand() % 4);
+               int borderType = cv::BORDER_REFLECT;
+               if(type1!=nulltype)
+               {
+                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+               };
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+       };
+#endif
+}
+
+
+////////////////////////////////cornerHarris//////////////////////////////////////////
+
+struct cornerHarris : ImgprocTestBase {};
+// Timing benchmark: cv::cornerHarris on the host vs cv::ocl::cornerHarris, with and without a ROI.
+TEST_P(cornerHarris, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0; // per-iteration ticks: t0 = host call
+       double t1=0; // t1 = OpenCL path including ROI slicing and download
+       double t2=0; // t2 = the cv::ocl call only
+       for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+               {
+                       Has_roi(k);   
+                       int blockSize = 7, apertureSize= 3;
+                       int borderType = cv::BORDER_REFLECT;
+                       double kk = 2; // passed as the k argument of cornerHarris; named kk because k is the outer loop index
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType); 
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       if(type1!=nulltype)
+                       {
+                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                       }
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_cldst;
+                       cldst.download(cpu_cldst);//download; NOTE(review): cldst_roi is not sliced from cldst in this test - confirm this reads back the result
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+       {
+               Has_roi(j);
+               double kk = 2;
+               int blockSize = 7, apertureSize= 3;
+               int borderType = cv::BORDER_REFLECT;
+               if(type1!=nulltype)
+               {
+                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+               };
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
+       };
+#endif
+
+}
+
+
+////////////////////////////////integral/////////////////////////////////////////////////
+
+struct integral : ImgprocTestBase {};
+// Timing benchmark: three-argument cv::integral on the host vs cv::ocl::integral (two outputs each), with and without a ROI.
+TEST_P(integral, Mat) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0; // per-iteration ticks: t0 = host call
+       double t1=0; // t1 = OpenCL path including ROI slicing and both downloads
+       double t2=0; // t2 = the cv::ocl call only
+       for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+               {
+                       Has_roi(k);   
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::integral(mat1_roi, dst_roi, dst1_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       if(type1!=nulltype)
+                       {
+                               clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                       }
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_cldst;
+                       cv::Mat cpu_cldst1;
+                       cldst.download(cpu_cldst);//download; NOTE(review): cldst_roi/cldst1_roi are not sliced from cldst/cldst1 in this test - confirm these read back the results
+                       cldst1.download(cpu_cldst1);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+       {
+               Has_roi(j);
+               if(type1!=nulltype)
+               {
+                       clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+               };
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+       };
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// warpAffine  & warpPerspective
+
+PARAM_TEST_CASE(WarpTestBase, MatType, int) // shared fixture for the warp benchmarks; parameters are (matrix type, interpolation flag)
+{
+       int type;
+       cv::Size size;
+       int interpolation;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // set up roi
+       int src_roicols;
+       int src_roirows;
+       int dst_roicols;
+       int dst_roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+       // Reads the test parameters, fills mat1/dst with random data of size MWIDTH x MHEIGHT and initializes the OpenCL device list.
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               //dsize = GET_PARAM(1);
+               interpolation = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               size = cv::Size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0); // aborts the fixture when getDevice reports no device
+               //if you want to use a non-default device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+       void Has_roi(int b) // b != 0: fixed ROI inset by one row/column; b == 0: whole matrices. Rebuilds mat1_roi and dst_roi.
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //fixed (not randomized) ROI: one column/row smaller than each matrix, offsets set to 1
+                       src_roicols =  mat1.cols-1; //source ROI width
+                       src_roirows = mat1.rows-1;
+                       dst_roicols=dst.cols-1;
+                       dst_roirows=dst.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+
+               }else
+               { // no ROI: views cover the whole of mat1 and dst, offsets are 0
+                       src_roicols = mat1.cols;
+                       src_roirows = mat1.rows;
+                       dst_roicols=dst.cols;
+                       dst_roirows=dst.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+
+               };
+               mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows));
+               dst_roi  = dst(Rect(dstx,dsty,dst_roicols,dst_roirows));
+
+               // the oclMat counterparts (gmat1, gdst) are not touched here; the tests assign them
+       }
+
+};
+
+/////warpAffine
+
+struct WarpAffine : WarpTestBase{};
+// Timing benchmark: cv::warpAffine on the host vs cv::ocl::warpAffine, with and without a ROI.
+TEST_P(WarpAffine, Mat)
+{
+       static const double coeffs[2][3] = // 2x3 affine matrix: rotation by 3.14/6 rad (about 30 degrees) plus translation (100, -100)
+       {
+               {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+               {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+       };
+       Mat M(2, 3, CV_64F, (void*)coeffs); // header over the static array above, built from its pointer
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0; // per-iteration ticks: t0 = host call
+       double t1=0; // t1 = OpenCL path including the Mat-to-oclMat assignments and the download
+       double t2=0; // t2 = the cv::ocl call only
+       for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); // NOTE(review): dsize is the full `size` even when dst_roi is the smaller ROI view - confirm intended
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+       };
+#endif
+
+}
+
+
+// warpPerspective
+
+struct WarpPerspective : WarpTestBase{};
+// Timing benchmark: cv::warpPerspective on the host vs cv::ocl::warpPerspective, with and without a ROI.
+TEST_P(WarpPerspective, Mat)
+{
+       static const double coeffs[3][3] = // 3x3 matrix: rotation by 3.14/6 rad (about 30 degrees) plus translation (100, -100); last row is (0, 0, 1)
+       {
+               {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+               {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+               {0.0, 0.0, 1.0}
+       };
+       Mat M(3, 3, CV_64F, (void*)coeffs); // header over the static array above, built from its pointer
+
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0; // tick totals over the measured iterations (reset per ROI mode below)
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0; // per-iteration ticks: t0 = host call
+       double t1=0; // t1 = OpenCL path including the Mat-to-oclMat assignments and the download
+       double t2=0; // t2 = the cv::ocl call only
+       for(int k=0;k<2;k++){ // k==0: full matrices, k==1: ROI (passed to Has_roi)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: j==0 runs but is excluded from the totals (warm-up)
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); // NOTE(review): dsize is the full `size` even when dst_roi is the smaller ROI view - confirm intended
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // cvGetTickFrequency() is ticks per microsecond, so *1000 yields ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++) // one OpenCL call per ROI mode; presumably the ocl module prints kernel times itself in this build - TODO confirm
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+       };
+#endif
+
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// resize
+
+PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
+{
+       int type;
+       cv::Size dsize;
+       double fx, fy;
+       int interpolation;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // set up roi
+       int src_roicols;
+       int src_roirows;
+       int dst_roicols;
+       int dst_roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               dsize = GET_PARAM(1);
+               fx = GET_PARAM(2);
+               fy = GET_PARAM(3);
+               interpolation = GET_PARAM(4);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               if(dsize == cv::Size() && !(fx > 0 && fy > 0))
+               {
+                       cout << "invalid dsize and fx fy" << endl;
+                       return;
+               }
+
+               if(dsize == cv::Size()) 
+               {
+                       dsize.width = (int)(size.width * fx);
+                       dsize.height = (int)(size.height * fy);
+               }
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, dsize, type, 5, 16, false);
+
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       src_roicols =  mat1.cols-1; //start
+                       src_roirows = mat1.rows-1;
+                       dst_roicols=dst.cols-1;
+                       dst_roirows=dst.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+
+               }else
+               {
+                       src_roicols = mat1.cols;
+                       src_roirows = mat1.rows;
+                       dst_roicols=dst.cols;
+                       dst_roirows=dst.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+
+               };
+               mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows));
+               dst_roi  = dst(Rect(dstx,dsty,dst_roicols,dst_roirows));
+
+
+       }
+
+};
+// Benchmarks cv::resize against cv::ocl::resize: k == 0 runs on whole matrices, k == 1 on the fixed ROI.
+TEST_P(Resize, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+                       // NOTE(review): for k == 1 dst_roi/gdst are one row and column smaller than dsize - confirm resize still writes into the ROI
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+                       // t1 brackets everything from filling the device matrices to the download
+                       t1 = (double)cvGetTickCount();//gpu start
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+                       // t2 brackets only the cv::ocl::resize call
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel start
+                       cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       // iteration 0 is excluded from the totals (presumably a warm-up run)
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: one call per configuration, no host-side timing
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
+               gmat1 = mat1_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+       };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+//threshold 
+
+PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
+{
+       int type;
+       int threshOp;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               threshOp = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat1.cols-1; //start
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x = 0;
+                       src1y = 0;
+                       dstx = 0;
+                       dsty = 0;
+
+               };
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+
+       }
+};
+// Benchmarks cv::threshold against cv::ocl::threshold: k == 0 runs on whole matrices, k == 1 on the fixed ROI.
+TEST_P(Threshold, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+                       // a new random maxVal / thresh pair is drawn every iteration, outside the timed regions
+                       double maxVal = randomDouble(20.0, 127.0);
+                       double thresh = randomDouble(0.0, maxVal);
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+                       // t1 brackets everything from filling the device matrices to the download
+                       t1 = (double)cvGetTickCount();//gpu start
+
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       gmat1 = mat1_roi;
+                       t2=(double)cvGetTickCount();//kernel start: t2 brackets only the cv::ocl::threshold call
+                       cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end
+
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       // iteration 0 is excluded from the totals (presumably a warm-up run)
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: one call per configuration, no host-side timing
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               double maxVal = randomDouble(20.0, 127.0);
+               double thresh = randomDouble(0.0, maxVal);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               gmat1 = mat1_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
+       };
+#endif
+
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//meanShift
+
+PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
+{
+       int type, typeCoor;
+       int sp, sr;
+       cv::TermCriteria crit;
+       //src mat
+       cv::Mat src;
+       cv::Mat dst;
+       cv::Mat dstCoor;
+
+       //set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat src_roi;
+       cv::Mat dst_roi;
+       cv::Mat dstCoor_roi;
+
+       //ocl dst mat
+       cv::ocl::oclMat gdst;
+       cv::ocl::oclMat gdstCoor;
+
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl mat with roi
+       cv::ocl::oclMat gsrc_roi;
+       cv::ocl::oclMat gdst_roi;
+       cv::ocl::oclMat gdstCoor_roi;
+
+       virtual void SetUp()
+       {
+               type     = GET_PARAM(0);
+               typeCoor = GET_PARAM(1);
+               sp       = GET_PARAM(2);
+               sr       = GET_PARAM(3);
+               crit     = GET_PARAM(4);
+
+               cv::RNG &rng = TS::ptr()->get_rng();
+
+               // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
+               cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+               src = randomMat(rng, size, type, 5, 16, false);
+               dst = randomMat(rng, size, type, 5, 16, false);
+               dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
+
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               cv::ocl::setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               if(b)
+               {
+                       //randomize ROI
+                       roicols = src.cols - 1;
+                       roirows = src.rows - 1;
+                       srcx = 1;
+                       srcy = 1;
+                       dstx = 1;
+                       dsty = 1;
+               }else
+               {
+                       roicols = src.cols;
+                       roirows = src.rows;
+                       srcx = 0;
+                       srcy = 0;
+                       dstx = 0;
+                       dsty = 0;
+               };
+
+               src_roi = src(Rect(srcx, srcy, roicols, roirows));
+               dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+               dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows));
+
+               gdst = dst;
+               gdstCoor = dstCoor;
+       }
+};
+
+// meanShiftFiltering: reuses the meanShiftTestBase fixture unchanged.
+struct meanShiftFiltering : meanShiftTestBase {};
+// Times cv::ocl::meanShiftFiltering only (no CPU reference): k == 0 whole matrices, k == 1 fixed ROI.
+TEST_P(meanShiftFiltering, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++)
+       {
+               double totalgputick=0;
+               double totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+                       // gdst is assigned from dst inside Has_roi(), i.e. before t1 starts, so that step is not part of the gpu total
+                       t1 = (double)cvGetTickCount();//gpu start
+
+                       gsrc_roi = src_roi;
+                       gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));  // ROI view of the device destination
+
+                       t2=(double)cvGetTickCount();//kernel start: t2 brackets only the meanShiftFiltering call
+                       cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end
+
+                       cv::Mat cpu_gdst;
+                       gdst.download(cpu_gdst);//download
+
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       // iteration 0 is excluded from the totals (presumably a warm-up run)
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: one call per configuration, no host-side timing
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+
+               gsrc_roi = src_roi;
+               gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));  // ROI view of the device destination
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+       };
+#endif
+
+}
+
+// meanShiftProc: reuses the meanShiftTestBase fixture unchanged.
+struct meanShiftProc : meanShiftTestBase {};
+// Times cv::ocl::meanShiftProc only (no CPU reference): k == 0 whole matrices, k == 1 fixed ROI.
+TEST_P(meanShiftProc, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++)
+       {
+               double totalgputick=0;
+               double totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+                       // gdst / gdstCoor are assigned inside Has_roi(), i.e. before t1 starts, so that step is not part of the gpu total
+                       t1 = (double)cvGetTickCount();//gpu start
+
+                       gsrc_roi = src_roi;
+                       gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));  // ROI view of the device destination
+                       gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+
+                       t2=(double)cvGetTickCount();//kernel start: t2 brackets only the meanShiftProc call
+                       cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end
+                       // NOTE(review): only the coordinate output is downloaded; gdst is not - confirm this is intended
+                       cv::Mat cpu_gdstCoor;
+                       gdstCoor.download(cpu_gdstCoor);//download
+
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       // iteration 0 is excluded from the totals (presumably a warm-up run)
+                       if(j == 0)
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: one call per configuration, no host-side timing
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+
+               gsrc_roi = src_roi;
+               gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));  // ROI view of the device destination
+               gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+       };
+#endif
+
+}
+
+
+
+// Test instantiations: the parameter combinations each benchmark fixture is run with.
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
+                                               ONE_TYPE(CV_8UC1),
+                                               NULL_TYPE,
+                                               ONE_TYPE(CV_8UC1),
+                                               NULL_TYPE,
+                                               NULL_TYPE,
+                                               Values(false))); // Values(false) is the reserved parameter
+// The instantiations below are disabled (commented out).
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
+//     ONE_TYPE(CV_8UC1),
+//     NULL_TYPE,
+//     ONE_TYPE(CV_8UC1),
+//     NULL_TYPE,
+//     NULL_TYPE,
+//     Values(false))); // Values(false) is the reserved parameter
+//
+//
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
+//     Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/),
+//     NULL_TYPE,
+//     Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/),
+//     NULL_TYPE,
+//     NULL_TYPE,
+//     Values(false))); // Values(false) is the reserved parameter
+
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
+//     Values(CV_8UC1,CV_32FC1),
+//     NULL_TYPE,
+//     ONE_TYPE(CV_32FC1),
+//     NULL_TYPE,
+//     NULL_TYPE,
+//     Values(false))); // Values(false) is the reserved parameter
+//
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
+//     Values(CV_8UC1,CV_32FC1),
+//     NULL_TYPE,
+//     ONE_TYPE(CV_32FC1),
+//     NULL_TYPE,
+//     NULL_TYPE,
+//     Values(false))); // Values(false) is the reserved parameter
+
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
+                                               ONE_TYPE(CV_8UC1),
+                                               NULL_TYPE,
+                                               ONE_TYPE(CV_32SC1),
+                                               ONE_TYPE(CV_32FC1),
+                                               NULL_TYPE,
+                                               Values(false))); // Values(false) is the reserved parameter
+// WarpAffine / WarpPerspective: interpolation flags are cast to MatType (presumably the fixtures' declared parameter type - TODO confirm).
+INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+                                               (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+                                               (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+
+
+INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
+                                               (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+                                               (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+                                               (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+
+// Resize: the empty cv::Size() makes Resize::SetUp derive the output size from fx and fy.
+INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),  Values(cv::Size()),
+                                               Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
+
+
+INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
+                                               Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY),
+                                               ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC),
+                                               ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))));
+
+INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
+                                               ONE_TYPE(CV_8UC4),
+                                               ONE_TYPE(CV_16SC2),// not passed to the meanShiftFiltering call; the fixture only uses it to allocate dstCoor
+                                               Values(5),
+                                               Values(6),
+                                               Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
+                                               ));
+
+INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
+                                               ONE_TYPE(CV_8UC4),
+                                               ONE_TYPE(CV_16SC2),
+                                               Values(5),
+                                               Values(6),
+                                               Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1))
+                                               ));
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_matrix_operation.cpp b/modules/ocl/perf/test_matrix_operation.cpp
new file mode 100644 (file)
index 0000000..cc9a142
--- /dev/null
@@ -0,0 +1,616 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv::ocl;
+////////////////////////////////converto/////////////////////////////////////////////////
+PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
+{
+       int type;
+       int dst_type;
+
+       //src mat
+       cv::Mat mat; 
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type     = GET_PARAM(0);
+               dst_type = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat.cols-1; //start
+                       roirows = mat.rows-1;
+                       srcx   = 1;
+                       srcy   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+               }else
+               {
+                       roicols = mat.cols;
+                       roirows = mat.rows;
+                       srcx   = 0;
+                       srcy   = 0;
+                       dstx   = 0;
+                       dsty   = 0;
+               };
+
+               mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst_whole = dst;
+               //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gmat = mat_roi;
+       }
+};
+
+// ConvertTo: reuses the ConvertToTestBase fixture unchanged.
+struct ConvertTo :ConvertToTestBase {};
+// NOTE(review): despite the name "Accuracy" this body compares no results; it only times Mat::convertTo against oclMat::convertTo.
+TEST_P(ConvertTo, Accuracy) 
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);
+                       // k == 0: whole matrices, k == 1: fixed ROI
+                       t0 = (double)cvGetTickCount();//cpu start
+                       mat_roi.convertTo(dst_roi, dst_type);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+                       // t1 brackets everything from filling the device matrices to the download
+                       t1 = (double)cvGetTickCount();//gpu start
+                       gdst_whole = dst;
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       // t2 brackets only the oclMat::convertTo call
+                       gmat = mat_roi;
+                       t2=(double)cvGetTickCount();//kernel start
+                       gmat.convertTo(gdst, dst_type);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       if(j == 0) // iteration 0 is excluded from the totals (presumably a warm-up run)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;
+                       totalgputick_kernel=t2+totalgputick_kernel;
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: one call per configuration, no host-side timing
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               gmat = mat_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               gmat.convertTo(gdst, dst_type);
+       };
+#endif
+
+}
+
+
+///////////////////////////////////////////copyto/////////////////////////////////////////////////////////////
+
+PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
+{
+       int type;
+
+       cv::Mat mat; 
+       cv::Mat mask;
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int dstx;
+       int dsty;
+       int maskx;
+       int masky;
+
+       //src mat with roi
+       cv::Mat mat_roi;
+       cv::Mat mask_roi;
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat;
+       cv::ocl::oclMat gdst;
+       cv::ocl::oclMat gmask;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat = randomMat(rng, size, type, 5, 16, false);
+               dst  = randomMat(rng, size, type, 5, 16, false);
+               mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+               cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat.cols-1; //start
+                       roirows = mat.rows-1;
+                       srcx   = 1;
+                       srcy   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+                       maskx   = 1;
+                       masky   = 1;
+               }else
+               {
+                       roicols = mat.cols;
+                       roirows = mat.rows;
+                       srcx   = 0;
+                       srcy   = 0;
+                       dstx   = 0;
+                       dsty   = 0;
+                       maskx   = 0;
+                       masky   = 0;
+               };
+
+               mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+               mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+               dst_roi  = dst(Rect(dstx,dsty,roicols,roirows));
+
+               //gdst_whole = dst;
+               //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               //gmat = mat_roi;
+               //gmask = mask_roi;
+       }
+};
+
+struct CopyTo : public CopyToTestBase {}; // test-case type used by the TEST_P(CopyTo, ...) benchmarks
+
+TEST_P(CopyTo, Without_mask) // times cv::Mat::copyTo against oclMat::copyTo, first on the whole matrix, then on a ROI
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   // macro not defined: measure with the host tick counter
+       double totalcputick=0; // accumulated ticks of the cv::Mat call
+       double totalgputick=0; // accumulated ticks of the OpenCL path, from the first oclMat assignment through download()
+       double totalgputick_kernel=0; // accumulated ticks of the oclMat::copyTo call alone
+       double t0=0; // per-pass ticks of the cv::Mat call
+       double t1=0; // per-pass ticks of the OpenCL path
+       double t2=0;    // per-pass ticks of the oclMat::copyTo call
+       for(int k=0;k<2;k++){ // k==0: Has_roi(0), k==1: Has_roi(1)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: the j==0 pass is not accumulated
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       mat_roi.copyTo(dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start: this timed region also covers the oclMat assignments and the download
+                       gdst_whole = dst; // presumably uploads dst to the device -- TODO confirm against oclMat assignment from cv::Mat
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat = mat_roi;
+                       t2=(double)cvGetTickCount();//kernel start
+                       gmat.copyTo(gdst);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end; NOTE(review): host ticks around the call -- confirm the device work has finished by here
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       if(j == 0) // first pass is discarded, presumably to keep one-time start-up cost out of the averages
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // each average: total ticks / (cvGetTickFrequency() * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL copy once per ROI mode; this function does no timing of its own
+       for(int j = 0; j < 2; j ++) // j==0: Has_roi(0), j==1: Has_roi(1)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               gmat = mat_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; // label only; presumably the run time is printed elsewhere -- TODO confirm
+               gmat.copyTo(gdst);
+       };
+#endif
+}
+
+TEST_P(CopyTo, With_mask) // times masked cv::Mat::copyTo against masked oclMat::copyTo, first on the whole matrix, then on a ROI
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   // macro not defined: measure with the host tick counter
+       double totalcputick=0; // accumulated ticks of the cv::Mat call
+       double totalgputick=0; // accumulated ticks of the OpenCL path, from the first oclMat assignment through download()
+       double totalgputick_kernel=0; // accumulated ticks of the oclMat::copyTo call alone
+       double t0=0; // per-pass ticks of the cv::Mat call
+       double t1=0; // per-pass ticks of the OpenCL path
+       double t2=0;    // per-pass ticks of the oclMat::copyTo call
+       for(int k=0;k<2;k++){ // k==0: Has_roi(0), k==1: Has_roi(1)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: the j==0 pass is not accumulated
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       mat_roi.copyTo(dst_roi,mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start: this timed region also covers the oclMat assignments and the download
+                       gdst_whole = dst; // presumably uploads dst to the device -- TODO confirm against oclMat assignment from cv::Mat
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                       gmat = mat_roi;
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel start
+                       gmat.copyTo(gdst, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end; NOTE(review): host ticks around the call -- confirm the device work has finished by here
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       if(j == 0) // first pass is discarded, presumably to keep one-time start-up cost out of the averages
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // each average: total ticks / (cvGetTickFrequency() * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the masked OpenCL copy once per ROI mode; this function does no timing of its own
+       for(int j = 0; j < 2; j ++) // j==0: Has_roi(0), j==1: Has_roi(1)
+       {
+               Has_roi(j);
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+               gmat = mat_roi;
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; // label only; presumably the run time is printed elsewhere -- TODO confirm
+               gmat.copyTo(gdst, gmask);
+       };
+#endif
+}
+
+///////////////////////////////////////////setTo/////////////////////////////////////////////////////////////
+
+PARAM_TEST_CASE(SetToTestBase, MatType, bool)
+{
+       int type;
+       cv::Scalar val;
+
+       cv::Mat mat; 
+       cv::Mat mask;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int maskx;
+       int masky;
+
+       //src mat with roi
+       cv::Mat mat_roi;
+       cv::Mat mask_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gmat_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat;
+       cv::ocl::oclMat gmask;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat = randomMat(rng, size, type, 5, 16, false);
+               mask = randomMat(rng, size, CV_8UC1, 0, 2,  false);
+
+               cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+               val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat.cols-1; //start
+                       roirows = mat.rows-1;
+                       srcx   = 1;
+                       srcy   = 1;
+                       maskx   = 1;
+                       masky   = 1;
+               }else
+               {
+                       roicols = mat.cols;
+                       roirows = mat.rows;
+                       srcx   = 0;
+                       srcy   = 0;
+                       maskx   = 0;
+                       masky   = 0;
+               };
+
+               mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+               mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+
+               //gmat_whole = mat;
+               //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+               //gmask = mask_roi;
+       }
+};
+
+struct SetTo : public SetToTestBase {}; // test-case type used by the TEST_P(SetTo, ...) benchmarks
+
+TEST_P(SetTo, Without_mask) // times cv::Mat::setTo against oclMat::setTo, first on the whole matrix, then on a ROI
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   // macro not defined: measure with the host tick counter
+       double totalcputick=0; // accumulated ticks of the cv::Mat call
+       double totalgputick=0; // accumulated ticks of the OpenCL path, from the first oclMat assignment through download()
+       double totalgputick_kernel=0; // accumulated ticks of the oclMat::setTo call alone
+       double t0=0; // per-pass ticks of the cv::Mat call
+       double t1=0; // per-pass ticks of the OpenCL path
+       double t2=0;    // per-pass ticks of the oclMat::setTo call
+       for(int k=0;k<2;k++){ // k==0: Has_roi(0), k==1: Has_roi(1)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: the j==0 pass is not accumulated
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       mat_roi.setTo(val);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start: this timed region also covers the oclMat assignments and the download
+                       gmat_whole = mat; // NOTE(review): mat_roi is a view of mat, so mat presumably already holds val from the CPU call above -- confirm
+                       gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+                       t2=(double)cvGetTickCount();//kernel start
+                       gmat.setTo(val);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end; NOTE(review): host ticks around the call -- confirm the device work has finished by here
+                       cv::Mat cpu_dst;
+                       gmat_whole.download(cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       if(j == 0) // first pass is discarded, presumably to keep one-time start-up cost out of the averages
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // each average: total ticks / (cvGetTickFrequency() * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the OpenCL setTo once per ROI mode; this function does no timing of its own
+       for(int j = 0; j < 2; j ++) // j==0: Has_roi(0), j==1: Has_roi(1)
+       {
+               Has_roi(j);
+               gmat_whole = mat;
+               gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; // label only; presumably the run time is printed elsewhere -- TODO confirm
+               gmat.setTo(val);
+       };
+#endif
+}
+
+TEST_P(SetTo, With_mask) // times masked cv::Mat::setTo against masked oclMat::setTo, first on the whole matrix, then on a ROI
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   // macro not defined: measure with the host tick counter
+       double totalcputick=0; // accumulated ticks of the cv::Mat call
+       double totalgputick=0; // accumulated ticks of the OpenCL path, from the first oclMat assignment through download()
+       double totalgputick_kernel=0; // accumulated ticks of the oclMat::setTo call alone
+       double t0=0; // per-pass ticks of the cv::Mat call
+       double t1=0; // per-pass ticks of the OpenCL path
+       double t2=0;    // per-pass ticks of the oclMat::setTo call
+       for(int k=0;k<2;k++){ // k==0: Has_roi(0), k==1: Has_roi(1)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: the j==0 pass is not accumulated
+               {
+                       Has_roi(k);       
+
+                       t0 = (double)cvGetTickCount();//cpu start
+                       mat_roi.setTo(val, mask_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start: this timed region also covers the oclMat assignments and the download
+                       gmat_whole = mat; // NOTE(review): mat_roi is a view of mat, so mat presumably already holds val under the mask from the CPU call above -- confirm
+                       gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+                       gmask = mask_roi;
+                       t2=(double)cvGetTickCount();//kernel start
+                       gmat.setTo(val, gmask);
+                       t2 = (double)cvGetTickCount() - t2;//kernel end; NOTE(review): host ticks around the call -- confirm the device work has finished by here
+                       cv::Mat cpu_dst;
+                       gmat_whole.download(cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+                       if(j == 0) // first pass is discarded, presumably to keep one-time start-up cost out of the averages
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // each average: total ticks / (cvGetTickFrequency() * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run the masked OpenCL setTo once per ROI mode; this function does no timing of its own
+       for(int j = 0; j < 2; j ++) // j==0: Has_roi(0), j==1: Has_roi(1)
+       {
+               Has_roi(j);
+               gmat_whole = mat;
+               gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+               gmask = mask_roi;
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; // label only; presumably the run time is printed elsewhere -- TODO confirm
+               gmat.setTo(val, gmask);
+       };
+#endif
+}
+
+//**********test************   
+
+INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)));
+
+INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
+                                               Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                                               Values(false))); // Values(false) is the reserved parameter
+#endif
diff --git a/modules/ocl/perf/test_split_merge.cpp b/modules/ocl/perf/test_split_merge.cpp
new file mode 100644 (file)
index 0000000..e3e8ee4
--- /dev/null
@@ -0,0 +1,455 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv::ocl;
+PARAM_TEST_CASE(MergeTestBase, MatType, int)
+{
+       int type;
+       int channels;
+
+       //src mat
+       cv::Mat mat1; 
+       cv::Mat mat2;
+       cv::Mat mat3;
+       cv::Mat mat4;
+
+       //dst mat
+       cv::Mat dst;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int src1x;
+       int src1y;
+       int src2x;
+       int src2y;
+       int src3x;
+       int src3y;
+       int src4x;
+       int src4y;
+       int dstx;
+       int dsty;
+
+       //src mat with roi
+       cv::Mat mat1_roi;
+       cv::Mat mat2_roi;
+       cv::Mat mat3_roi;
+       cv::Mat mat4_roi;
+
+       //dst mat with roi
+       cv::Mat dst_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat1;
+       cv::ocl::oclMat gmat2;
+       cv::ocl::oclMat gmat3;
+       cv::ocl::oclMat gmat4;
+       cv::ocl::oclMat gdst;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               channels = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               dst  = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat1.cols-1; //start
+                       roirows = mat1.rows-1;
+                       src1x   = 1;
+                       src1y   = 1;
+                       src2x   = 1;
+                       src2y   = 1;
+                       src3x   = 1;
+                       src3y   = 1;
+                       src4x   = 1;
+                       src4y   = 1;
+                       dstx    = 1;
+                       dsty    =1;
+
+               }else
+               {
+                       roicols = mat1.cols;
+                       roirows = mat1.rows;
+                       src1x   = 0;
+                       src1y   = 0;
+                       src2x   = 0;
+                       src2y   = 0;
+                       src3x   = 0;
+                       src3y   = 0;
+                       src4x   = 0;
+                       src4y   = 0;
+                       dstx    = 0;
+                       dsty    = 0;
+               };
+
+               mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+               mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+               mat3_roi = mat3(Rect(src3x,src3y,roicols,roirows));
+               mat4_roi = mat4(Rect(src4x,src4y,roicols,roirows));
+
+
+               dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
+       }
+
+};
+
+struct Merge : public MergeTestBase {}; // test-case type used by TEST_P(Merge, ...)
+
+TEST_P(Merge, Accuracy) // times cv::merge against cv::ocl::merge, first on whole matrices, then on a ROI; nothing is compared or asserted here
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   // macro not defined: measure with the host tick counter
+       double totalcputick=0; // accumulated ticks of the cv::merge call
+       double totalgputick=0; // accumulated ticks of the OpenCL path, from the first oclMat assignment through download()
+       double totalgputick_kernel=0; // accumulated ticks of the cv::ocl::merge call alone
+       double t0=0; // per-pass ticks of cv::merge
+       double t1=0; // per-pass ticks of the OpenCL path
+       double t2=0;    // per-pass ticks of cv::ocl::merge
+       for(int k=0;k<2;k++){ // k==0: Has_roi(0), k==1: Has_roi(1)
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++) // one extra pass: the j==0 pass is not accumulated
+               {
+                       Has_roi(k);       
+                       std::vector<cv::Mat> dev_src; // NOTE(review): four planes are always merged; the fixture's `channels` value is not consulted here
+                       dev_src.push_back(mat1_roi);
+                       dev_src.push_back(mat2_roi);
+                       dev_src.push_back(mat3_roi);
+                       dev_src.push_back(mat4_roi);   
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::merge(dev_src, dst_roi);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start: this timed region also covers the oclMat assignments and the download
+                       gmat1 = mat1_roi;
+                       gmat2 = mat2_roi;
+                       gmat3 = mat3_roi;
+                       gmat4 = mat4_roi;
+                       gdst_whole = dst; // presumably uploads dst to the device -- TODO confirm against oclMat assignment from cv::Mat
+                       gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+                       std::vector<cv::ocl::oclMat> dev_gsrc;
+                       dev_gsrc.push_back(gmat1);
+                       dev_gsrc.push_back(gmat2);
+                       dev_gsrc.push_back(gmat3);
+                       dev_gsrc.push_back(gmat4);
+                       t2=(double)cvGetTickCount();//kernel start
+                       cv::ocl::merge(dev_gsrc, gdst); 
+                       t2 = (double)cvGetTickCount() - t2;//kernel end; NOTE(review): host ticks around the call -- confirm the device work has finished by here
+                       cv::Mat cpu_dst;
+                       gdst_whole.download (cpu_dst);//download
+                       t1 = (double)cvGetTickCount() - t1;//gpu end
+
+                       if(j == 0) // first pass is discarded, presumably to keep one-time start-up cost out of the averages
+                               continue;
+
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // each average: total ticks / (cvGetTickFrequency() * LOOP_TIMES * 1000), printed as ms
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else // PRINT_KERNEL_RUN_TIME defined: run cv::ocl::merge once per ROI mode; this function does no timing of its own
+       for(int j = 0; j < 2; j ++) // j==0: Has_roi(0), j==1: Has_roi(1)
+       {
+               Has_roi(j);
+               gmat1 = mat1_roi;
+               gmat2 = mat2_roi;
+               gmat3 = mat3_roi;
+               gmat4 = mat4_roi;
+               gdst_whole = dst;
+               gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+               std::vector<cv::ocl::oclMat> dev_gsrc;
+               dev_gsrc.push_back(gmat1);
+               dev_gsrc.push_back(gmat2);
+               dev_gsrc.push_back(gmat3);
+               dev_gsrc.push_back(gmat4);
+
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; // label only; presumably the run time is printed elsewhere -- TODO confirm
+               cv::ocl::merge(dev_gsrc, gdst); 
+       };
+#endif
+}
+
+
+PARAM_TEST_CASE(SplitTestBase, MatType, int)
+{
+       int type;
+       int channels;
+
+       //src mat
+       cv::Mat mat; 
+
+       //dstmat
+       cv::Mat dst1;
+       cv::Mat dst2;
+       cv::Mat dst3;
+       cv::Mat dst4;
+
+       // set up roi
+       int roicols;
+       int roirows;
+       int srcx;
+       int srcy;
+       int dst1x;
+       int dst1y;
+       int dst2x;
+       int dst2y;
+       int dst3x;
+       int dst3y;
+       int dst4x;
+       int dst4y;
+
+       //src mat with roi
+       cv::Mat mat_roi;
+
+       //dst mat with roi
+       cv::Mat dst1_roi;
+       cv::Mat dst2_roi;
+       cv::Mat dst3_roi;
+       cv::Mat dst4_roi;
+       std::vector<cv::ocl::Info> oclinfo;
+       //ocl dst mat for testing
+       cv::ocl::oclMat gdst1_whole;
+       cv::ocl::oclMat gdst2_whole;
+       cv::ocl::oclMat gdst3_whole;
+       cv::ocl::oclMat gdst4_whole;
+
+       //ocl mat with roi
+       cv::ocl::oclMat gmat;
+       cv::ocl::oclMat gdst1;
+       cv::ocl::oclMat gdst2;
+       cv::ocl::oclMat gdst3;
+       cv::ocl::oclMat gdst4;
+
+       virtual void SetUp()
+       {
+               type = GET_PARAM(0);
+               channels = GET_PARAM(1);
+
+               cv::RNG& rng = TS::ptr()->get_rng();
+               cv::Size size(MWIDTH, MHEIGHT);
+
+               mat  = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+               dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+               int devnums = getDevice(oclinfo);
+               CV_Assert(devnums > 0);
+               //if you want to use undefault device, set it here
+               //setDevice(oclinfo[0]);
+               setBinpath(CLBINPATH);
+       }
+
+       void Has_roi(int b)
+       {
+               //cv::RNG& rng = TS::ptr()->get_rng();
+               if(b)
+               {
+                       //randomize ROI
+                       roicols =  mat.cols-1; //start
+                       roirows = mat.rows-1;
+                       srcx   = 1;
+                       srcx   = 1;
+                       dst1x    = 1;
+                       dst1y    =1;
+                       dst2x    = 1;
+                       dst2y    =1;
+                       dst3x    = 1;
+                       dst3y    =1;
+                       dst4x    = 1;
+                       dst4y    =1;
+               }else
+               {
+                       roicols = mat.cols;
+                       roirows = mat.rows;
+                       srcx = 0;
+                       srcy = 0;
+                       dst1x = 0;
+                       dst1y = 0;
+                       dst2x    = 0;
+                       dst2y    =0;
+                       dst3x    = 0;
+                       dst3y    =0;
+                       dst4x    = 0;
+                       dst4y    =0;
+               };
+
+               mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+
+               dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
+               dst2_roi = dst2(Rect(dst2x,dst2y,roicols,roirows));
+               dst3_roi = dst3(Rect(dst3x,dst3y,roicols,roirows));
+               dst4_roi = dst4(Rect(dst4x,dst4y,roicols,roirows));
+       }
+
+};
+
+struct Split : public SplitTestBase {}; // test-case type used by TEST_P(Split, ...)
+
+TEST_P(Split, Accuracy) 
+{    
+#ifndef PRINT_KERNEL_RUN_TIME   
+       double totalcputick=0;
+       double totalgputick=0;
+       double totalgputick_kernel=0;
+       double t0=0;
+       double t1=0;
+       double t2=0;    
+       for(int k=0;k<2;k++){
+               totalcputick=0;
+               totalgputick=0;
+               totalgputick_kernel=0;
+               for(int j = 0; j < LOOP_TIMES+1; j ++)
+               {
+                       Has_roi(k);       
+                       cv::Mat         dev_dst[4]  = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+                       cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+                       t0 = (double)cvGetTickCount();//cpu start
+                       cv::split(mat_roi, dev_dst);
+                       t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                       t1 = (double)cvGetTickCount();//gpu start1              
+                       gdst1_whole = dst1;
+                       gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
+
+                       gdst2_whole = dst2;
+                       gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
+
+                       gdst3_whole = dst3;
+                       gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
+
+                       gdst4_whole = dst4;
+                       gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
+
+                       gmat = mat_roi;
+                       t2=(double)cvGetTickCount();//kernel
+                       cv::ocl::split(gmat, dev_gdst); 
+                       t2 = (double)cvGetTickCount() - t2;//kernel
+                       cv::Mat cpu_dst1;
+                       cv::Mat cpu_dst2;
+                       cv::Mat cpu_dst3;
+                       cv::Mat cpu_dst4;
+                       gdst1_whole.download(cpu_dst1);
+                       gdst2_whole.download(cpu_dst2);
+                       gdst3_whole.download(cpu_dst3);
+                       gdst4_whole.download(cpu_dst4);
+                       t1 = (double)cvGetTickCount() - t1;//gpu end1           
+                       if(j == 0)
+                               continue;
+                       totalgputick=t1+totalgputick;
+                       totalcputick=t0+totalcputick;   
+                       totalgputick_kernel=t2+totalgputick_kernel;     
+
+               }
+               if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+               cout << "average cpu runtime is  " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime is  " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+               cout << "average gpu runtime without data transfer is  " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+       }
+#else
+       for(int j = 0; j < 2; j ++)
+       {
+               Has_roi(j);
+               cv::Mat         dev_dst[4]  = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+               cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+               gdst1_whole = dst1;
+               gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
+
+               gdst2_whole = dst2;
+               gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
+
+               gdst3_whole = dst3;
+               gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
+
+               gdst4_whole = dst4;
+               gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
+               gmat = mat_roi;
+               if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+               cv::ocl::split(gmat, dev_gdst); 
+       };
+#endif
+}
+
+//*************test*****************
+INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
+                                               Values(CV_8UC4, CV_32FC4), Values(1, 4)));
+
+INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
+                                               Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));     
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp
new file mode 100644 (file)
index 0000000..417f72f
--- /dev/null
@@ -0,0 +1,265 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#define VARNAME(A) #A
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+
+
+//std::string generateVarList(int first,...)
+//{
+//     vector<std::string> varname;
+//
+//     va_list argp;
+//     string s;
+//     stringstream ss;
+//     va_start(argp,first);
+//     int i=first;
+//     while(i!=-1)
+//     {
+//             ss<<i<<",";
+//             i=va_arg(argp,int);
+//     };
+//     s=ss.str();
+//     va_end(argp);
+//     return s;
+//};
+
+//std::string generateVarList(int& p1,int& p2)
+//{
+//     stringstream ss;
+//     ss<<VARNAME(p1)<<":"<<src1x<<","<<VARNAME(p2)<<":"<<src1y;
+//     return ss.str();
+//};
+
+// Draws one integer from the RNG owned by the cvtest test system (TS::ptr()->get_rng())
+// by calling RNG::uniform(minVal, maxVal). Per the cv::RNG documentation the integer
+// overload samples the half-open range [minVal, maxVal).
+int randomInt(int minVal, int maxVal)
+{
+    return TS::ptr()->get_rng().uniform(minVal, maxVal);
+}
+
+// Draws one double from the RNG owned by the cvtest test system (TS::ptr()->get_rng())
+// by calling RNG::uniform(minVal, maxVal) with double arguments.
+double randomDouble(double minVal, double maxVal)
+{
+    return TS::ptr()->get_rng().uniform(minVal, maxVal);
+}
+
+// Builds a cv::Size whose width and height are each drawn with randomInt(minVal, maxVal).
+//
+// The two draws are sequenced explicitly (width first, then height). When they were
+// written directly as constructor arguments their evaluation order was unspecified,
+// so the same RNG seed could yield different sizes depending on the compiler.
+Size randomSize(int minVal, int maxVal)
+{
+    const int width = randomInt(minVal, maxVal);
+    const int height = randomInt(minVal, maxVal);
+    return cv::Size(width, height);
+}
+
+// Builds a cv::Scalar whose four components are each drawn with randomDouble(minVal, maxVal).
+//
+// The draws are sequenced explicitly (component 0 first, component 3 last). When they
+// were written directly as constructor arguments their evaluation order was unspecified,
+// so the same RNG seed could yield differently ordered components depending on the compiler.
+Scalar randomScalar(double minVal, double maxVal)
+{
+    const double v0 = randomDouble(minVal, maxVal);
+    const double v1 = randomDouble(minVal, maxVal);
+    const double v2 = randomDouble(minVal, maxVal);
+    const double v3 = randomDouble(minVal, maxVal);
+    return Scalar(v0, v1, v2, v3);
+}
+
+// Convenience overload: forwards to the RNG-taking randomMat overload, supplying the
+// RNG owned by the cvtest test system and passing false as the final argument
+// (presumably the "use ROI" flag of cvtest::randomMat -- confirm against ts.hpp).
+Mat randomMat(Size size, int type, double minVal, double maxVal)
+{
+    RNG& sharedRng = TS::ptr()->get_rng();
+    return randomMat(sharedRng, size, type, minVal, maxVal, false);
+}
+
+
+
+
+
+
+
+/*
+void showDiff(InputArray gold_, InputArray actual_, double eps)
+{
+    Mat gold;
+    if (gold_.kind() == _InputArray::MAT)
+        gold = gold_.getMat();
+    else
+        gold_.getGpuMat().download(gold);
+
+    Mat actual;
+    if (actual_.kind() == _InputArray::MAT)
+        actual = actual_.getMat();
+    else
+        actual_.getGpuMat().download(actual);
+
+    Mat diff;
+    absdiff(gold, actual, diff);
+    threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY);
+
+    namedWindow("gold", WINDOW_NORMAL);
+    namedWindow("actual", WINDOW_NORMAL);
+    namedWindow("diff", WINDOW_NORMAL);
+
+    imshow("gold", gold);
+    imshow("actual", actual);
+    imshow("diff", diff);
+
+    waitKey();
+}
+*/
+
+/*
+bool supportFeature(const DeviceInfo& info, FeatureSet feature)
+{
+    return TargetArchs::builtWith(feature) && info.supports(feature);
+}
+
+const vector<DeviceInfo>& devices()
+{
+    static vector<DeviceInfo> devs;
+    static bool first = true;
+
+    if (first)
+    {
+        int deviceCount = getCudaEnabledDeviceCount();
+
+        devs.reserve(deviceCount);
+
+        for (int i = 0; i < deviceCount; ++i)
+        {
+            DeviceInfo info(i);
+            if (info.isCompatible())
+                devs.push_back(info);
+        }
+
+        first = false;
+    }
+
+    return devs;
+}
+
+vector<DeviceInfo> devices(FeatureSet feature)
+{
+    const vector<DeviceInfo>& d = devices();
+    
+    vector<DeviceInfo> devs_filtered;
+
+    if (TargetArchs::builtWith(feature))
+    {
+        devs_filtered.reserve(d.size());
+
+        for (size_t i = 0, size = d.size(); i < size; ++i)
+        {
+            const DeviceInfo& info = d[i];
+
+            if (info.supports(feature))
+                devs_filtered.push_back(info);
+        }
+    }
+
+    return devs_filtered;
+}
+*/
+
+vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
+{
+    vector<MatType> v;
+
+    v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1));
+
+    for (int depth = depth_start; depth <= depth_end; ++depth)
+    {
+        for (int cn = cn_start; cn <= cn_end; ++cn)
+        {
+            v.push_back(CV_MAKETYPE(depth, cn));
+        }
+    }
+
+    return v;
+}
+
+// Returns the list produced by types(CV_8U, CV_64F, 1, 4), i.e. depths CV_8U..CV_64F
+// with 1 to 4 channels. The list is a function-local static, so it is built on the
+// first call and the same object is returned afterwards.
+const vector<MatType>& all_types()
+{
+    static vector<MatType> allTypes(types(CV_8U, CV_64F, 1, 4));
+    return allTypes;
+}
+
+// Loads an image with imread(); fileName is appended to the data path reported by the
+// cvtest test system. The imread() result is returned as-is (no check is made here).
+Mat readImage(const string& fileName, int flags)
+{
+    const string fullPath = string(cvtest::TS::ptr()->get_data_path()) + fileName;
+    return imread(fullPath, flags);
+}
+
+// Loads fname via readImage() and shapes it towards the requested matrix type:
+//   - channel count 1 is read with IMREAD_GRAYSCALE, any other count with IMREAD_COLOR;
+//   - channel count 4 additionally gets a BGR -> BGRA conversion;
+//   - finally the data is converted to the depth of `type`.
+Mat readImageType(const string& fname, int type)
+{
+    const int channels = CV_MAT_CN(type);
+
+    Mat src;
+    if (channels == 1)
+        src = readImage(fname, IMREAD_GRAYSCALE);
+    else
+        src = readImage(fname, IMREAD_COLOR);
+
+    if (channels == 4)
+    {
+        Mat bgra;
+        cvtColor(src, bgra, cv::COLOR_BGR2BGRA);
+        swap(src, bgra);
+    }
+
+    src.convertTo(src, CV_MAT_DEPTH(type));
+    return src;
+}
+
+// NORM_INF norm of a single matrix, as computed by cv::norm.
+double checkNorm(const Mat& m)
+{
+    const double infNorm = norm(m, NORM_INF);
+    return infNorm;
+}
+
+// NORM_INF norm between two matrices, as computed by cv::norm(m1, m2, NORM_INF).
+double checkNorm(const Mat& m1, const Mat& m2)
+{
+    const double infNorm = norm(m1, m2, NORM_INF);
+    return infNorm;
+}
+
+// Runs matchTemplate(m1, m2) with CV_TM_CCORR_NORMED and returns how far the response
+// at position (0, 0) is from 1, i.e. abs(response(0, 0) - 1).
+double checkSimilarity(const Mat& m1, const Mat& m2)
+{
+    Mat response;
+    matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+
+    const float score = response.at<float>(0, 0);
+    return std::abs(score - 1.f);
+}
+
+/*
+void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os)
+{
+    (*os) << info.name();
+}
+*/
+
+// Streams a readable name for an Inverse test parameter: "inverse" when the wrapped
+// flag is true, "direct" otherwise.
+void PrintTo(const Inverse& inverse, std::ostream* os)
+{
+    const char* label = inverse ? "inverse" : "direct";
+    (*os) << label;
+}
diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp
new file mode 100644 (file)
index 0000000..0a0bfba
--- /dev/null
@@ -0,0 +1,177 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_UTILITY_HPP__
+#define __OPENCV_TEST_UTILITY_HPP__
+//#define PRINT_KERNEL_RUN_TIME  -- optional switch; in this header it only selects which LOOP_TIMES definition below is used
+#ifdef PRINT_KERNEL_RUN_TIME
+#define LOOP_TIMES 1 // repetition count; the perf tests divide accumulated tick totals by it to print average runtimes
+#else
+#define LOOP_TIMES 1 // NOTE(review): same value as the branch above, so the #ifdef currently does not change LOOP_TIMES
+#endif
+#define MWIDTH 2557 // presumably the test matrix width -- TODO confirm against the test fixtures
+#define MHEIGHT 2579 // presumably the test matrix height -- TODO confirm against the test fixtures
+#define CLBINPATH ".\\" // current directory written with a backslash separator; presumably where OpenCL binaries are looked up -- confirm
+int randomInt(int minVal, int maxVal); // one RNG::uniform(minVal, maxVal) draw from the cvtest test-system RNG
+double randomDouble(double minVal, double maxVal); // same as randomInt, using the double overload of RNG::uniform
+
+//std::string generateVarList(int first,...);
+std::string generateVarList(int& p1,int& p2); // NOTE(review): the only definition in utility.cpp is commented out -- confirm one exists elsewhere before calling
+cv::Size randomSize(int minVal, int maxVal); // width and height are each drawn with randomInt(minVal, maxVal)
+cv::Scalar randomScalar(double minVal, double maxVal); // all four components are each drawn with randomDouble(minVal, maxVal)
+cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); // forwards to the RNG-taking randomMat overload with the test-system RNG
+
+void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); // NOTE(review): the only definition in utility.cpp is commented out -- confirm one exists elsewhere before calling
+
+//! return true if device supports specified feature and gpu module was built with support the feature.
+//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+
+//! return all devices compatible with current gpu module build.
+//const std::vector<cv::ocl::DeviceInfo>& devices();
+//! return all devices compatible with current gpu module build which support specified feature.
+//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
+
+//! read image from testdata folder.
+cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
+cv::Mat readImageType(const std::string& fname, int type); // readImage + BGR->BGRA when type has 4 channels + conversion to the depth of type
+
+double checkNorm(const cv::Mat& m); // cv::norm(m, NORM_INF)
+double checkNorm(const cv::Mat& m1, const cv::Mat& m2); // cv::norm(m1, m2, NORM_INF)
+double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2); // abs(CV_TM_CCORR_NORMED response at (0, 0) - 1)
+
+// Non-fatal gtest check that checkNorm(cv::Mat(mat)), i.e. the NORM_INF norm of mat,
+// is at most eps. The EXPECT_LE statement is terminated with ';' inside the braces;
+// without it every use of this macro expanded to a syntax error.
+#define EXPECT_MAT_NORM(mat, eps) \
+{ \
+    EXPECT_LE(checkNorm(cv::Mat(mat)), eps); \
+}
+
+//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
+//{ \
+//    ASSERT_EQ(mat1.type(), mat2.type()); \
+//    ASSERT_EQ(mat1.size(), mat2.size()); \
+//    EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \
+//}
+
+// Asserts (fatal) that mat1 and mat2 have the same type and size, then expects
+// (non-fatal) that checkNorm(mat1, mat2), their NORM_INF distance, is at most eps.
+// s is streamed into the failure message of the final check.
+// The mat1, mat2 and s arguments are parenthesised so that expressions such as *ptr
+// or a conditional expression expand correctly. mat1 and mat2 are evaluated more than once.
+#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \
+{ \
+    ASSERT_EQ((mat1).type(), (mat2).type()); \
+    ASSERT_EQ((mat1).size(), (mat2).size()); \
+    EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps) << (s); \
+}
+
+// Asserts (fatal) that mat1 and mat2 have the same type and size, then expects
+// (non-fatal) that checkSimilarity(mat1, mat2) is at most eps.
+// The mat1 and mat2 arguments are parenthesised so that expressions such as *ptr
+// expand correctly. mat1 and mat2 are evaluated more than once.
+#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
+{ \
+    ASSERT_EQ((mat1).type(), (mat2).type()); \
+    ASSERT_EQ((mat1).size(), (mat2).size()); \
+    EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
+}
+
+namespace cv 
+{ 
+    namespace ocl 
+    {
+        // void PrintTo(const DeviceInfo& info, std::ostream* os);
+    }
+}
+
+using perf::MatDepth;
+using perf::MatType;
+
+//! return vector with types from specified range.
+std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
+
+//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
+const std::vector<MatType>& all_types();
+
+// Thin wrapper around a bool used as a test parameter: it converts implicitly from and
+// to bool, and has its own PrintTo overload so it is printed as "inverse" / "direct".
+class Inverse
+{
+    bool val_;  // wrapped flag; false when default-constructed
+
+public:
+    // Intentionally not explicit, so a plain bool converts to Inverse.
+    Inverse(bool val = false) : val_(val) {}
+
+    // Yields the wrapped flag.
+    operator bool() const { return val_; }
+};
+
+void PrintTo(const Inverse& useRoi, std::ostream* os);
+
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) // CV_ENUM / CV_FLAGS presumably come from the OpenCV perf framework and wrap the listed constants as named test-parameter types -- confirm
+
+CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
+
+    enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; // NOTE(review): cv::flip documents 0 as x-axis, >0 as y-axis and <0 as both axes, so these names may not match the values -- confirm
+CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
+
+CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+    CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); // NOTE(review): the only entry here followed by ';' -- looks unintentional
+
+CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
+
+CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+
+CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+
+CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+void  run_perf_test(); // NOTE(review): not defined in utility.cpp -- presumably defined in another perf source file
+
+#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > // opens a gtest fixture struct parameterised on a tr1::tuple of the given types
+
+#define GET_PARAM(k) std::tr1::get< k >(GetParam()) // k-th element of the current test's parameter tuple
+
+#define ALL_DEVICES testing::ValuesIn(devices()) // NOTE(review): devices() exists only as commented-out code in utility.hpp/.cpp, so this will not compile if used
+#define DEVICES(feature) testing::ValuesIn(devices(feature)) // NOTE(review): same dependency on the commented-out devices()
+
+#define ALL_TYPES testing::ValuesIn(all_types()) // parameter generator over all_types()
+#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) // parameter generator over types(...) for the given inclusive ranges
+
+#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) // two fixed sizes: 128x128 and 113x113
+
+#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) // both Inverse settings, false first
+
+#endif // __OPENCV_TEST_UTILITY_HPP__
+
+#endif // __OPENCV_TEST_UTILITY_HPP__