################################################################################################################
################################ OpenCL Module Performance ##################################################
################################################################################################################
-#file(GLOB perf_srcs "perf/*.cpp")
-#file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
+file(GLOB perf_srcs "perf/*.cpp")  # gather the .cpp files under perf/ into perf_srcs
+file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")  # gather the .hpp/.h files under perf/ into perf_hdrs
-#ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
-# FILES "Src" ${perf_srcs})
+ocv_add_perf_tests(FILES "Include" ${perf_hdrs}  # pass the globbed headers under the "Include" label
+ FILES "Src" ${perf_srcs})  # pass the globbed sources under the "Src" label
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
+#define __OPENCV_TEST_INTERPOLATION_HPP__
+
+// Reads channel `c` of the element at (y, x) in `src`, resolving coordinates
+// that fall outside the image according to `border_type`.
+//  - cv::BORDER_CONSTANT: outside reads yield borderVal.val[c] saturated to T.
+//  - any other mode: both coordinates are remapped with cv::borderInterpolate.
+template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+{
+    if (border_type != cv::BORDER_CONSTANT)
+    {
+        const int row = cv::borderInterpolate(y, src.rows, border_type);
+        const int col = cv::borderInterpolate(x, src.cols, border_type);
+        return src.at<T>(row, col * src.channels() + c);
+    }
+
+    const bool inside = y >= 0 && y < src.rows && x >= 0 && x < src.cols;
+    if (!inside)
+        return cv::saturate_cast<T>(borderVal.val[c]);
+
+    return src.at<T>(y, x * src.channels() + c);
+}
+
+// Sampler that floors both coordinates (cvFloor) and returns that element via readVal.
+template <typename T> struct NearestInterpolator
+{
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        const int row = cvFloor(y);
+        const int col = cvFloor(x);
+        return readVal<T>(src, row, col, c, border_type, borderVal);
+    }
+};
+
+// Sampler that blends the four elements surrounding (y - 0.5, x - 0.5),
+// weighting each by the product of its horizontal and vertical distances
+// to the opposite corner, then saturates the sum to T.
+template <typename T> struct LinearInterpolator
+{
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        // Shift by half an element before locating the surrounding 2x2 cell.
+        x -= 0.5f;
+        y -= 0.5f;
+
+        const int left   = cvFloor(x);
+        const int top    = cvFloor(y);
+        const int right  = left + 1;
+        const int bottom = top + 1;
+
+        // Distances from the sample point to each side of the cell.
+        const float toRight  = right - x;
+        const float toLeft   = x - left;
+        const float toBottom = bottom - y;
+        const float toTop    = y - top;
+
+        float acc = 0;
+        acc += readVal<T>(src, top,    left,  c, border_type, borderVal) * (toRight * toBottom);
+        acc += readVal<T>(src, top,    right, c, border_type, borderVal) * (toLeft  * toBottom);
+        acc += readVal<T>(src, bottom, left,  c, border_type, borderVal) * (toRight * toTop);
+        acc += readVal<T>(src, bottom, right, c, border_type, borderVal) * (toLeft  * toTop);
+
+        return cv::saturate_cast<T>(acc);
+    }
+};
+
+// Sampler that evaluates a cubic polynomial over a 4x4 neighbourhood of
+// (cvRound(y), cvRound(x)), covering offsets -2..+1 in both directions.
+template <typename T> struct CubicInterpolator
+{
+    // Evaluates the 1-D cubic through the four samples p[0..3] at parameter x.
+    static float getValue(float p[4], float x)
+    {
+        return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0])));
+    }
+
+    // Evaluates the 2-D case: interpolates each row of p at x, then the
+    // four row results at y.
+    static float getValue(float p[4][4], float x, float y)
+    {
+        float arr[4];
+
+        for (int r = 0; r < 4; ++r)
+            arr[r] = getValue(p[r], x);
+
+        return getValue(arr, y);
+    }
+
+    // Samples channel `c` of `src` around (y, x); out-of-range reads follow
+    // `border_type` / `borderVal` as implemented by readVal. The result is
+    // saturated to T.
+    static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+    {
+        const int ix = cvRound(x);
+        const int iy = cvRound(y);
+
+        // Filled element by element with an explicit cast: converting a
+        // non-constant T to float inside a braced initializer is a narrowing
+        // conversion in C++11 and is rejected by some compilers.
+        float vals[4][4];
+        for (int r = 0; r < 4; ++r)
+        {
+            for (int k = 0; k < 4; ++k)
+                vals[r][k] = static_cast<float>(readVal<T>(src, iy - 2 + r, ix - 2 + k, c, border_type, borderVal));
+        }
+
+        const float fx = static_cast<float>((x - ix + 2.0) / 4.0);
+        const float fy = static_cast<float>((y - iy + 2.0) / 4.0);
+
+        return cv::saturate_cast<T>(getValue(vals, fx, fy));
+    }
+};
+
+#endif // __OPENCV_TEST_INTERPOLATION_HPP__
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+
+// Prints a blank line followed by the detected OS family and word size
+// ("OS: Windows 64", "OS: Linux 32", ...). Nothing but the blank line is
+// printed on platforms that match none of the checks.
+void print_info()
+{
+    printf("\n");
+#if defined _WIN32
+#   if defined _WIN64
+    puts("OS: Windows 64");
+#   else
+    puts("OS: Windows 32");
+#   endif
+// `linux` is only predefined in GNU dialects; `__linux__` is also set in strict ISO mode.
+#elif defined linux || defined __linux__
+#   if defined _LP64
+    puts("OS: Linux 64");
+#   else
+    puts("OS: Linux 32");
+#   endif
+#elif defined __APPLE__
+#   if defined _LP64
+    puts("OS: Apple 64");
+#   else
+    puts("OS: Apple 32");
+#   endif
+#endif
+
+}
+
+#if PERF_TEST_OCL
+// Entry point used when PERF_TEST_OCL is enabled: runs the hand-written
+// performance suite instead of the Google Test runner.
+// The command-line arguments are not used, so they are left unnamed to avoid
+// unused-parameter warnings.
+int main(int /*argc*/, char** /*argv*/)
+{
+    // Query the OpenCL devices first; the list is kept in a function-local static.
+    static std::vector<Info> ocl_info;
+    ocl::getDevice(ocl_info);
+
+    run_perf_test();
+    return 0;
+}
+#else
+// Entry point used when PERF_TEST_OCL is disabled: initialises the test
+// system for the "ocl" module, prints the platform banner and runs every
+// registered Google Test case.
+int main(int argc, char** argv)
+{
+    TS::ptr()->init("ocl");
+    InitGoogleTest(&argc, argv);
+
+    print_info();
+
+    const int status = RUN_ALL_TESTS();
+    return status;
+}
+#endif // PERF_TEST_OCL
+
+#else // HAVE_OPENCL
+
+// Fallback entry point for builds without OpenCL: reports that fact and exits successfully.
+int main()
+{
+    puts("OpenCV was built without OpenCL support");
+    return 0;
+}
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicore Ware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <ctime>
+
+#if PERF_TEST_OCL
+
+#ifdef HAVE_OPENCL
+
+#define SHOW_CPU false // not referenced in the visible part of this file
+#define REPEAT 1000 // iteration count used by R_T for the timed loop
+#define COUNT_U 0 // count the uploading execution time for ocl mat structures
+#define COUNT_D 0 // non-zero: run the download snippet inside the timed loop of P_TEST_FULL
+
+
+// the following macro section tests the target function (kernel) performance
+// upload is the code snippet for converting cv::mat to cv::ocl::oclMat
+// downloading is the code snippet for converting cv::ocl::oclMat back to cv::mat
+// change COUNT_U and COUNT_D to take downloading and uploading time into account
+/* Benchmark harness: prints the stringized kernel call as a heading, runs   */
+/* `upload` once, runs `kernel_call` 15 times untimed, then times REPEAT      */
+/* iterations and prints the total elapsed time of that loop.                 */
+/* `upload` / `download` join the timed loop only when COUNT_U / COUNT_D      */
+/* are non-zero. Must be used inside a function returning void (see R_TEST).  */
+#define P_TEST_FULL( upload, kernel_call, download ) \
+{ \
+    std::cout<< "\n" #kernel_call "\n----------------------"; \
+    {upload;} \
+    R_TEST( kernel_call, 15 ); /* untimed runs before measuring */ \
+    double perf_ticks = (double)cvGetTickCount(); \
+    R_T( { \
+        if( COUNT_U ) {upload;} \
+        kernel_call; \
+        if( COUNT_D ) {download;} \
+    } ); \
+    perf_ticks = (double)cvGetTickCount() - perf_ticks; \
+    const double perf_ms = perf_ticks/((double)cvGetTickFrequency()* 1000.); \
+    std::cout << "runtime is " << perf_ms << "ms" << std::endl; \
+}
+
+
+/* Simple clock()-based timer: prints the stringized statement, runs it 15   */
+/* times untimed, then times REPEAT iterations.                               */
+/* clock() returns ticks, not milliseconds, so the difference is scaled by    */
+/* CLOCKS_PER_SEC before it is printed with the "ms" suffix.                  */
+#define R_T2( test ) \
+{ \
+    std::cout<< "\n" #test "\n----------------------"; \
+    R_TEST( test, 15 ) \
+    clock_t st = clock(); \
+    R_T( test ) \
+    std::cout<< (clock() - st) * 1000.0 / CLOCKS_PER_SEC << "ms\n"; \
+}
+/* R_T: run `test` REPEAT times through R_TEST.                               */
+#define R_T( test ) \
+    R_TEST( test, REPEAT )
+/* R_TEST: run `test` `repeat` times; on any exception print a notice and     */
+/* return from the enclosing function (which therefore must return void).     */
+/* `repeat` is parenthesized and the counter has a macro-private name so      */
+/* that neither an expression argument nor an `i` used by `test` is captured. */
+#define R_TEST( test, repeat ) \
+    try{ \
+        for( int r_test_iter_ = 0; r_test_iter_ < (repeat); r_test_iter_ ++ ) { test; } \
+    } catch( ... ) { std::cout << "||||| Exception catched! |||||\n"; return; }
+
+// Image loaded by the filter and colour-conversion benchmarks. The default is
+// a Windows-only wallpaper path; define FILTER_TEST_IMAGE on the compiler
+// command line to point the benchmarks at another file.
+#ifndef FILTER_TEST_IMAGE
+#define FILTER_TEST_IMAGE "C:/Windows/Web/Wallpaper/Landscapes/img9.jpg"
+#endif
+// Prints a console warning that the benchmark called `name` cannot be run.
+#define WARN_NRUN( name ) std::cout << "Warning: " #name " is not runnable!\n";
+
+
+void print_info();
+
+// performance base class
+// Common interface of every benchmark: Run() performs the measurement and is
+// expected to call the protected SetUp() to prepare its inputs.
+struct PerfTest
+{
+    // Virtual destructor so that derived benchmarks are destroyed correctly
+    // when deleted through a PerfTest pointer.
+    virtual ~PerfTest() {}
+    virtual void Run() = 0;
+    protected:
+    virtual void SetUp() = 0;
+};
+///////////////////////////////////////
+// Arithm
+// Shared fixture for the arithmetic benchmarks: owns two host inputs, a mask,
+// a scalar and their device-side counterparts. Derived tests may change
+// `type` (default CV_8UC4) before calling SetUp().
+struct ArithmTestP : PerfTest
+{
+    int type;           // matrix type used for mat1 / mat2
+    cv::Scalar val;     // random scalar operand, each component in [-10, 10)
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // host inputs
+    cv::Mat mask;       // host mask
+    cv::Mat dst;        // host copy of the result
+    cv::ocl::oclMat oclRes, oclmat1, oclmat2;
+    cv::ocl::oclMat oclmask;
+    std::vector<cv::Mat> dstv;
+    protected:
+    ArithmTestP() : type( CV_8UC4 ) {}
+    // Generates 3000x3000 random inputs and a mask, then uploads them.
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size( 3000, 3000 ); // big input image
+        mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        mat2 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // The last argument is the threshold *type*, not a matrix type; the
+        // original passed CV_8UC1, which has the same numeric value as
+        // cv::THRESH_BINARY.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+        oclmat2 = cv::ocl::oclMat(mat2);
+        oclmask = cv::ocl::oclMat(mask);
+    }
+};
+
+// Benchmarks cv::ocl::add on the two inputs prepared by ArithmTestP::SetUp().
+struct AddArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::add(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::subtract on the two inputs prepared by ArithmTestP::SetUp().
+struct SubtractArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::subtract(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::multiply on the two inputs prepared by ArithmTestP::SetUp().
+struct MultiplyArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        // The unused `clock_t start` local was dropped: P_TEST_FULL does its own timing.
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+            cv::ocl::multiply(oclmat1, oclmat2, oclRes),
+            oclRes.download(dst);
+        );
+    }
+};
+
+// Benchmarks cv::ocl::divide on the two inputs prepared by ArithmTestP::SetUp().
+struct DivideArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::divide(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::exp on a single-channel float (CV_32FC1) input.
+struct ExpP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::exp(oclmat1, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::log on a single-channel float (CV_32FC1) input.
+struct LogP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::log(oclmat1, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::compare with cv::CMP_EQ on two CV_32FC1 inputs.
+struct CompareP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::compare(oclmat1, oclmat2, oclRes, cv::CMP_EQ),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::flip with flip code 0 on a 3000x3000 CV_8UC4 input.
+struct FlipP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::flip(oclmat1, oclRes, 0),
+            { oclRes.download(dst); }
+        );
+    }
+    protected:
+    // Only one input is needed, so the base fixture's setup is replaced.
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        type = CV_8UC4;
+        size = cv::Size(3000, 3000);
+
+        mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::magnitude on two CV_32F inputs.
+struct MagnitudeP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32F;
+        SetUp();
+        // The second operand is oclmat2: the original uploaded oclmat2 but
+        // then passed oclmat1 twice, leaving the second input unused.
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2),
+            cv::ocl::magnitude(oclmat1, oclmat2, oclRes),
+            oclRes.download(dst);
+        );
+    }
+};
+
+// Benchmarks cv::ocl::LUT: a 3000x3000 CV_8UC1 image mapped through a
+// 256-entry CV_8UC1 table.
+struct LUTP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); ocllut = cv::ocl::oclMat(lut); },
+            cv::ocl::LUT(oclmat1, ocllut, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+    protected:
+    cv::Mat lut;            // host lookup table (256x1)
+    cv::ocl::oclMat ocllut; // device copy of the table
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        type = CV_8UC1;
+        size = cv::Size(3000, 3000);
+
+        // Generation order (image first, then table) is kept so the RNG sequence is unchanged.
+        mat1 = cvtest::randomMat(rng, size, type, 1, 255, false);
+        lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+        ocllut = cv::ocl::oclMat(lut);
+    }
+};
+
+// Benchmarks cv::ocl::minMax on a 3000x3000 CV_64F input; mat2 (CV_8UC1) is
+// passed as the last argument (presumably the mask - confirm against the API).
+struct MinMaxP : ArithmTestP
+{
+    double minVal_gold, minVal;
+    double maxVal_gold, maxVal;
+
+    virtual void Run()
+    {
+        SetUp();
+        // Nothing to download: the results are returned through minVal / maxVal.
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::minMax(oclmat1, &minVal, &maxVal, oclmat2),
+            {}
+        );
+    }
+
+    protected:
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        type = CV_64F;
+        size = cv::Size(3000, 3000);
+
+        mat1 = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+        oclmat2 = cv::ocl::oclMat(mat2);
+    }
+};
+
+// Benchmarks cv::ocl::minMaxLoc using the inputs prepared by MinMaxP::SetUp();
+// the locations are written to minLoc_gold / maxLoc_gold.
+struct MinMaxLocP : MinMaxP
+{
+    cv::Point minLoc_gold;
+    cv::Point maxLoc_gold;
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::minMaxLoc(oclmat1, &minVal, &maxVal, &minLoc_gold, &maxLoc_gold, oclmat2),
+            {}
+        );
+    }
+};
+
+// Benchmarks cv::ocl::countNonZero on a 3000x3000 matrix converted from a
+// random CV_8U base image.
+struct CountNonZeroP : ArithmTestP
+{
+    int n; // last result returned by countNonZero
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat(mat1),
+            n = cv::ocl::countNonZero(oclmat1),
+            {}
+        );
+    }
+    protected:
+    virtual void SetUp()
+    {
+        // Named constant instead of the magic number 6 (the depth code of CV_64F).
+        type = CV_64F;
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size( 3000, 3000 );
+
+        cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false);
+        matBase.convertTo(mat1, type);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::sum on the first input prepared by ArithmTestP::SetUp().
+struct SumP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        cv::Scalar n; // receives the sum; referenced by name inside the macro
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            n = cv::ocl::sum(oclmat1),
+            {}
+        );
+    }
+};
+
+// Fixture for the bitwise benchmarks: two 3000x3000 CV_8UC4 matrices whose
+// bytes are filled row by row from the shared RNG.
+struct BitwiseP : ArithmTestP
+{
+    protected:
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        type = CV_8UC4;
+        size = cv::Size( 3000, 3000 );
+
+        mat1.create(size, type);
+        mat2.create(size, type);
+
+        const cv::Scalar lo(0);
+        const cv::Scalar hi(255);
+
+        for (int r = 0; r < mat1.rows; ++r)
+        {
+            // View each row as a flat 1 x (cols * elemSize) byte matrix.
+            const int bytes1 = static_cast<int>(mat1.cols * mat1.elemSize());
+            cv::Mat row1(1, bytes1, CV_8U, (void*)mat1.ptr(r));
+            rng.fill(row1, cv::RNG::UNIFORM, lo, hi);
+
+            const int bytes2 = static_cast<int>(mat2.cols * mat2.elemSize());
+            cv::Mat row2(1, bytes2, CV_8U, (void*)mat2.ptr(r));
+            rng.fill(row2, cv::RNG::UNIFORM, lo, hi);
+        }
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+        oclmat2 = cv::ocl::oclMat(mat2);
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_not on the first BitwiseP input.
+struct BitwiseNotP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::bitwise_not(oclmat1, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_and twice: matrix-with-matrix, then matrix-with-scalar (`val`).
+struct BitwiseAndP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+
+        // matrix & matrix
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::bitwise_and(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+
+        // matrix & scalar
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::bitwise_and(oclmat1, val, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_xor twice: matrix-with-matrix, then matrix-with-scalar (`val`).
+struct BitwiseXorP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+
+        // matrix ^ matrix
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::bitwise_xor(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+
+        // matrix ^ scalar
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::bitwise_xor(oclmat1, val, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::bitwise_or twice: matrix-with-matrix, then matrix-with-scalar (`val`).
+struct BitwiseOrP : BitwiseP
+{
+    virtual void Run()
+    {
+        SetUp();
+
+        // matrix | matrix
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::bitwise_or(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+
+        // matrix | scalar
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::bitwise_or(oclmat1, val, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::transpose on the first input prepared by ArithmTestP::SetUp().
+struct TransposeP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::transpose(oclmat1, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::absdiff on two CV_32FC1 inputs.
+struct AbsdiffArrayP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::absdiff(oclmat1, oclmat2, oclRes),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::phase on two CV_32F inputs, with 1 as the final argument.
+struct PhaseP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32F;
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); oclmat2 = cv::ocl::oclMat(mat2); },
+            cv::ocl::phase(oclmat1,oclmat2,oclRes,1),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::cartToPolar on CV_64FC4 inputs: 5 iterations timed with clock().
+struct CartToPolarP : ArithmTestP
+{
+    cv::ocl::oclMat oclRes1; // second output of cartToPolar
+    virtual void Run()
+    {
+        type = CV_64FC4;
+        SetUp();
+        const clock_t start = clock();
+        R_TEST(
+            cv::ocl::cartToPolar(oclmat1,oclmat2,oclRes, oclRes1, 1);
+            if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
+        , 5);
+        // clock() counts ticks; scale by CLOCKS_PER_SEC so the "ms" label is correct.
+        std::cout<< "ocl::CartToPolar -- " << (clock() - start) * 1000.0 / CLOCKS_PER_SEC << "ms\n";
+    }
+};
+
+// Benchmarks cv::ocl::polarToCart on CV_64FC4 inputs: 2 iterations timed with clock().
+struct PolarToCartP : ArithmTestP
+{
+    cv::ocl::oclMat oclRes1; // second output of polarToCart
+    virtual void Run()
+    {
+        type = CV_64FC4;
+        SetUp();
+        const clock_t start = clock();
+        R_TEST(
+            cv::ocl::polarToCart(oclmat1,oclmat2,oclRes, oclRes1, 1);
+            if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);}
+        , 2);
+        // clock() counts ticks; scale by CLOCKS_PER_SEC so the "ms" label is correct.
+        std::cout<< "ocl::polarToCart -- " << (clock() - start) * 1000.0 / CLOCKS_PER_SEC << "ms\n";
+    }
+};
+
+///////////////////////////////////////
+// split & merge
+// Benchmarks cv::ocl::split on a 3000x3000 matrix filled with the constant (1, 2, 3, 4).
+struct SplitP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::split(oclmat1, dev_dst),
+            {
+                // Copy every output plane back to the host.
+                dstv.resize(dev_dst.size());
+                for (size_t plane = 0; plane < dev_dst.size(); ++plane)
+                    dev_dst[plane].download(dstv[plane]);
+            }
+        );
+    }
+    protected:
+    std::vector<cv::ocl::oclMat> dev_dst; // device-side output planes
+    virtual void SetUp()
+    {
+        size = cv::Size( 3000, 3000 );
+
+        const cv::Scalar fill(1.0, 2.0, 3.0, 4.0);
+        mat1.create(size, type);
+        mat1.setTo(fill);
+
+        oclmat1 = cv::ocl::oclMat(mat1);
+    }
+};
+
+// Benchmarks cv::ocl::merge on the planes produced by splitting the SplitP input.
+struct MergeP : SplitP
+{
+    virtual void Run()
+    {
+        SetUp();
+
+        // Produce the planes to merge (device side) and their host counterparts.
+        cv::ocl::split(oclmat1, dev_dst);
+        cv::split(mat1, dstv);
+
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            cv::ocl::merge(dev_dst, oclmat2),
+            { oclmat2.download(dst); }
+        );
+    }
+};
+
+// Benchmarks oclMat::setTo with the scalar (1, 2, 3, 4) and oclmat2 as second
+// argument, on a 3000x3000 CV_32FC4 destination.
+struct SetToP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
+        P_TEST_FULL(
+            { oclmat2 = cv::ocl::oclMat(mat2); },
+            oclmat1.setTo( s, oclmat2 ),
+            { oclmat1.download(dst); }
+        );
+    }
+    protected:
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        type = CV_32FC4;
+        size = cv::Size(3000, 3000);
+
+        // Only allocated: neither matrix is given initial contents here.
+        mat1.create(size, type);
+        oclmat1.create(size, type);
+
+        mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0.0, 1.5, false);
+        oclmat2 = cv::ocl::oclMat(mat2);
+    }
+};
+
+// Benchmarks oclMat::copyTo with oclmat2 as second argument, reusing the SetToP fixture.
+struct CopyToP : SetToP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat(mat1); },
+            oclmat1.copyTo( oclRes, oclmat2 ),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks oclMat::convertTo from CV_32FC1 to CV_32FC4 without scaling factors.
+struct ConvertToP : ArithmTestP
+{
+    virtual void Run()
+    {
+        type = CV_32FC1;
+        SetUp();
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        // The scaling factors are still drawn so the shared RNG sequence is
+        // unchanged, but they are not passed to convertTo (see the note
+        // below); the casts silence unused-variable warnings.
+        const double a = rng.uniform(0.0, 1.0);
+        const double b = rng.uniform(-10.0, 10.0);
+        (void)a;
+        (void)b;
+
+        int type2 = CV_32FC4;
+
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat(mat1),
+            oclmat1.convertTo( oclRes, type2 /*, a, b */ ), // fails when scaling factors a and b are specified
+            oclRes.download(dst)
+        );
+    }
+};
+
+////////////////////////////////////////////
+// Filters
+
+// Shared fixture for the filter benchmarks: loads FILTER_TEST_IMAGE and
+// keeps a BGRA and a grayscale version on both host and device.
+struct FilterTestP : PerfTest
+{
+    protected:
+    int ksize;      // kernel size (7)
+    int dx, dy;     // derivative orders, initialised to ksize/2
+
+    cv::Mat img_rgba;
+    cv::Mat img_gray;
+
+    cv::ocl::oclMat ocl_img_rgba;
+    cv::ocl::oclMat ocl_img_gray;
+
+    cv::ocl::oclMat dev_dst_rgba;
+    cv::ocl::oclMat dev_dst_gray;
+
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
+
+    cv::Mat kernel; // ksize x ksize matrix of ones (CV_8U)
+
+    int bordertype;
+
+    virtual void SetUp()
+    {
+        bordertype = (int)cv::BORDER_DEFAULT;
+        ksize = 7;
+        dx = dy = ksize/2;
+
+        kernel = cv::Mat::ones(ksize, ksize, CV_8U);
+
+        // NOTE(review): a failed ASSERT_FALSE presumably only leaves SetUp();
+        // the calling Run() would then continue with empty images - confirm.
+        cv::Mat img = readImage(FILTER_TEST_IMAGE);
+        ASSERT_FALSE(img.empty());
+
+        cv::cvtColor(img, img_rgba, CV_BGR2BGRA);
+        ocl_img_rgba = cv::ocl::oclMat(img_rgba);
+
+        cv::cvtColor(img, img_gray, CV_BGR2GRAY);
+        ocl_img_gray = cv::ocl::oclMat(img_gray);
+    }
+};
+
+// Benchmarks cv::ocl::blur (ksize x ksize) on the BGRA and grayscale images.
+struct BlurP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::blur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
+                cv::ocl::blur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::Sobel on the BGRA and grayscale images using the fixture's dx, dy and ksize.
+struct SobelP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::Sobel(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, ksize, 1, 0, bordertype);
+                cv::ocl::Sobel(ocl_img_gray, dev_dst_gray, -1, dx, dy, ksize, 1, 0, bordertype);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::Scharr with dx = 0, dy = 1 on the BGRA and grayscale images.
+struct ScharrP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+
+        // Replace the fixture's default derivative orders.
+        dx = 0;
+        dy = 1;
+
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::Scharr(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, 1, 0, bordertype);
+                cv::ocl::Scharr(ocl_img_gray, dev_dst_gray, -1, dx, dy, 1, 0, bordertype);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::GaussianBlur (ksize x ksize, both sigmas = 3) on the BGRA and grayscale images.
+struct GaussianBlurP : FilterTestP
+{
+    virtual void Run()
+    {
+        double sigma1 = 3;
+        double sigma2 = 3;
+
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::GaussianBlur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), sigma1, sigma2);
+                cv::ocl::GaussianBlur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), sigma1, sigma2);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::dilate with the fixture's all-ones kernel on the BGRA and grayscale images.
+struct DilateP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::dilate(ocl_img_rgba, dev_dst_rgba, kernel);
+                cv::ocl::dilate(ocl_img_gray, dev_dst_gray, kernel);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::erode with the fixture's all-ones kernel on the BGRA and grayscale images.
+struct ErodeP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::erode(ocl_img_rgba, dev_dst_rgba, kernel);
+                cv::ocl::erode(ocl_img_gray, dev_dst_gray, kernel);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::morphologyEx with operation code 3 and a device-side
+// copy of the fixture's kernel, on the BGRA and grayscale images.
+struct MorphExP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        cv::ocl::oclMat okernel; // device copy of `kernel`
+        P_TEST_FULL(
+            okernel = cv::ocl::oclMat(kernel); ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::morphologyEx(ocl_img_rgba, dev_dst_rgba, 3, okernel);
+                cv::ocl::morphologyEx(ocl_img_gray, dev_dst_gray, 3, okernel);
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::Laplacian (ddepth -1, aperture 3) on the BGRA and grayscale images.
+struct LaplacianP : FilterTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            ocl_img_rgba = cv::ocl::oclMat(img_rgba); ocl_img_gray = cv::ocl::oclMat(img_gray),
+            {
+                cv::ocl::Laplacian(ocl_img_rgba, dev_dst_rgba, -1, 3 );
+                cv::ocl::Laplacian(ocl_img_gray, dev_dst_gray, -1, 3 );
+            },
+            dev_dst_rgba.download(dst_rgba); dev_dst_gray.download(dst_gray)
+        );
+    }
+};
+
+////////////////////
+// histograms
+// Benchmarks cv::ocl::calcHist on a random 3000x3000 CV_8UC1 image; also
+// serves as the fixture for the other single-channel benchmarks below.
+struct CalcHistP : PerfTest
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat = cv::ocl::oclMat( src ); },
+            cv::ocl::calcHist(oclmat, oclRes),
+            { oclRes.download(hist); }
+        );
+    }
+    protected:
+    cv::Size size;
+    cv::Mat src, hist;      // host input and host copy of the result
+
+    cv::ocl::oclMat oclmat; // device input
+    cv::ocl::oclMat oclRes; // device result
+
+    virtual void SetUp()
+    {
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(3000, 3000);
+        src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false);
+
+        oclmat = cv::ocl::oclMat( src );
+    }
+};
+
+// Benchmarks cv::ocl::equalizeHist on the CalcHistP input image.
+struct EqualizeHistP : CalcHistP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat = cv::ocl::oclMat( src ); },
+            cv::ocl::equalizeHist(oclmat, oclRes),
+            { oclRes.download(hist); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::threshold (THRESH_TOZERO_INV, thresh 125, maxVal 200)
+// on the CalcHistP input image.
+struct ThresholdP : CalcHistP
+{
+    virtual void Run()
+    {
+        SetUp();
+        int threshOp = (int)cv::THRESH_TOZERO_INV;
+        double maxVal = 200;
+        double thresh = 125;
+
+        // The unused `clock_t start` local was dropped: P_TEST_FULL does its own timing.
+        P_TEST_FULL(
+            oclmat = cv::ocl::oclMat( src ),
+            cv::ocl::threshold(oclmat, oclRes, thresh, maxVal, threshOp ),
+            oclRes.download(hist)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::resize with scale factors 2.0 x 2.0 on the first ArithmTestP input.
+struct ResizeP : ArithmTestP
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat1 = cv::ocl::oclMat( mat1 ); },
+            cv::ocl::resize(oclmat1, oclRes, cv::Size(), 2.0, 2.0),
+            { oclRes.download(dst); }
+        );
+    }
+};
+
+// Benchmarks cv::ocl::cvtColor (CV_BGR2GRAY) on FILTER_TEST_IMAGE.
+struct CvtColorP : PerfTest
+{
+    virtual void Run()
+    {
+        SetUp();
+        P_TEST_FULL(
+            { oclmat = cv::ocl::oclMat( img ); },
+            cv::ocl::cvtColor(oclmat, ocldst, cvtcode),
+            { ocldst.download(dst); }
+        );
+    }
+    protected:
+    int type;    // depth the loaded image is converted to
+    int cvtcode; // colour conversion code passed to cvtColor
+
+    cv::Mat img, dst;
+    cv::ocl::oclMat oclmat, ocldst;
+    virtual void SetUp()
+    {
+        type = CV_8U;
+        cvtcode = CV_BGR2GRAY;
+
+        cv::Mat imgBase = readImage(FILTER_TEST_IMAGE);
+        ASSERT_FALSE(imgBase.empty());
+
+        // Scale to [0, 1] only when converting to float.
+        const double scale = (type == CV_32F) ? 1.0 / 255.0 : 1.0;
+        imgBase.convertTo(img, type, scale);
+
+        oclmat = cv::ocl::oclMat( img );
+    }
+};
+
+
+// Benchmarks cv::ocl::warpAffine with a 45-degree rotation plus a horizontal
+// shift of half the image width, producing a 1500x1500 result.
+struct WarpAffineP : ArithmTestP
+{
+    void Run()
+    {
+        SetUp();
+        const double alpha = CV_PI / 4;
+        // Converted outside the braces: a non-constant int -> double inside a
+        // braced initializer is a narrowing conversion in C++11.
+        const double shift_x = static_cast<double>(mat1.cols / 2);
+        double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), shift_x},
+                             {std::sin(alpha),  std::cos(alpha), 0}};
+        cv::Mat M(2, 3, CV_64F, (void*) mat); // wraps `mat` without copying
+
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat( mat1 ),
+            cv::ocl::warpAffine( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
+            oclRes.download(dst)
+        );
+    }
+};
+
+// Benchmarks cv::ocl::warpPerspective with a 45-degree rotation plus a
+// horizontal shift of half the image width, producing a 1500x1500 result.
+struct WarpPerspectiveP : ArithmTestP
+{
+    void Run()
+    {
+        SetUp();
+        const double alpha = CV_PI / 4;
+        // Converted outside the braces: a non-constant int -> double inside a
+        // braced initializer is a narrowing conversion in C++11.
+        const double shift_x = static_cast<double>(mat1.cols / 2);
+        double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), shift_x},
+                             {std::sin(alpha),  std::cos(alpha), 0},
+                             {0.0, 0.0, 1.0}};
+        cv::Mat M(3, 3, CV_64F, (void*) mat); // wraps `mat` without copying
+
+        P_TEST_FULL(
+            oclmat1 = cv::ocl::oclMat( mat1 ),
+            cv::ocl::warpPerspective( oclmat1, oclRes, M, cv::Size(1500, 1500) ),
+            oclRes.download(dst)
+        );
+    }
+};
+
+
+// Benchmarks cv::ocl::cornerHarris (block size 3, aperture ksize, k = 0.5) on the grayscale image.
+struct CornerHarrisP : FilterTestP
+{
+    void Run()
+    {
+        SetUp();
+        // Named constant instead of the magic number 2.
+        bordertype = cv::BORDER_REFLECT;
+        P_TEST_FULL(
+            {
+                ocl_img_gray = cv::ocl::oclMat(img_gray);
+            },
+            {
+                cv::ocl::cornerHarris(ocl_img_gray, dev_dst_gray, 3, ksize, 0.5, bordertype );
+            },
+            {
+                dev_dst_gray.download(dst_gray);
+            });
+    }
+};
+
+// Driver for the hand-written benchmarks. Every benchmark group is currently
+// commented out, so only the start/end banners and the total time are
+// printed; uncomment a group to run it.
+void test()
+{
+    const clock_t start = clock();
+    std::cout << ">>>>>>>> Performance test started <<<<<<<<\n";
+    /*
+    {
+    AddArrayP AddArrayP;
+    AddArrayP.Run();
+    SubtractArrayP subarray;
+    subarray.Run();
+    MultiplyArrayP MultiplyArrayP;
+    MultiplyArrayP.Run();
+    DivideArrayP DivideArrayP;
+    DivideArrayP.Run();
+    }
+    std::cout.flush();
+    {
+    CompareP comp;
+    comp.Run();
+    MagnitudeP magnitude;
+    magnitude.Run();
+    LUTP lut;
+    lut.Run();
+    FlipP FlipP;
+    FlipP.Run();
+    MinMaxP minmax;
+    minmax.Run();
+    MinMaxLocP minmaxloc;
+    minmaxloc.Run();
+    CountNonZeroP cnz;
+    cnz.Run();
+    SumP sum;
+    sum.Run();
+    }*/
+    /* std::cout.flush();
+    {
+    BitwiseNotP bn;
+    bn.Run();
+    BitwiseOrP bo;
+    bo.Run();
+    BitwiseAndP ba;
+    ba.Run();
+    BitwiseXorP bx;
+    bx.Run();
+    }*/
+
+    std::cout.flush();
+    {
+        // TransposeP transpose;
+        // transpose.Run();
+        // AbsdiffArrayP absdiff;
+        // absdiff.Run();
+        // SplitP split;
+        // split.Run();
+        // MergeP merge;
+        // merge.Run();
+        /*
+        SetToP setto;
+        setto.Run();
+        CopyToP copyto;
+        copyto.Run();
+        ConvertToP convertto;
+        convertto.Run();
+        */
+    }
+    /*
+    std::cout.flush();
+    {
+    BlurP blur;
+    blur.Run();
+    SobelP sobel;
+    sobel.Run();
+    ScharrP scharr;
+    scharr.Run();
+    GaussianBlurP gblur;
+    gblur.Run();
+    DilateP dilate;
+    dilate.Run();
+    ErodeP erode;
+    erode.Run();
+    }
+    std::cout.flush();
+    {
+    MorphExP morphex;
+    morphex.Run();
+    CalcHistP calchist;
+    calchist.Run();
+    EqualizeHistP eqhist;
+    eqhist.Run();
+    ThresholdP threshold;
+    threshold.Run();
+    ResizeP resize;
+    resize.Run();
+    CvtColorP cvtcolor;
+    cvtcolor.Run();
+    }
+
+    {
+    LogP log;
+    log.Run();
+    ExpP exp;
+    exp.Run();
+    }
+
+    std::cout.flush();
+    {
+    //PhaseP phase;
+    //phase.Run();
+    }
+    std::cout.flush();
+    {
+    CartToPolarP ctop;
+    ctop.Run();
+    }
+    std::cout.flush();
+    {
+    PolarToCartP ptoc;
+    ptoc.Run();
+    }
+    {
+    WarpAffineP warpA;
+    warpA.Run();
+    WarpPerspectiveP warpP;
+    warpP.Run();
+    }
+
+    {
+    CornerHarrisP ch;
+    ch.Run();
+    }
+
+    {
+    LaplacianP laplacian;
+    laplacian.Run();
+    }
+
+
+    */
+    // clock() counts ticks; scale by CLOCKS_PER_SEC so the "ms" label is correct.
+    const double total_ms = (clock() - start) * 1000.0 / CLOCKS_PER_SEC;
+    std::cout << ">>>>>>>> Performance test ended <<<<<<<<\ntotal - " << total_ms << "ms\n";
+    std::cout.flush();
+}
+
+// Entry point of the hand-rolled performance run: calls print_info(), initialises the
+// test-system singleton under the module name "ocl" and then executes test().
+void run_perf_test()
+{
+    print_info();
+
+    cvtest::TS* const testSystem = cvtest::TS::ptr();
+    testSystem->init("ocl");
+
+    test();
+}
+
+#endif // WITH_OPENCL
+
+#endif // PREF_TEST_OCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+
+
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <cmath>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <limits>
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <cstdarg>
+#include "cvconfig.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/video/video.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
+
+#include "utility.hpp"
+#include "interpolation.hpp"
+//#include "add_test_info.h"
+//#define PERF_TEST_OCL 1
+
+#endif
+
--- /dev/null
+///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Niko Li, newlife20080214@gmail.com
+// Jia Haipeng, jiahaipeng95@gmail.com
+// Shengen Yan, yanshengen@gmail.com
+// Jiang Liyuan,jlyuan001.good@163.com
+// Rock Li, Rock.Li@amd.com
+// Zailong Wu, bullet@yeah.net
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+#include <iomanip>
+
+#ifdef HAVE_OPENCL
+using namespace cv;
+using namespace cv::ocl;
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+// Shared fixture for the arithmetic performance tests in this file.
+// Parameters: MatType (element type of the generated matrices) and a bool that is not read
+// anywhere in this fixture.
+// SetUp() creates random MWIDTH x MHEIGHT host matrices and a binary mask; Has_roi() then
+// selects either the full matrices or a fixed sub-rectangle of them.
+PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
+{
+    int type;
+    cv::Scalar val;
+
+    // host matrices (whole)
+    cv::Mat mat1;
+    cv::Mat mat2;
+    cv::Mat mask;
+    cv::Mat dst;
+    cv::Mat dst1; // spare destination for operations with two outputs
+
+    // ROI geometry, filled in by Has_roi()
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int src2x;
+    int src2y;
+    int dstx;
+    int dsty;
+    int maskx;
+    int masky;
+
+
+    // host matrices restricted to the ROI
+    cv::Mat mat1_roi;
+    cv::Mat mat2_roi;
+    cv::Mat mask_roi;
+    cv::Mat dst_roi;
+    cv::Mat dst1_roi; // spare
+    std::vector<cv::ocl::Info> oclinfo;
+    // device destination matrices (whole)
+    cv::ocl::oclMat gdst_whole;
+    cv::ocl::oclMat gdst1_whole; // spare
+
+    // device matrices restricted to the ROI; the tests assign them inside their timed sections
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gmat2;
+    cv::ocl::oclMat gdst;
+    cv::ocl::oclMat gdst1; // spare
+    cv::ocl::oclMat gmask;
+
+    // Generates the random inputs and initialises the OpenCL device list.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        mat2 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+        dst1 = randomMat(rng, size, type, 5, 16, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // Binarise the mask to {0, 255}. The last argument is the threshold *type*; the
+        // previous code passed CV_8UC1 here, which only worked because it has the same
+        // numeric value (0) as THRESH_BINARY.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To run on a non-default device, select it here, e.g.:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Selects the region the tests operate on.
+    //   b != 0 : fixed ROI that starts at (1, 1) and is one row and one column smaller
+    //   b == 0 : the whole matrix
+    // Only the host-side *_roi views are updated here.
+    void Has_roi(int b)
+    {
+        const int offset = b ? 1 : 0;
+
+        roicols = mat1.cols - offset;
+        roirows = mat1.rows - offset;
+        src1x = src1y = offset;
+        src2x = src2y = offset;
+        dstx = dsty = offset;
+        maskx = masky = offset;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+        mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+        dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
+    }
+
+};
+////////////////////////////////lut/////////////////////////////////////////////////
+
+// Fixture alias for the look-up-table performance test.
+struct Lut : ArithmTestBase {};
+
+// Times cv::LUT on the host against cv::ocl::LUT, once on the whole matrix and once on the ROI.
+// With PRINT_KERNEL_RUN_TIME defined only the device call is issued, once per ROI mode.
+TEST_P(Lut, Mat)
+{
+    // Local table source (distinct from the fixture's mat2). It is regenerated by randomMat()
+    // before every use; the initial fill is kept because it draws from the test-system RNG.
+    cv::Mat lutSrc(3, 512, CV_8UC1);
+    cv::RNG& rng = TS::ptr()->get_rng();
+    rng.fill(lutSrc, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+            lutSrc = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+            mat2_roi = lutSrc(Rect(src2x, src2y, 256, 1));
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::LUT(mat1_roi, mat2_roi, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::LUT(gmat1, gmat2, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        lutSrc = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+        mat2_roi = lutSrc(Rect(src2x, src2y, 256, 1));
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::LUT(gmat1, gmat2, gdst);
+    }
+#endif
+
+}
+
+
+
+////////////////////////////////exp/////////////////////////////////////////////////
+
+// Fixture alias for the exponent performance test.
+struct Exp : ArithmTestBase {};
+
+// Times cv::exp on the host against cv::ocl::exp, once on the whole matrix and once on the ROI.
+// With PRINT_KERNEL_RUN_TIME defined only the device call is issued, once per ROI mode.
+TEST_P(Exp, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::exp(mat1_roi, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::exp(gmat1, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::exp(gmat1, gdst);
+    }
+#endif
+
+}
+
+
+////////////////////////////////log/////////////////////////////////////////////////
+
+// Fixture alias for the logarithm performance test.
+struct Log : ArithmTestBase {};
+
+// Times cv::log on the host against cv::ocl::log, once on the whole matrix and once on the ROI.
+// With PRINT_KERNEL_RUN_TIME defined only the device call is issued, once per ROI mode.
+TEST_P(Log, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::log(mat1_roi, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::log(gmat1, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::log(gmat1, gdst);
+    }
+#endif
+
+}
+
+
+
+
+////////////////////////////////add/////////////////////////////////////////////////
+
+// Fixture alias for the addition performance tests.
+struct Add : ArithmTestBase {};
+
+// Times matrix + matrix: cv::add on the host against cv::ocl::add, once on the whole matrix
+// and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Add, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::add(mat1_roi, mat2_roi, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, gmat2, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::add(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Times masked matrix + matrix: cv::add on the host against cv::ocl::add, once on the whole
+// matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Add, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, gmat2, gdst, gmask);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::add(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+// Times matrix + scalar: cv::add on the host against cv::ocl::add, once on the whole matrix
+// and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Add, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::add(mat1_roi, val, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, val, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::add(gmat1, val, gdst);
+    }
+#endif
+}
+
+// Times masked matrix + scalar: cv::add on the host against cv::ocl::add, once on the whole
+// matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Add, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::add(mat1_roi, val, dst_roi, mask_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::add(gmat1, val, gdst, gmask);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::add(gmat1, val, gdst, gmask);
+    }
+#endif
+}
+
+
+////////////////////////////////sub/////////////////////////////////////////////////
+// Fixture alias for the subtraction performance tests.
+struct Sub : ArithmTestBase {};
+
+// Times matrix - matrix: cv::subtract on the host against cv::ocl::subtract, once on the whole
+// matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Sub, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, mat2_roi, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, gmat2, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::subtract(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Times masked matrix - matrix: cv::subtract on the host against cv::ocl::subtract, once on the
+// whole matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Sub, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+// Times matrix - scalar: cv::subtract on the host against cv::ocl::subtract, once on the whole
+// matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Sub, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, val, dst_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, val, gdst);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::subtract(gmat1, val, gdst);
+    }
+#endif
+}
+
+// Times masked matrix - scalar: cv::subtract on the host against cv::ocl::subtract, once on the
+// whole matrix and once on the ROI. With PRINT_KERNEL_RUN_TIME defined only the device call is issued.
+TEST_P(Sub, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // host reference
+            double cpuSpan = (double)cvGetTickCount();
+            cv::subtract(mat1_roi, val, dst_roi, mask_roi);
+            cpuSpan = (double)cvGetTickCount() - cpuSpan;
+
+            // device path: uploads, the ocl call and the download
+            double gpuSpan = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+
+            // the ocl call on its own
+            double kernelSpan = (double)cvGetTickCount();
+            cv::ocl::subtract(gmat1, val, gdst, gmask);
+            kernelSpan = (double)cvGetTickCount() - kernelSpan;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuSpan = (double)cvGetTickCount() - gpuSpan;
+
+            // iteration 0 is excluded from the totals
+            if (iter != 0)
+            {
+                gpuTicks += gpuSpan;
+                cpuTicks += cpuSpan;
+                kernelTicks += kernelSpan;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::subtract(gmat1, val, gdst, gmask);
+    }
+#endif
+}
+
+
+////////////////////////////////Mul/////////////////////////////////////////////////
+// Fixture for the Mul perf tests; it adds nothing on top of ArithmTestBase.
+struct Mul : public ArithmTestBase
+{
+};
+
+// Perf test: per-element multiply of two matrices, cv::multiply versus cv::ocl::multiply.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Mul, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::multiply(mat1_roi, mat2_roi, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::multiply(gmat1, gmat2, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::multiply(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Perf test: scaled per-element multiply, cv::multiply versus cv::ocl::multiply,
+// with a scale factor drawn from the test RNG in [-10, 10) on every pass.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Mul, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+            // The same factor is fed to the CPU and the ocl call of this pass.
+            const double scale = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::multiply(mat1_roi, mat2_roi, dst_roi, scale);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::multiply(gmat1, gmat2, gdst, scale);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const double scale = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::multiply(gmat1, gmat2, gdst, scale);
+    }
+#endif
+}
+
+
+// Fixture for the Div perf tests; it adds nothing on top of ArithmTestBase.
+struct Div : public ArithmTestBase
+{
+};
+
+// Perf test: per-element division of two matrices, cv::divide versus cv::ocl::divide.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Div, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::divide(mat1_roi, mat2_roi, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::divide(gmat1, gmat2, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::divide(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Perf test: scaled per-element division, cv::divide versus cv::ocl::divide,
+// with a scale factor drawn from the test RNG in [-10, 10) on every pass.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Div, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+            // The same factor is fed to the CPU and the ocl call of this pass.
+            const double scale = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::divide(mat1_roi, mat2_roi, dst_roi, scale);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::divide(gmat1, gmat2, gdst, scale);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const double scale = TS::ptr()->get_rng().uniform(-10.0, 10.0);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::divide(gmat1, gmat2, gdst, scale);
+    }
+#endif
+}
+
+
+// Fixture for the Absdiff perf tests; it adds nothing on top of ArithmTestBase.
+struct Absdiff : public ArithmTestBase
+{
+};
+
+// Perf test: absolute difference of two matrices, cv::absdiff versus cv::ocl::absdiff.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Absdiff, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::absdiff(mat1_roi, mat2_roi, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::absdiff(gmat1, gmat2, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::absdiff(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Perf test: absolute difference between a matrix and the fixture scalar `val`,
+// cv::absdiff versus cv::ocl::absdiff.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Absdiff, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::absdiff(mat1_roi, val, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::absdiff(gmat1, val, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::absdiff(gmat1, val, gdst);
+    }
+#endif
+}
+
+
+
+// Fixture for the CartToPolar perf tests; it adds nothing on top of ArithmTestBase.
+struct CartToPolar : public ArithmTestBase
+{
+};
+
+// Perf test: cv::cartToPolar versus cv::ocl::cartToPolar with the trailing flag
+// argument set to 1 (degrees, going by the test name). Two outputs are produced.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(CartToPolar, angleInDegree)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // both downloads, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            cv::Mat cpu_dst1;
+            gdst1_whole.download(cpu_dst1);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+    }
+#endif
+}
+
+// Perf test: cv::cartToPolar versus cv::ocl::cartToPolar with the trailing flag
+// argument set to 0 (radians, going by the test name). Two outputs are produced.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(CartToPolar, angleInRadians)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // both downloads, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            cv::Mat cpu_dst1;
+            gdst1_whole.download(cpu_dst1);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+    }
+#endif
+}
+
+
+// Fixture for the PolarToCart perf tests; it adds nothing on top of ArithmTestBase.
+struct PolarToCart : public ArithmTestBase
+{
+};
+
+// Perf test: cv::polarToCart versus cv::ocl::polarToCart with the trailing flag
+// argument set to 1 (degrees, going by the test name). Two outputs are produced.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(PolarToCart, angleInDegree)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // both downloads, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            cv::Mat cpu_dst1;
+            gdst1_whole.download(cpu_dst1);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+    }
+#endif
+}
+
+// Perf test: cv::polarToCart versus cv::ocl::polarToCart with the trailing flag
+// argument set to 0 (radians, going by the test name). Two outputs are produced.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(PolarToCart, angleInRadians)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // both downloads, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            cv::Mat cpu_dst1;
+            gdst1_whole.download(cpu_dst1);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+    }
+#endif
+}
+
+
+
+// Fixture for the Magnitude perf tests; it adds nothing on top of ArithmTestBase.
+struct Magnitude : public ArithmTestBase
+{
+};
+
+// Perf test: cv::magnitude versus cv::ocl::magnitude on two input matrices.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Magnitude, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::magnitude(mat1_roi, mat2_roi, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::magnitude(gmat1, gmat2, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::magnitude(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+// Fixture for the Transpose perf tests; it adds nothing on top of ArithmTestBase.
+struct Transpose : public ArithmTestBase
+{
+};
+
+// Perf test: cv::transpose versus cv::ocl::transpose on a single input matrix.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Transpose, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::transpose(mat1_roi, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::transpose(gmat1, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::transpose(gmat1, gdst);
+    }
+#endif
+}
+
+
+// Fixture for the Flip perf tests; it adds nothing on top of ArithmTestBase.
+struct Flip : public ArithmTestBase
+{
+};
+
+// Perf test: cv::flip versus cv::ocl::flip with flip code 0.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Flip, X)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::flip(mat1_roi, dst_roi, 0);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::flip(gmat1, gdst, 0);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::flip(gmat1, gdst, 0);
+    }
+#endif
+}
+
+// Perf test: cv::flip versus cv::ocl::flip with flip code 1.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Flip, Y)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::flip(mat1_roi, dst_roi, 1);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::flip(gmat1, gdst, 1);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::flip(gmat1, gdst, 1);
+    }
+#endif
+}
+
+// Perf test: cv::flip versus cv::ocl::flip with flip code -1.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(Flip, BOTH)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference implementation.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::flip(mat1_roi, dst_roi, -1);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the g* setup assignments and
+            // the download, the inner timer covers only the ocl call.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::flip(gmat1, gdst, -1);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::flip(gmat1, gdst, -1);
+    }
+#endif
+}
+
+
+
+// Fixture for the MinMax perf tests; it adds nothing on top of ArithmTestBase.
+struct MinMax : public ArithmTestBase
+{
+};
+
+// Perf test: unmasked min/max of mat1_roi, CPU (cv::minMaxLoc, or a hand-written
+// scan when the depth is CV_8S) versus cv::ocl::minMax.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(MinMax, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+
+            // CPU reference; the depth check is inside the timed region.
+            const double cpuStart = (double)cvGetTickCount();
+            if (mat1.depth() == CV_8S)
+            {
+                // CV_8S is scanned by hand instead of going through cv::minMaxLoc.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char px = mat1_roi.at<signed char>(row, col);
+                        if (px < minVal) minVal = px;
+                        if (px > maxVal) maxVal = px;
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+            }
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the gmat1 assignment,
+            // the inner timer covers only the ocl call. Nothing is downloaded here.
+            const double gpuStart = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            double minVal_, maxVal_;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gmat1 = mat1_roi;
+        double minVal_, maxVal_;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+    }
+#endif
+}
+
+// Perf test: masked min/max of mat1_roi, CPU (cv::minMaxLoc with mask_roi, or a
+// hand-written masked scan when the depth is CV_8S) versus cv::ocl::minMax.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(MinMax, MASK)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+
+            // CPU reference; the depth check is inside the timed region.
+            const double cpuStart = (double)cvGetTickCount();
+            if (mat1.depth() == CV_8S)
+            {
+                // CV_8S is scanned by hand; only elements whose mask byte is
+                // non-zero may update the running minimum / maximum.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char px = mat1_roi.at<signed char>(row, col);
+                        const unsigned char maskByte = mask_roi.at<unsigned char>(row, col);
+                        if (maskByte && px < minVal) minVal = px;
+                        if (maskByte && px > maxVal) maxVal = px;
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
+            }
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the gmat1 / gmask assignments,
+            // the inner timer covers only the ocl call. Nothing is downloaded here.
+            const double gpuStart = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            double minVal_, maxVal_;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        double minVal_, maxVal_;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask);
+    }
+#endif
+}
+
+
+// Fixture for the MinMaxLoc perf tests; it adds nothing on top of ArithmTestBase.
+struct MinMaxLoc : public ArithmTestBase
+{
+};
+
+// Perf test: unmasked min/max with locations on mat1_roi, CPU (cv::minMaxLoc, or
+// a hand-written scan when the depth is CV_8S) versus cv::ocl::minMaxLoc called
+// with an empty oclMat as its last argument.
+// Without PRINT_KERNEL_RUN_TIME it prints averaged CPU / GPU / call-only timings
+// for Has_roi(0) and Has_roi(1); otherwise it only issues the ocl call once per mode.
+TEST_P(MinMaxLoc, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        // Tick totals for this mode; declared here so each mode starts from zero.
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // LOOP_TIMES + 1 passes: pass 0 runs everything but is not accumulated.
+        for (int pass = 0; pass <= LOOP_TIMES; ++pass)
+        {
+            Has_roi(roiMode);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            // Queried before the CPU timer starts.
+            const int depth = mat1.depth();
+
+            const double cpuStart = (double)cvGetTickCount();
+            if (depth == CV_8S)
+            {
+                // CV_8S is scanned by hand; strict comparisons keep the first
+                // position (row-major order) at which each extreme is seen.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char px = mat1_roi.at<signed char>(row, col);
+                        if (px < minVal)
+                        {
+                            minVal = px;
+                            minLoc.x = col;
+                            minLoc.y = row;
+                        }
+                        if (px > maxVal)
+                        {
+                            maxVal = px;
+                            maxLoc.x = col;
+                            maxLoc.y = row;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+            }
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // ocl path: the outer timer also covers the gmat1 assignment,
+            // the inner timer covers only the ocl call. Nothing is downloaded here.
+            const double gpuStart = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            double minVal_, maxVal_;
+            cv::Point minLoc_, maxLoc_;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            if (pass != 0)
+            {
+                cpuTicks += cpuElapsed;
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        // Divisor turning a tick total into the per-pass average that is printed as ms.
+        const double avgDivisor = (double)cvGetTickFrequency()* LOOP_TIMES *1000.;
+        cout << "average cpu runtime is " << cpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / avgDivisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / avgDivisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gmat1 = mat1_roi;
+        double minVal_, maxVal_;
+        cv::Point minLoc_, maxLoc_;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
+    }
+#endif
+}
+
+
+TEST_P(MinMaxLoc, MASK)
+{
+    // Times a masked CPU min/max search (with locations) against the masked
+    // cv::ocl::minMaxLoc, once per Has_roi() mode (0 prints "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU section
+        double gpuTotal = 0;     // summed ticks of the whole GPU section
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+            double minVal, maxVal;
+            cv::Point minLoc, maxLoc;
+            int depth = mat1.depth();
+
+            const double cpuBegin = (double)cvGetTickCount();
+            if (depth == CV_8S)
+            {
+                // CV_8S input is scanned by hand; only pixels with a non-zero mask count.
+                minVal = std::numeric_limits<double>::max();
+                maxVal = -std::numeric_limits<double>::max();
+                for (int row = 0; row < mat1_roi.rows; ++row)
+                {
+                    for (int col = 0; col < mat1_roi.cols; ++col)
+                    {
+                        const signed char pixel = mat1_roi.at<signed char>(row, col);
+                        const unsigned char maskBit = mask_roi.at<unsigned char>(row, col);
+                        if (maskBit && pixel < minVal)
+                        {
+                            minVal = pixel;
+                            minLoc = cv::Point(col, row);
+                        }
+                        if (maskBit && pixel > maxVal)
+                        {
+                            maxVal = pixel;
+                            maxLoc = cv::Point(col, row);
+                        }
+                    }
+                }
+            }
+            else
+            {
+                cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
+            }
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            double minVal_, maxVal_;
+            cv::Point minLoc_, maxLoc_;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+        double minVal_, maxVal_;
+        cv::Point minLoc_, maxLoc_;
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
+    }
+#endif
+}
+
+
+struct Sum : ArithmTestBase {}; // fixture for the Sum tests below; adds nothing to ArithmTestBase
+
+TEST_P(Sum, MAT)
+{
+    // Times cv::sum against cv::ocl::sum, once per Has_roi() mode
+    // (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            Scalar cpures = cv::sum(mat1_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            Scalar gpures = cv::ocl::sum(gmat1);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gmat1 = mat1_roi;
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        Scalar gpures = cv::ocl::sum(gmat1);
+    }
+#endif
+}
+
+//TEST_P(Sum, MASK)
+//{
+// for(int j=0; j<LOOP_TIMES; j++)
+// {
+//
+// }
+//}
+
+struct CountNonZero : ArithmTestBase {}; // fixture for the CountNonZero tests below; adds nothing to ArithmTestBase
+
+TEST_P(CountNonZero, MAT)
+{
+    // Times cv::countNonZero against cv::ocl::countNonZero, once per Has_roi() mode
+    // (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            int cpures = cv::countNonZero(mat1_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            int gpures = cv::ocl::countNonZero(gmat1);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gmat1 = mat1_roi;
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        int gpures = cv::ocl::countNonZero(gmat1);
+    }
+#endif
+}
+
+
+
+////////////////////////////////phase/////////////////////////////////////////////////
+struct Phase : ArithmTestBase {}; // fixture for the Phase tests below; adds nothing to ArithmTestBase
+
+TEST_P(Phase, Mat)
+{
+    // Times cv::phase against cv::ocl::phase, once per Has_roi() mode
+    // (0 prints "no roi", 1 prints "with roi").
+    //
+    // Only CV_32F / CV_64F inputs are benchmarked. For any other depth the test
+    // reports it and stops here instead of falling through into the phase calls.
+    if (mat1.depth() != CV_32F && mat1.depth() != CV_64F)
+    {
+        cout << "\tUnsupported type\t\n";
+        return;
+    }
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::phase(mat1_roi, mat2_roi, dst_roi, 0);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::phase(gmat1, gmat2, gdst, 0);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::phase(gmat1, gmat2, gdst, 0);
+    }
+#endif
+}
+
+
+////////////////////////////////bitwise_and/////////////////////////////////////////////////
+struct Bitwise_and : ArithmTestBase {}; // fixture for the Bitwise_and tests below; adds nothing to ArithmTestBase
+
+TEST_P(Bitwise_and, Mat)
+{
+    // Times cv::bitwise_and (matrix & matrix) against cv::ocl::bitwise_and,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_and(mat1_roi, mat2_roi, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_and, Mat_Mask)
+{
+    // Times the masked cv::bitwise_and (matrix & matrix) against the masked
+    // cv::ocl::bitwise_and, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+
+TEST_P(Bitwise_and, Scalar)
+{
+    // Times cv::bitwise_and (matrix & scalar val) against cv::ocl::bitwise_and,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_and(mat1_roi, val, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_and(gmat1, val, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, val, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_and, Scalar_Mask)
+{
+    // Times the masked cv::bitwise_and (matrix & scalar val) against the masked
+    // cv::ocl::bitwise_and, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            // The ocl call below consumes gmask, so gmask must be refreshed from
+            // the current mask ROI here; gmat2 is not used by this test.
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+    }
+#endif
+}
+
+
+
+////////////////////////////////bitwise_or/////////////////////////////////////////////////
+
+struct Bitwise_or : ArithmTestBase {}; // fixture for the Bitwise_or tests below; adds nothing to ArithmTestBase
+
+TEST_P(Bitwise_or, Mat)
+{
+    // Times cv::bitwise_or (matrix | matrix) against cv::ocl::bitwise_or,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_or(mat1_roi, mat2_roi, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_or, Mat_Mask)
+{
+    // Times the masked cv::bitwise_or (matrix | matrix) against the masked
+    // cv::ocl::bitwise_or, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+TEST_P(Bitwise_or, Scalar)
+{
+    // Times cv::bitwise_or (matrix | scalar val) against cv::ocl::bitwise_or,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_or(mat1_roi, val, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_or(gmat1, val, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, val, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_or, Scalar_Mask)
+{
+    // Times the masked cv::bitwise_or (matrix | scalar val) against the masked
+    // cv::ocl::bitwise_or, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+    }
+#endif
+}
+
+
+////////////////////////////////bitwise_xor/////////////////////////////////////////////////
+
+struct Bitwise_xor : ArithmTestBase {}; // fixture for the Bitwise_xor tests below; adds nothing to ArithmTestBase
+
+TEST_P(Bitwise_xor, Mat)
+{
+    // Times cv::bitwise_xor (matrix ^ matrix) against cv::ocl::bitwise_xor,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_xor, Mat_Mask)
+{
+    // Times the masked cv::bitwise_xor (matrix ^ matrix) against the masked
+    // cv::ocl::bitwise_xor, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+    }
+#endif
+}
+
+TEST_P(Bitwise_xor, Scalar)
+{
+    // Times cv::bitwise_xor (matrix ^ scalar val) against cv::ocl::bitwise_xor,
+    // once per Has_roi() mode (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_xor(mat1_roi, val, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_xor(gmat1, val, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_xor(gmat1, val, gdst);
+    }
+#endif
+}
+
+TEST_P(Bitwise_xor, Scalar_Mask)
+{
+    // Times the masked cv::bitwise_xor (matrix ^ scalar val) against the masked
+    // cv::ocl::bitwise_xor, once per Has_roi() mode (0 "no roi", 1 "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmask = mask_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmask = mask_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+    }
+#endif
+}
+
+
+////////////////////////////////bitwise_not/////////////////////////////////////////////////
+
+struct Bitwise_not : ArithmTestBase {}; // fixture for the Bitwise_not tests below; adds nothing to ArithmTestBase
+
+TEST_P(Bitwise_not, Mat)
+{
+    // Times cv::bitwise_not against cv::ocl::bitwise_not, once per Has_roi() mode
+    // (0 prints "no roi", 1 prints "with roi").
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        double cpuTotal = 0;     // summed ticks of the CPU call
+        double gpuTotal = 0;     // summed ticks of the whole GPU section (incl. download)
+        double kernelTotal = 0;  // summed ticks of the ocl call alone
+
+        // Run 0 is executed but left out of the sums (presumably a warm-up).
+        for (int run = 0; run < LOOP_TIMES+1; ++run)
+        {
+            Has_roi(roiPass);
+
+            const double cpuBegin = (double)cvGetTickCount();
+            cv::bitwise_not(mat1_roi, dst_roi);
+            const double cpuSpan = (double)cvGetTickCount() - cpuBegin;
+
+            const double gpuBegin = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = (double)cvGetTickCount();
+            cv::ocl::bitwise_not(gmat1, gdst);
+            const double kernelSpan = (double)cvGetTickCount() - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuSpan = (double)cvGetTickCount() - gpuBegin;
+
+            if (run != 0)
+            {
+                gpuTotal += gpuSpan;
+                cpuTotal += cpuSpan;
+                kernelTotal += kernelSpan;
+            }
+        }
+
+        cout << (roiPass == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTotal/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // Single ocl call per ROI mode; no timing is done in this branch.
+    for (int roiPass = 0; roiPass < 2; ++roiPass)
+    {
+        Has_roi(roiPass);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiPass == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::bitwise_not(gmat1, gdst);
+    }
+#endif
+}
+
+////////////////////////////////compare/////////////////////////////////////////////////
+// Fixture for the compare perf test: owns the host matrices, their ROI views
+// and the oclMat objects the test assigns them to.
+PARAM_TEST_CASE(CompareTestBase, MatType, bool)
+{
+    int type;
+    cv::Scalar val;
+
+    //src mat
+    cv::Mat mat1;
+    cv::Mat mat2;
+    cv::Mat mask;
+    cv::Mat dst;
+    cv::Mat dst1; //bak, for two outputs
+
+    // set up roi
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int src2x;
+    int src2y;
+    int dstx;
+    int dsty;
+    int maskx;
+    int masky;
+
+    //src mat with roi
+    cv::Mat mat1_roi;
+    cv::Mat mat2_roi;
+    cv::Mat mask_roi;
+    cv::Mat dst_roi;
+    cv::Mat dst1_roi; //bak
+    std::vector<cv::ocl::Info> oclinfo;
+    //ocl dst mat for testing
+    cv::ocl::oclMat gdst_whole;
+    cv::ocl::oclMat gdst1_whole; //bak
+
+    //ocl mat with roi
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gmat2;
+    cv::ocl::oclMat gdst;
+    cv::ocl::oclMat gdst1; //bak
+    cv::ocl::oclMat gmask;
+
+    // Fills the host matrices with random data and initialises the ocl device.
+    virtual void SetUp()
+    {
+        // NOTE(review): the element type is pinned to CV_8UC1 and GET_PARAM(0)
+        // is not read, so every type listed at instantiation runs the same
+        // configuration - confirm whether this is intentional.
+        //type = GET_PARAM(0);
+        type = CV_8UC1;
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        mat2 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+        dst1 = randomMat(rng, size, type, 5, 16, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // The last argument of cv::threshold is a threshold type, not a matrix
+        // type; THRESH_BINARY has the same numeric value (0) as the CV_8UC1
+        // that was passed here before, so the result is unchanged.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums>0);
+        //to use a non-default device, select it here
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Selects the region used by the test: b == 0 covers the whole matrices,
+    // any other value uses a fixed ROI shifted by one row and one column.
+    void Has_roi(int b)
+    {
+        if(b)
+        {
+            // fixed ROI: skip the first row and the first column
+            roicols = mat1.cols-1;
+            roirows = mat1.rows-1;
+            src1x = 1;
+            src2x = 1;
+            src1y = 1;
+            src2y = 1;
+            dstx = 1;
+            dsty = 1;
+            maskx = 1;
+            masky = 1;
+        }
+        else
+        {
+            roicols = mat1.cols;
+            roirows = mat1.rows;
+            src1x = 0;
+            src2x = 0;
+            src1y = 0;
+            src2y = 0;
+            dstx = 0;
+            dsty = 0;
+            maskx = 0;
+            masky = 0;
+        }
+
+        mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+        mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+        mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+        dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
+        dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
+
+        // The oclMat members are deliberately not touched here; the test
+        // assigns them itself inside its timed section.
+    }
+
+};
+// Perf case for compare; all state comes from CompareTestBase.
+struct Compare : CompareTestBase {};
+
+// Times cv::compare against cv::ocl::compare for every comparison code, for
+// both the "no roi" and "with roi" configurations, and prints the averages.
+TEST_P(Compare, Mat)
+{
+    if(mat1.type()==CV_8SC1)
+    {
+        cout << "\tUnsupported type\t\n";
+        // Stop here: previously the message was printed and the test ran anyway.
+        return;
+    }
+
+    const int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
+    const int cmp_num = sizeof(cmp_codes) / sizeof(cmp_codes[0]);
+    for (int i = 0; i < cmp_num; ++i)
+    {
+
+#ifndef PRINT_KERNEL_RUN_TIME
+        // k starts at 0 so the "no roi" case is measured too, matching the
+        // other perf tests in this file and the #else branch below.
+        for(int k = 0; k < 2; k++)
+        {
+            double totalcputick = 0;
+            double totalgputick = 0;
+            double totalgputick_kernel = 0;
+
+            for(int j = 0; j < LOOP_TIMES+1; j++)
+            {
+                Has_roi(k);
+
+                double t0 = (double)cvGetTickCount();//cpu start
+                cv::compare(mat1_roi,mat2_roi,dst_roi,cmp_codes[i]);
+                t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                double t1 = (double)cvGetTickCount();//gpu start1
+                gdst_whole = dst;
+                gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+                gmat1 = mat1_roi;
+                gmat2 = mat2_roi;
+                double t2 = (double)cvGetTickCount();//kernel
+                cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
+                t2 = (double)cvGetTickCount() - t2;//kernel
+                cv::Mat cpu_dst;
+                gdst_whole.download (cpu_dst);//download
+                t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+                // Iteration 0 is executed but left out of the totals.
+                if(j == 0)
+                    continue;
+                totalgputick += t1;
+                totalcputick += t0;
+                totalgputick_kernel += t2;
+            }
+
+            if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}
+            cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        }
+#else
+        for(int j = 0; j < 2; j++)
+        {
+            Has_roi(j);
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+            if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}
+            cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
+        }
+#endif
+    }
+
+}
+
+// Perf case for pow; state and Has_roi() are reached through ArithmTestBase,
+// which is declared outside this chunk.
+struct Pow : ArithmTestBase {};
+
+// Times cv::pow against cv::ocl::pow (exponent 4.5) for the "no roi" and
+// "with roi" configurations and prints the averaged timings.
+TEST_P(Pow, Mat)
+{
+    if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F)
+    {
+        cout<<"\tUnsupported type\t\n";
+        // Stop here: previously the message was printed and the test ran anyway.
+        return;
+    }
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for(int k = 0; k < 2; k++)
+    {
+        double totalcputick = 0;
+        double totalgputick = 0;
+        double totalgputick_kernel = 0;
+
+        for(int j = 0; j < LOOP_TIMES+1; j++)
+        {
+            Has_roi(k);
+            double p = 4.5;
+
+            double t0 = (double)cvGetTickCount();//cpu start
+            cv::pow(mat1_roi,p,dst_roi);
+            t0 = (double)cvGetTickCount() - t0;//cpu end
+
+            double t1 = (double)cvGetTickCount();//gpu start1
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+            gmat1 = mat1_roi;
+            double t2 = (double)cvGetTickCount();//kernel
+            cv::ocl::pow(gmat1,p,gdst);
+            t2 = (double)cvGetTickCount() - t2;//kernel
+            cv::Mat cpu_dst;
+            gdst_whole.download (cpu_dst);//download
+            t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+            // Iteration 0 is executed but left out of the totals.
+            if(j == 0)
+                continue;
+            totalgputick += t1;
+            totalcputick += t0;
+            totalgputick_kernel += t2;
+        }
+
+        if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}
+        cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    for(int j = 0; j < 2; j++)
+    {
+        Has_roi(j);
+        double p = 4.5;
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+        gmat1 = mat1_roi;
+        if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}
+        cv::ocl::pow(gmat1,p,gdst);
+    }
+#endif
+}
+
+
+// Perf case for magnitudeSqr; state and Has_roi() are reached through
+// ArithmTestBase, which is declared outside this chunk.
+struct MagnitudeSqr : ArithmTestBase {};
+
+// Times a hand-written per-element a*a + b*b loop against
+// cv::ocl::magnitudeSqr and prints the averaged timings.
+// NOTE(review): Has_roi() is called, but both the CPU loop and the ocl call
+// operate on the whole mat1/mat2, so the "with roi" figures appear to measure
+// the same work as "no roi" - confirm.
+TEST_P(MagnitudeSqr, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference: elements are read as float and written straight
+            // into dst's buffer.
+            const double cpuStart = (double)cvGetTickCount();
+            for (int row = 0; row < mat1.rows; ++row)
+            {
+                for (int col = 0; col < mat1.cols; ++col)
+                {
+                    const float a = mat1.at<float>(row, col);
+                    const float b = mat2.at<float>(row, col);
+
+                    ((float *)(dst.data))[row * dst.step / 4 + col] = a * a + b * b;
+                }
+            }
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers oclMat construction, the call and
+            // the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            cv::ocl::oclMat devSrc1(mat1), devSrc2(mat2), devDst;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::magnitudeSqr(devSrc1, devSrc2, devDst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            devDst.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        cv::ocl::oclMat devSrc1(mat1), devSrc2(mat2), devDst;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::magnitudeSqr(devSrc1, devSrc2, devDst);
+    }
+#endif
+
+}
+
+
+// Perf case for addWeighted; state and Has_roi() are reached through
+// ArithmTestBase, which is declared outside this chunk.
+struct AddWeighted : ArithmTestBase {};
+
+// Times cv::addWeighted against cv::ocl::addWeighted (weights 2.0 and 1.0,
+// added scalar 3.0) for the "no roi" and "with roi" configurations.
+TEST_P(AddWeighted, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+            const double weight1 = 2.0;
+            const double weight2 = 1.0;
+            const double addend = 3.0;
+
+            // CPU reference call.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::addWeighted(mat1_roi, weight1, mat2_roi, weight2, addend, dst_roi);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers the oclMat assignments, the call
+            // itself and the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            gmat2 = mat2_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::addWeighted(gmat1, weight1, gmat2, weight2, addend, gdst);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            gdst_whole.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // With PRINT_KERNEL_RUN_TIME defined, only the ocl call is issued per mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const double weight1 = 2.0;
+        const double weight2 = 1.0;
+        const double addend = 3.0;
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::addWeighted(gmat1, weight1, gmat2, weight2, addend, gdst);
+    }
+#endif
+
+}
+/*
+struct AddWeighted : ArithmTestBase {};
+
+TEST_P(AddWeighted, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0;
+ double totalgputick=0;
+ double totalgputick_kernel=0;
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ double alpha=2.0,beta=1.0,gama=3.0;
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::addWeighted(mat1,alpha,mat2,beta,gama,dst);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+
+ t2=(double)cvGetTickCount();//kernel
+ cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ cldst.download(cpu_dst);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+
+#else
+ //for(int j = 0; j < 2; j ++)
+ // {
+ double alpha=2.0,beta=1.0,gama=3.0;
+ cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+ //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+ // };
+#endif
+
+}
+
+*/
+//********test****************
+
+// Instantiations of the arithmetic perf cases. The first Combine() argument
+// lists the matrix types to run; Values(false) is the reserved parameter.
+
+INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(
+    Values(CV_8UC1, CV_8UC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(
+    Values(CV_32FC1, CV_64FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
+    Values(CV_32FC1, CV_64FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
+    Values(CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
+    Values(CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
+    Values(CV_32FC1, CV_32FC4),
+    Values(false)));
+
+// MagnitudeSqr is defined with TEST_P in this file but had no instantiation,
+// so it never ran. Its CPU loop reads one float per (row, col), hence only the
+// single-channel float type is listed.
+INSTANTIATE_TEST_CASE_P(Arithm, MagnitudeSqr, Combine(
+    Values(CV_32FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
+    Values(CV_8UC1, CV_32FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(
+    Values(CV_8UC1, CV_32FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(
+    Values(CV_8U, CV_32S, CV_32F),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(
+    Values(CV_8U, CV_32S, CV_32F),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(
+    Values(CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
+    Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
+    Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
+    Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
+    Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(
+    Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(
+    Values(CV_32FC1, CV_32FC4),
+    Values(false)));
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(
+    Values(CV_8UC1, CV_32SC1, CV_32FC1),
+    Values(false)));
+
+
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Niko Li, newlife20080214@gmail.com
+// Jia Haipeng, jiahaipeng95@gmail.com
+// Zero Lin, Zero.Lin@amd.com
+// Zhang Ying, zhangying913@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+//using namespace cv::ocl;
+
+// Generic fixture for filter tests: owns two source matrices, a mask, two
+// destination matrices, their ROI views and the matching oclMat objects.
+PARAM_TEST_CASE(FilterTestBase, MatType, bool)
+{
+    int type;
+    cv::Scalar val;
+
+    //src mat
+    cv::Mat mat1;
+    cv::Mat mat2;
+    cv::Mat mask;
+    cv::Mat dst;
+    cv::Mat dst1; //bak, for two outputs
+
+    // set up roi
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int src2x;
+    int src2y;
+    int dstx;
+    int dsty;
+    int maskx;
+    int masky;
+
+    //src mat with roi
+    cv::Mat mat1_roi;
+    cv::Mat mat2_roi;
+    cv::Mat mask_roi;
+    cv::Mat dst_roi;
+    cv::Mat dst1_roi; //bak
+    std::vector<cv::ocl::Info> oclinfo;
+    //ocl dst mat for testing
+    cv::ocl::oclMat gdst_whole;
+    cv::ocl::oclMat gdst1_whole; //bak
+
+    //ocl mat with roi
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gmat2;
+    cv::ocl::oclMat gdst;
+    cv::ocl::oclMat gdst1; //bak
+    cv::ocl::oclMat gmask;
+
+    // Fills the host matrices with random data of the parameterised type.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        mat2 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+        dst1 = randomMat(rng, size, type, 5, 16, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // The last argument of cv::threshold is a threshold type, not a matrix
+        // type; THRESH_BINARY has the same numeric value (0) as the CV_8UC1
+        // that was passed here before, so the result is unchanged.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+    }
+
+    // Picks a random ROI size plus independent random offsets for each
+    // matrix, then rebuilds the host ROI views and the oclMat objects.
+    void random_roi()
+    {
+        cv::RNG& rng = TS::ptr()->get_rng();
+
+        //randomize ROI
+        roicols = rng.uniform(1, mat1.cols);
+        roirows = rng.uniform(1, mat1.rows);
+        src1x = rng.uniform(0, mat1.cols - roicols);
+        src1y = rng.uniform(0, mat1.rows - roirows);
+        src2x = rng.uniform(0, mat2.cols - roicols);
+        src2y = rng.uniform(0, mat2.rows - roirows);
+        dstx = rng.uniform(0, dst.cols - roicols);
+        dsty = rng.uniform(0, dst.rows - roirows);
+        maskx = rng.uniform(0, mask.cols - roicols);
+        masky = rng.uniform(0, mask.rows - roirows);
+
+        mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
+        mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
+        mask_roi = mask(Rect(maskx,masky,roicols,roirows));
+        dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
+        dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
+
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+        gmat1 = mat1_roi;
+        gmat2 = mat2_roi;
+        gmask = mask_roi;
+    }
+
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// blur
+
+// Fixture for the blur perf test, parameterised by matrix type, kernel size
+// and border type.
+PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+
+    // host source and destination
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // ROI size and offsets
+    int roicols, roirows;
+    int src1x, src1y;
+    int dstx, dsty;
+
+    // host ROI views
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole ocl destination
+    cv::ocl::oclMat gdst_whole;
+
+    // ocl matrices used for the ROI
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, creates random host data and initialises ocl.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+        bordertype = GET_PARAM(2);
+
+        cv::RNG& generator = TS::ptr()->get_rng();
+        const cv::Size matSize(MWIDTH, MHEIGHT);
+
+        mat1 = randomMat(generator, matSize, type, 5, 16, false);
+        dst = randomMat(generator, matSize, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to use a non-default device, select it here
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+
+    // b == 0 selects the whole matrices; any other value selects a fixed ROI
+    // that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src1y = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+
+};
+
+// Times cv::blur against cv::ocl::blur for the "no roi" and "with roi"
+// configurations and prints the averages over the measured iterations.
+TEST_P(Blur, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference call.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::blur(mat1_roi, dst_roi, ksize, Point(-1,-1), bordertype);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers the oclMat assignments, the call
+            // itself and the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            gdst_whole.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // With PRINT_KERNEL_RUN_TIME defined, only the ocl call is issued per mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
+    }
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+//Laplacian
+
+// Fixture for the Laplacian perf test, parameterised by matrix type and
+// aperture size; works on 2560x2560 random matrices.
+PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
+{
+    int type;
+    int ksize;
+
+    // host source and destination
+    cv::Mat mat;
+    cv::Mat dst;
+
+    // ROI size and offsets
+    int roicols, roirows;
+    int srcx, srcy;
+    int dstx, dsty;
+
+    // host ROI views
+    cv::Mat mat_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole ocl destination
+    cv::ocl::oclMat gdst_whole;
+
+    // ocl matrices used for the ROI
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, creates random host data and initialises ocl.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        ksize = GET_PARAM(1);
+
+        cv::RNG& generator = TS::ptr()->get_rng();
+        const cv::Size matSize(2560, 2560);
+
+        mat = randomMat(generator, matSize, type, 5, 16, false);
+        dst = randomMat(generator, matSize, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to use a non-default device, select it here
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // b == 0 selects the whole matrices; any other value selects a fixed ROI
+    // that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat.cols - shift;
+        roirows = mat.rows - shift;
+        srcx = shift;
+        srcy = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+
+};
+
+// Perf case for Laplacian; all state comes from LaplacianTestBase.
+struct Laplacian : LaplacianTestBase {};
+
+// Times cv::Laplacian against cv::ocl::Laplacian (ddepth -1, scale 1) for the
+// "no roi" and "with roi" configurations and prints the averaged timings.
+TEST_P(Laplacian, Accuracy)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference call.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers the oclMat assignments, the call
+            // itself and the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat = mat_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            gdst_whole.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // With PRINT_KERNEL_RUN_TIME defined, only the ocl call is issued per mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat = mat_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+    }
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// erode & dilate
+
+// Shared fixture for the erode and dilate perf tests: 2560x2560 random
+// matrices plus a random 3x3 CV_8UC1 structuring element.
+PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
+{
+    int type;
+
+    // structuring element passed to erode/dilate
+    cv::Mat kernel;
+
+    // host source and destination
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // ROI size and offsets
+    int roicols, roirows;
+    int src1x, src1y;
+    int dstx, dsty;
+
+    // host ROI views
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole ocl destination
+    cv::ocl::oclMat gdst_whole;
+
+    // ocl matrices used for the ROI
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Reads the type parameter, creates random host data and initialises ocl.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+
+        cv::RNG& generator = TS::ptr()->get_rng();
+        const cv::Size matSize(2560, 2560);
+
+        mat1 = randomMat(generator, matSize, type, 5, 16, false);
+        dst = randomMat(generator, matSize, type, 5, 16, false);
+        kernel = randomMat(generator, Size(3,3), CV_8UC1, 0, 3, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to use a non-default device, select it here
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // b == 0 selects the whole matrices; any other value selects a fixed ROI
+    // that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src1y = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+
+};
+
+// erode
+
+// Perf case for erode; all state comes from ErodeDilateBase.
+struct Erode : ErodeDilateBase{};
+
+// Times cv::erode against cv::ocl::erode for the "no roi" and "with roi"
+// configurations and prints the averages over the measured iterations.
+TEST_P(Erode, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference call.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::erode(mat1_roi, dst_roi, kernel);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers the oclMat assignments, the call
+            // itself and the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::erode(gmat1, gdst, kernel);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            gdst_whole.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // With PRINT_KERNEL_RUN_TIME defined, only the ocl call is issued per mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::erode(gmat1, gdst, kernel);
+    }
+#endif
+
+}
+
+// dilate
+
+// Perf case for dilate; all state comes from ErodeDilateBase.
+struct Dilate : ErodeDilateBase{};
+
+// Times cv::dilate against cv::ocl::dilate for the "no roi" and "with roi"
+// configurations and prints the averages over the measured iterations.
+TEST_P(Dilate, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        for (int iter = 0; iter < LOOP_TIMES + 1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference call.
+            const double cpuStart = (double)cvGetTickCount();
+            cv::dilate(mat1_roi, dst_roi, kernel);
+            const double cpuElapsed = (double)cvGetTickCount() - cpuStart;
+
+            // The outer ocl interval covers the oclMat assignments, the call
+            // itself and the download; the inner one covers the call only.
+            const double gpuStart = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            const double kernelStart = (double)cvGetTickCount();
+            cv::ocl::dilate(gmat1, gdst, kernel);
+            const double kernelElapsed = (double)cvGetTickCount() - kernelStart;
+
+            cv::Mat downloaded;
+            gdst_whole.download(downloaded);
+            const double gpuElapsed = (double)cvGetTickCount() - gpuStart;
+
+            // Iteration 0 is executed but left out of the totals.
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = (double)cvGetTickFrequency() * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // With PRINT_KERNEL_RUN_TIME defined, only the ocl call is issued per mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::dilate(gmat1, gdst, kernel);
+    }
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Sobel
+
+// Fixture for the Sobel benchmark.
+// Parameters, in order: matrix type, dx, dy, ksize, border type
+// (they are forwarded to cv::Sobel / cv::ocl::Sobel by the test body).
+PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
+{
+    int type;
+    int dx, dy, ksize, bordertype;
+
+    // full-size host matrices
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // ROI geometry, filled in by Has_roi()
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+    // host views restricted to the ROI
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole destination as an oclMat
+    cv::ocl::oclMat gdst_whole;
+
+    // oclMat source and destination used for the OpenCL call
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    virtual void SetUp()
+    {
+        type       = GET_PARAM(0);
+        dx         = GET_PARAM(1);
+        dy         = GET_PARAM(2);
+        ksize      = GET_PARAM(3);
+        bordertype = GET_PARAM(4);
+        // The derivative orders read above are overwritten here, so every
+        // instantiation runs with dx = 2 and dy = 0.
+        dx = 2;
+        dy = 0;
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        const cv::Size size(2560, 2560);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst  = randomMat(rng, size, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to run on a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Chooses the working rectangle: the whole matrix when b == 0, otherwise
+    // a rectangle that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src1y = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi  = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+// Compares cv::Sobel with cv::ocl::Sobel on the whole matrix and on a
+// sub-rectangle. Without PRINT_KERNEL_RUN_TIME the averaged timings are
+// printed; with it, the OpenCL call is issued once per ROI mode.
+TEST_P(Sobel, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // One extra pass is executed: iteration 0 is measured but not accumulated.
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference
+            double cpuElapsed = (double)cvGetTickCount();
+            cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001, /*delta*/0, bordertype);
+            cpuElapsed = (double)cvGetTickCount() - cpuElapsed;
+
+            // OpenCL path: oclMat assignments, ROI view, kernel call and download
+            double gpuElapsed = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            double kernelElapsed = (double)cvGetTickCount();
+            cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize, /*scale*/0.00001, /*delta*/0, bordertype);
+            kernelElapsed = (double)cvGetTickCount() - kernelElapsed;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuElapsed = (double)cvGetTickCount() - gpuElapsed;
+
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize, /*scale*/0.00001, /*delta*/0, bordertype);
+    }
+#endif
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Scharr
+
+// Fixture for the Scharr benchmark.
+// Parameters, in order: matrix type, dx, dy, border type
+// (they are forwarded to cv::Scharr / cv::ocl::Scharr by the test body).
+PARAM_TEST_CASE(Scharr, MatType, int, int, int)
+{
+    int type;
+    int dx, dy, bordertype;
+
+    // full-size host matrices
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // ROI geometry, filled in by Has_roi()
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+    // host views restricted to the ROI
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole destination as an oclMat
+    cv::ocl::oclMat gdst_whole;
+
+    // oclMat source and destination used for the OpenCL call
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    virtual void SetUp()
+    {
+        type       = GET_PARAM(0);
+        dx         = GET_PARAM(1);
+        dy         = GET_PARAM(2);
+        bordertype = GET_PARAM(3);
+        // The derivative orders read above are overwritten here, so every
+        // instantiation runs with dx = 1 and dy = 0.
+        dx = 1;
+        dy = 0;
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        const cv::Size size(2560, 2560);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst  = randomMat(rng, size, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to run on a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Chooses the working rectangle: the whole matrix when b == 0, otherwise
+    // a rectangle that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src1y = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi  = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+// Compares cv::Scharr with cv::ocl::Scharr on the whole matrix and on a
+// sub-rectangle. Without PRINT_KERNEL_RUN_TIME the averaged timings are
+// printed; with it, the OpenCL call is issued once per ROI mode.
+TEST_P(Scharr, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // One extra pass is executed: iteration 0 is measured but not accumulated.
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference
+            double cpuElapsed = (double)cvGetTickCount();
+            cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1, /*delta*/0, bordertype);
+            cpuElapsed = (double)cvGetTickCount() - cpuElapsed;
+
+            // OpenCL path: oclMat assignments, ROI view, kernel call and download
+            double gpuElapsed = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            double kernelElapsed = (double)cvGetTickCount();
+            cv::ocl::Scharr(gmat1, gdst, -1, dx, dy, /*scale*/1, /*delta*/0, bordertype);
+            kernelElapsed = (double)cvGetTickCount() - kernelElapsed;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuElapsed = (double)cvGetTickCount() - gpuElapsed;
+
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::Scharr(gmat1, gdst, -1, dx, dy, /*scale*/1, /*delta*/0, bordertype);
+    }
+#endif
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// GaussianBlur
+
+// Fixture for the GaussianBlur benchmark.
+// Parameters, in order: matrix type, kernel size, border type.
+PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
+{
+    int type;
+    cv::Size ksize;
+    int bordertype;
+
+    // the two sigma arguments passed to GaussianBlur, drawn at random in SetUp()
+    double sigma1, sigma2;
+
+    // full-size host matrices
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // ROI geometry, filled in by Has_roi()
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+    // host views restricted to the ROI
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // whole destination as an oclMat
+    cv::ocl::oclMat gdst_whole;
+
+    // oclMat source and destination used for the OpenCL call
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    virtual void SetUp()
+    {
+        type       = GET_PARAM(0);
+        ksize      = GET_PARAM(1);
+        bordertype = GET_PARAM(2);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        const cv::Size size(2560, 2560);
+
+        // The sigmas are drawn before the matrices; keep this order so the
+        // random sequence stays the same.
+        sigma1 = rng.uniform(0.1, 1.0);
+        sigma2 = rng.uniform(0.1, 1.0);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst  = randomMat(rng, size, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // to run on a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Chooses the working rectangle: the whole matrix when b == 0, otherwise
+    // a rectangle that starts at (1,1) and is one row and one column smaller.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src1y = shift;
+        dstx = shift;
+        dsty = shift;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi  = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+// Compares cv::GaussianBlur with cv::ocl::GaussianBlur on the whole matrix
+// and on a sub-rectangle. Without PRINT_KERNEL_RUN_TIME the averaged timings
+// are printed; with it, the OpenCL call is issued once per ROI mode.
+TEST_P(GaussianBlur, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+
+        // One extra pass is executed: iteration 0 is measured but not accumulated.
+        for (int iter = 0; iter < LOOP_TIMES+1; ++iter)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference
+            double cpuElapsed = (double)cvGetTickCount();
+            cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype);
+            cpuElapsed = (double)cvGetTickCount() - cpuElapsed;
+
+            // OpenCL path: oclMat assignments, ROI view, kernel call and download
+            double gpuElapsed = (double)cvGetTickCount();
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+
+            double kernelElapsed = (double)cvGetTickCount();
+            cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+            kernelElapsed = (double)cvGetTickCount() - kernelElapsed;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            gpuElapsed = (double)cvGetTickCount() - gpuElapsed;
+
+            if (iter != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        cout << "average cpu runtime is " << cpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+    }
+#endif
+}
+
+// Test instantiations. Every fixture below is run for the matrix types CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4.
+// Blur: 3x3 kernel only (5x5 and 7x7 are commented out), combined with four border modes.
+INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+// Laplacian: second parameter fixed to 1 (3 is commented out). NOTE(review): the prefix is "Filters" here but "Filter" everywhere else.
+INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(1/*, 3*/)));
+
+//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
+// Erode: second parameter fixed to false.
+INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
+
+//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
+// Dilate: second parameter fixed to false.
+INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
+
+// Sobel: Sobel::SetUp() overwrites dx/dy with dx = 2, dy = 0, so the four dx/dy combinations below all run the same configuration.
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT,
+ (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+// Scharr: Scharr::SetUp() likewise forces dx = 1, dy = 0 regardless of the dx/dy values listed below.
+INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+// GaussianBlur: kernel sizes 3x3, 5x5 and 7x7, combined with four border modes.
+INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/objdetect/objdetect.hpp"
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv;
+
+// Function object that returns the `rect` member of a CvAvgComp; used with
+// std::transform to turn detection results into a vector<Rect>.
+struct getRect
+{
+    Rect operator ()(const CvAvgComp& comp) const
+    {
+        return comp.rect;
+    }
+};
+
+// Common fixture for the Haar cascade benchmark: loads the frontal-face
+// cascade into both the OpenCL and the CPU classifier and initialises the
+// OpenCL device list. The two int test parameters are not read here.
+PARAM_TEST_CASE(HaarTestBase, int, int)
+{
+    std::vector<cv::ocl::Info> oclinfo;
+    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+    cv::CascadeClassifier cpucascade, cpunestedCascade;
+
+    double scale;
+    int index;
+
+    virtual void SetUp()
+    {
+        scale = 1.1;
+
+        // NOTE(review): both cascade locations are machine-specific; the
+        // Windows one is an absolute path on drive E:.
+#if WIN32
+        string cascadeName="E:\\opencvbuffer\\trunk\\data\\haarcascades\\haarcascade_frontalface_alt.xml";
+#else
+        string cascadeName="../data/haarcascades/haarcascade_frontalface_alt.xml";
+#endif
+
+        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+        {
+            // Stop with a fatal failure so the test body never runs with an
+            // unloaded classifier (previously a usage text was printed and the
+            // test carried on).
+            FAIL() << "ERROR: Could not load classifier cascade from " << cascadeName;
+        }
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums>0);
+        // to run on a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        // Same binary path as the other perf fixtures instead of a hard-coded "E:\\".
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+};
+
+////////////////////////////////faceDetect/////////////////////////////////////////////////
+
+// Fixture for the face-detection benchmark; it adds nothing to HaarTestBase.
+struct Haar : HaarTestBase {};
+
+// Times cascade.oclHaarDetectObjects() on a grayscale, downscaled and
+// histogram-equalised copy of the input image (ten runs, each timing is
+// printed), then draws one circle per detection of the last run onto the
+// original image and writes it to disk.
+TEST_P(Haar, FaceDetect)
+{
+    // NOTE: this loop variable hides the fixture member `index`; with these
+    // bounds only image number 1 is processed.
+    for(int index = 1;index < 2; index++)
+    {
+        Mat img;
+        char buff[256];
+#if WIN32
+        sprintf(buff,"E:\\myDataBase\\%d.jpg",index);
+        img = imread( buff, 1 );
+#else
+        sprintf(buff,"%d.jpg",index);
+        img = imread( buff, 1 );
+        std::cout << "Now test " << index << ".jpg" <<std::endl;
+#endif
+        if(img.empty())
+        {
+            std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
+            continue;
+        }
+
+        int i = 0;
+        double t = 0;
+        vector<Rect> faces;
+
+        // palette cycled through when drawing the detections
+        const static Scalar colors[] =  { CV_RGB(0,0,255),
+            CV_RGB(0,128,255),
+            CV_RGB(0,255,255),
+            CV_RGB(0,255,0),
+            CV_RGB(255,128,0),
+            CV_RGB(255,255,0),
+            CV_RGB(255,0,0),
+            CV_RGB(255,0,255)} ;
+
+        Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+        MemStorage storage(cvCreateMemStorage(0));
+        cvtColor( img, gray, CV_BGR2GRAY );
+        resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+        equalizeHist( smallImg, smallImg );
+        CvMat _image = smallImg;
+
+        Mat tempimg(&_image, false);
+
+        cv::ocl::oclMat image(tempimg);
+        // Initialised so the pointer is never read uninitialised should the
+        // #else branch below be switched on.
+        CvSeq* _objects = 0;
+
+#if 1
+        for(int k= 0; k<10; k++)
+        {
+            t = (double)cvGetTickCount();
+            _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+                2, 0
+                |CV_HAAR_SCALE_IMAGE
+                , Size(30,30), Size(0, 0) );
+
+            t = (double)cvGetTickCount() - t ;
+            printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+        }
+
+#else
+        cpucascade.detectMultiScale( image, faces, 1.1,
+            2, 0
+            |CV_HAAR_SCALE_IMAGE
+            , Size(30,30), Size(0, 0) );
+
+#endif
+        // convert the CvSeq of CvAvgComp into a vector<Rect>
+        vector<CvAvgComp> vecAvgComp;
+        Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+        faces.resize(vecAvgComp.size());
+        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
+
+        // Detections are in the coordinates of the downscaled image, so they
+        // are multiplied by `scale` before drawing on the original.
+        for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
+        {
+            Point center;
+            Scalar color = colors[i%8];
+            int radius;
+            center.x = cvRound((r->x + r->width*0.5)*scale);
+            center.y = cvRound((r->y + r->height*0.5)*scale);
+            radius = cvRound((r->width + r->height)*0.25*scale);
+            circle( img, center, radius, color, 3, 8, 0 );
+        }
+
+#if WIN32
+        sprintf(buff,"E:\\result1\\%d.jpg",index);
+        imwrite(buff,img);
+#else
+        sprintf(buff,"testdet_%d.jpg",index);
+        imwrite(buff,img);
+#endif
+    }
+}
+
+
+//INSTANTIATE_TEST_CASE_P(HaarTestBase, Haar, Combine(Values(1),
+// Values(1)));
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Niko Li, newlife20080214@gmail.com
+// Jia Haipeng, jiahaipeng95@gmail.com
+// Shengen Yan, yanshengen@gmail.com
+// Jiang Liyuan, lyuan001.good@163.com
+// Rock Li, Rock.Li@amd.com
+// Zailong Wu, bullet@yeah.net
+// Xu Pang, pangxu010@163.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+
+// Sentinel matrix type: ImgprocTestBase skips any matrix slot whose type equals nulltype.
+MatType nulltype = -1;
+// Parameter generators that yield a single matrix type (the given one, or nulltype).
+#define ONE_TYPE(type) testing::ValuesIn(typeVector(type))
+#define NULL_TYPE testing::ValuesIn(typeVector(nulltype))
+
+
+// Returns a vector that holds exactly one element, `type`.
+vector<MatType> typeVector(MatType type)
+{
+    return vector<MatType>(1, type);
+}
+
+
+// Shared fixture for the image-processing benchmarks.
+// Parameters, in order: the types of mat1, mat2, dst and dst1, the threshold
+// type used to build the mask, and a bool that this fixture does not read.
+// A slot whose type equals nulltype is left empty.
+PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool)
+{
+    int type1,type2,type3,type4,type5;
+    cv::Scalar val;
+
+    // ROI size and per-matrix offsets
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int src2x;
+    int src2y;
+    int dstx;
+    int dsty;
+    int dst1x;
+    int dst1y;
+    int maskx;
+    int masky;
+
+    // full-size host matrices
+    cv::Mat mat1;
+    cv::Mat mat2;
+    cv::Mat mask;
+    cv::Mat dst;
+    cv::Mat dst1; // second output, for operations with two results
+
+    // host views restricted to the ROI
+    cv::Mat mat1_roi;
+    cv::Mat mat2_roi;
+    cv::Mat mask_roi;
+    cv::Mat dst_roi;
+    cv::Mat dst1_roi; // second output
+    std::vector<cv::ocl::Info> oclinfo;
+
+    // full-size oclMat counterparts, assigned in SetUp()
+    cv::ocl::oclMat clmat1;
+    cv::ocl::oclMat clmat2;
+    cv::ocl::oclMat clmask;
+    cv::ocl::oclMat cldst;
+    cv::ocl::oclMat cldst1; // second output
+
+    // oclMat ROI views; these are set by the individual tests, not by the fixture
+    cv::ocl::oclMat clmat1_roi;
+    cv::ocl::oclMat clmat2_roi;
+    cv::ocl::oclMat clmask_roi;
+    cv::ocl::oclMat cldst_roi;
+    cv::ocl::oclMat cldst1_roi;
+
+    virtual void SetUp()
+    {
+        type1 = GET_PARAM(0);
+        type2 = GET_PARAM(1);
+        type3 = GET_PARAM(2);
+        type4 = GET_PARAM(3);
+        type5 = GET_PARAM(4);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+        const double lo = 1;
+        const double hi = 20;
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums>0);
+        // to run on a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+
+        // The matrices are generated in a fixed order (mat1, mat2, dst, dst1,
+        // mask) so the random sequence stays the same.
+        if (type1 != nulltype)
+        {
+            mat1 = randomMat(rng, size, type1, lo, hi, false);
+            clmat1 = mat1;
+        }
+        if (type2 != nulltype)
+        {
+            mat2 = randomMat(rng, size, type2, lo, hi, false);
+            clmat2 = mat2;
+        }
+        if (type3 != nulltype)
+        {
+            dst = randomMat(rng, size, type3, lo, hi, false);
+            cldst = dst;
+        }
+        if (type4 != nulltype)
+        {
+            dst1 = randomMat(rng, size, type4, lo, hi, false);
+            cldst1 = dst1;
+        }
+        if (type5 != nulltype)
+        {
+            // random 8-bit single-channel matrix, thresholded with type5
+            mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+            cv::threshold(mask, mask, 0.5, 255., type5);
+            clmask = mask;
+        }
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+    }
+
+    // Rebuilds the host ROI views from the current offsets and ROI size,
+    // skipping every slot whose type is nulltype.
+    void update_host_rois()
+    {
+        const cv::Size roiSize(roicols, roirows);
+
+        if (type1 != nulltype)
+            mat1_roi = mat1(Rect(cv::Point(src1x, src1y), roiSize));
+        if (type2 != nulltype)
+            mat2_roi = mat2(Rect(cv::Point(src2x, src2y), roiSize));
+        if (type3 != nulltype)
+            dst_roi = dst(Rect(cv::Point(dstx, dsty), roiSize));
+        if (type4 != nulltype)
+            dst1_roi = dst1(Rect(cv::Point(dst1x, dst1y), roiSize));
+        if (type5 != nulltype)
+            mask_roi = mask(Rect(cv::Point(maskx, masky), roiSize));
+    }
+
+    // Chooses the working rectangle: the whole matrix when b == 0, otherwise
+    // a rectangle that starts at (1,1) and is one row and one column smaller
+    // than mat1.
+    void Has_roi(int b)
+    {
+        const int shift = b ? 1 : 0;
+
+        roicols = mat1.cols - shift;
+        roirows = mat1.rows - shift;
+        src1x = shift;
+        src2x = shift;
+        src1y = shift;
+        src2y = shift;
+        dstx = shift;
+        dsty = shift;
+        dst1x = shift;
+        dst1y = shift;
+        maskx = shift;
+        masky = shift;
+
+        update_host_rois();
+    }
+
+    // Draws a random ROI size from mat1's dimensions and a random offset for
+    // each matrix. The draw order is fixed so the random sequence stays the same.
+    void random_roi()
+    {
+        cv::RNG& rng = TS::ptr()->get_rng();
+
+        roicols = rng.uniform(1, mat1.cols);
+        roirows = rng.uniform(1, mat1.rows);
+        src1x = rng.uniform(0, mat1.cols - roicols);
+        src1y = rng.uniform(0, mat1.rows - roirows);
+        src2x = rng.uniform(0, mat2.cols - roicols);
+        src2y = rng.uniform(0, mat2.rows - roirows);
+        dstx = rng.uniform(0, dst.cols - roicols);
+        dsty = rng.uniform(0, dst.rows - roirows);
+        dst1x = rng.uniform(0, dst1.cols - roicols);
+        dst1y = rng.uniform(0, dst1.rows - roirows);
+        maskx = rng.uniform(0, mask.cols - roicols);
+        masky = rng.uniform(0, mask.rows - roirows);
+
+        update_host_rois();
+    }
+};
+////////////////////////////////equalizeHist//////////////////////////////////////////
+
+// Fixture for the equalizeHist benchmark; it adds nothing to ImgprocTestBase.
+struct equalizeHist : ImgprocTestBase {};
+
+// Compares cv::equalizeHist with cv::ocl::equalizeHist on the whole matrix
+// and on a sub-rectangle. Only CV_8UC1 input with a destination of the same
+// type is run; anything else prints "Unsupported type".
+TEST_P(equalizeHist, MatType)
+{
+    if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
+    {
+        cout<<"Unsupported type"<<endl;
+        EXPECT_DOUBLE_EQ(0.0, 0.0);
+    }
+    else
+    {
+#ifndef PRINT_KERNEL_RUN_TIME
+        for (int k = 0; k < 2; k++)
+        {
+            double totalcputick = 0;
+            double totalgputick = 0;
+            double totalgputick_kernel = 0;
+
+            // One extra pass is executed: iteration 0 is measured but not accumulated.
+            for (int j = 0; j < LOOP_TIMES+1; j++)
+            {
+                Has_roi(k);
+
+                double t0 = (double)cvGetTickCount();//cpu start
+                cv::equalizeHist(mat1_roi, dst_roi);
+                t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                double t1 = (double)cvGetTickCount();//gpu start
+                if (type1 != nulltype)
+                {
+                    clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                }
+                cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+                double t2 = (double)cvGetTickCount();//kernel start
+                cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
+                t2 = (double)cvGetTickCount() - t2;//kernel end
+                cv::Mat cpu_cldst;
+                // NOTE(review): the download is disabled here, so the "gpu
+                // runtime" below does not include a transfer back to the host.
+                //cldst.download(cpu_cldst);//download
+                t1 = (double)cvGetTickCount() - t1;//gpu end
+
+                if (j == 0)
+                    continue;
+
+                totalgputick += t1;
+                totalcputick += t0;
+                totalgputick_kernel += t2;
+            }
+            if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+            cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        }
+#else
+        for (int j = 0; j < 2; j++)
+        {
+            Has_roi(j);
+            if (type1 != nulltype)
+            {
+                clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+            }
+            // Select the destination ROI exactly as the timing branch does, so
+            // that "with roi" really writes into a sub-rectangle of cldst.
+            cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+            if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+            cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
+        }
+#endif
+    }
+}
+
+
+////////////////////////////////bilateralFilter////////////////////////////////////////////
+
+// Fixture for the bilateralFilter benchmark; it adds nothing to ImgprocTestBase.
+struct bilateralFilter : ImgprocTestBase {};
+
+// Compares cv::bilateralFilter with cv::ocl::bilateralFilter for each listed
+// border mode, on the whole matrix and on a sub-rectangle. Only CV_8UC1 input
+// with a destination of the same type is run.
+TEST_P(bilateralFilter, Mat)
+{
+    double sigmacolor = 50.0;
+    int radius = 9;
+    int d = 2*radius+1;
+    double sigmaspace = 20.0;
+    int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+    // Element count as an int, so the loop below compares int with int.
+    const int borderCount = static_cast<int>(sizeof(bordertype)/sizeof(bordertype[0]));
+
+    if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
+    {
+        cout<<"Unsupported type"<<endl;
+        EXPECT_DOUBLE_EQ(0.0, 0.0);
+    }
+    else
+    {
+        for (int i = 0; i < borderCount; i++)
+        {
+#ifndef PRINT_KERNEL_RUN_TIME
+            for (int k = 0; k < 2; k++)
+            {
+                double totalcputick = 0;
+                double totalgputick = 0;
+                double totalgputick_kernel = 0;
+
+                // One extra pass is executed: iteration 0 is measured but not accumulated.
+                for (int j = 0; j < LOOP_TIMES+1; j++)
+                {
+                    Has_roi(k);
+
+                    double t0 = (double)cvGetTickCount();//cpu start
+                    cv::bilateralFilter(mat1_roi, dst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+                    t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                    double t1 = (double)cvGetTickCount();//gpu start
+                    if (type1 != nulltype)
+                    {
+                        clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                    }
+                    double t2 = (double)cvGetTickCount();//kernel start
+                    cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+                    t2 = (double)cvGetTickCount() - t2;//kernel end
+                    cv::Mat cpu_cldst;
+                    // NOTE(review): this downloads cldst, while the filter
+                    // wrote into cldst_roi (never derived from cldst in this
+                    // test) -- confirm which matrix was meant.
+                    cldst.download(cpu_cldst);//download
+                    t1 = (double)cvGetTickCount() - t1;//gpu end
+
+                    if (j == 0)
+                        continue;
+
+                    totalgputick += t1;
+                    totalcputick += t0;
+                    totalgputick_kernel += t2;
+                }
+                if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            }
+#else
+            for (int j = 0; j < 2; j++)
+            {
+                Has_roi(j);
+                if (type1 != nulltype)
+                {
+                    clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                }
+                if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
+            }
+#endif
+        }
+    }
+}
+
+////////////////////////////////copyMakeBorder////////////////////////////////////////////
+
+// Fixture for the copyMakeBorder benchmark; it adds nothing to ImgprocTestBase.
+struct CopyMakeBorder : ImgprocTestBase {};
+
+// Compares cv::copyMakeBorder with cv::ocl::copyMakeBorder for each listed
+// border mode, on the whole matrix and on a sub-rectangle, using border
+// widths 7, 5, 5, 7 and fill value Scalar(1.0). Only CV_8UC1, CV_8UC4 and
+// CV_32SC1 input with a destination of the same type is run.
+TEST_P(CopyMakeBorder, Mat)
+{
+    int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+    // Element count as an int, so the loop below compares int with int.
+    const int borderCount = static_cast<int>(sizeof(bordertype)/sizeof(bordertype[0]));
+
+    if ((mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1) || mat1.type() != dst.type())
+    {
+        cout<<"Unsupported type"<<endl;
+        EXPECT_DOUBLE_EQ(0.0, 0.0);
+    }
+    else
+    {
+        for (int i = 0; i < borderCount; i++)
+        {
+#ifndef PRINT_KERNEL_RUN_TIME
+            for (int k = 0; k < 2; k++)
+            {
+                double totalcputick = 0;
+                double totalgputick = 0;
+                double totalgputick_kernel = 0;
+
+                // One extra pass is executed: iteration 0 is measured but not accumulated.
+                for (int j = 0; j < LOOP_TIMES+1; j++)
+                {
+                    Has_roi(k);
+
+                    double t0 = (double)cvGetTickCount();//cpu start
+                    cv::copyMakeBorder(mat1_roi, dst_roi, 7,5,5,7, bordertype[i],cv::Scalar(1.0));
+                    t0 = (double)cvGetTickCount() - t0;//cpu end
+
+                    double t1 = (double)cvGetTickCount();//gpu start
+                    if (type1 != nulltype)
+                    {
+                        clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                    }
+                    double t2 = (double)cvGetTickCount();//kernel start
+                    cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,7,5,5,7, bordertype[i],cv::Scalar(1.0));
+                    t2 = (double)cvGetTickCount() - t2;//kernel end
+                    cv::Mat cpu_cldst;
+                    // NOTE(review): this downloads cldst, while the result
+                    // was written into cldst_roi (never derived from cldst in
+                    // this test) -- confirm which matrix was meant.
+                    cldst.download(cpu_cldst);//download
+                    t1 = (double)cvGetTickCount() - t1;//gpu end
+
+                    if (j == 0)
+                        continue;
+
+                    totalgputick += t1;
+                    totalcputick += t0;
+                    totalgputick_kernel += t2;
+                }
+                if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+                cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+                cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+            }
+#else
+            for (int j = 0; j < 2; j++)
+            {
+                Has_roi(j);
+                if (type1 != nulltype)
+                {
+                    clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+                }
+                if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+                cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,7,5,5,7, bordertype[i],cv::Scalar(1.0));
+            }
+#endif
+        }
+    }
+}
+
+////////////////////////////////cornerMinEigenVal//////////////////////////////////////////
+
+struct cornerMinEigenVal : ImgprocTestBase {};
+
+// Compares the run time of cv::cornerMinEigenVal with
+// cv::ocl::cornerMinEigenVal after Has_roi(0) and Has_roi(1).
+// Block size is 7 and the border mode is BORDER_REFLECT; the aperture size is
+// re-drawn from {1, 3, 5, 7} with rand() on every pass.
+TEST_P(cornerMinEigenVal, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+            const int blockSize = 7;
+            const int apertureSize = 1 + 2 * (rand() % 4);
+            const int borderType = cv::BORDER_REFLECT;
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: ROI selection, the call itself, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            if (type1 != nulltype)
+            {
+                clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+            }
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_cldst;
+            cldst.download(cpu_cldst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const int blockSize = 7;
+        const int apertureSize = 1 + 2 * (rand() % 4);
+        const int borderType = cv::BORDER_REFLECT;
+        if (type1 != nulltype)
+        {
+            clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+        }
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+    }
+#endif
+}
+
+
+////////////////////////////////cornerHarris//////////////////////////////////////////
+
+struct cornerHarris : ImgprocTestBase {};
+
+// Compares the run time of cv::cornerHarris with cv::ocl::cornerHarris after
+// Has_roi(0) and Has_roi(1), using block size 7, aperture size 3, k = 2 and
+// BORDER_REFLECT.
+TEST_P(cornerHarris, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+            const int blockSize = 7;
+            const int apertureSize = 3;
+            const int borderType = cv::BORDER_REFLECT;
+            const double harrisK = 2;
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, harrisK, borderType);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: ROI selection, the call itself, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            if (type1 != nulltype)
+            {
+                clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+            }
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, harrisK, borderType);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_cldst;
+            cldst.download(cpu_cldst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const double harrisK = 2;
+        const int blockSize = 7;
+        const int apertureSize = 3;
+        const int borderType = cv::BORDER_REFLECT;
+        if (type1 != nulltype)
+        {
+            clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+        }
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, harrisK, borderType);
+    }
+#endif
+
+}
+
+
+////////////////////////////////integral/////////////////////////////////////////////////
+
+struct integral : ImgprocTestBase {};
+
+// Compares the run time of cv::integral with cv::ocl::integral after
+// Has_roi(0) and Has_roi(1). Both outputs (dst and dst1) are produced, and
+// both device results are downloaded inside the "gpu runtime" interval.
+TEST_P(integral, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::integral(mat1_roi, dst_roi, dst1_roi);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: ROI selection, the call itself, then both downloads.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            if (type1 != nulltype)
+            {
+                clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+            }
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_cldst;
+            cv::Mat cpu_cldst1;
+            cldst.download(cpu_cldst);
+            cldst1.download(cpu_cldst1);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        if (type1 != nulltype)
+        {
+            clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+        }
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+    }
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// warpAffine & warpPerspective
+
+// Shared fixture for the warpAffine / warpPerspective benchmarks.
+// Parameter 0 is the matrix type, parameter 1 the interpolation flags.
+PARAM_TEST_CASE(WarpTestBase, MatType, int)
+{
+    int type;
+    cv::Size size;
+    int interpolation;
+
+    // Host source and destination matrices (full size).
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // Region-of-interest geometry filled in by Has_roi().
+    int src_roicols;
+    int src_roirows;
+    int dst_roicols;
+    int dst_roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+
+    // Host views restricted to the current region of interest.
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // Full-size OCL destination; gdst below is a view into it.
+    cv::ocl::oclMat gdst_whole;
+
+    // OCL source and destination used by the call under test.
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, creates MWIDTH x MHEIGHT random matrices and
+    // asserts that at least one OCL device is reported.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        //dsize = GET_PARAM(1);
+        interpolation = GET_PARAM(1);
+
+        size = cv::Size(MWIDTH, MHEIGHT);
+        cv::RNG& rng = TS::ptr()->get_rng();
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To benchmark a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Selects the region of interest: a non-zero b uses the sub-matrix that
+    // starts at (1, 1) and is one row and one column smaller; zero uses the
+    // whole matrix. Also refreshes mat1_roi and dst_roi.
+    void Has_roi(int b)
+    {
+        const int offset = b ? 1 : 0;
+
+        src_roicols = mat1.cols - offset;
+        src_roirows = mat1.rows - offset;
+        dst_roicols = dst.cols - offset;
+        dst_roirows = dst.rows - offset;
+        src1x = offset;
+        src1y = offset;
+        dstx = offset;
+        dsty = offset;
+
+        mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows));
+        dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+    }
+
+};
+
+/////warpAffine
+
+struct WarpAffine : WarpTestBase{};
+
+// Compares the run time of cv::warpAffine with cv::ocl::warpAffine after
+// Has_roi(0) and Has_roi(1). The 2x3 transform combines a rotation by
+// 3.14 / 6 radians with a (100, -100) translation; the requested output size
+// is always the full fixture size.
+TEST_P(WarpAffine, Mat)
+{
+    static const double affineCoeffs[2][3] =
+    {
+        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+    };
+    Mat M(2, 3, CV_64F, (void*)affineCoeffs);
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: host-to-device assignments, the call, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+    }
+#endif
+
+}
+
+
+// warpPerspective
+
+struct WarpPerspective : WarpTestBase{};
+
+// Compares the run time of cv::warpPerspective with cv::ocl::warpPerspective
+// after Has_roi(0) and Has_roi(1). The 3x3 transform is the rotation by
+// 3.14 / 6 radians plus (100, -100) translation, with last row (0, 0, 1);
+// the requested output size is always the full fixture size.
+TEST_P(WarpPerspective, Mat)
+{
+    static const double perspectiveCoeffs[3][3] =
+    {
+        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void*)perspectiveCoeffs);
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: host-to-device assignments, the call, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+    }
+#endif
+
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// resize
+
+// Fixture for the resize benchmark.
+// Parameters: matrix type, destination size, horizontal scale, vertical
+// scale, interpolation flag. An empty destination size is replaced in SetUp()
+// by the MWIDTH x MHEIGHT source size scaled by fx / fy.
+PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
+{
+    int type;
+    cv::Size dsize;
+    double fx, fy;
+    int interpolation;
+
+    // Host source and destination matrices (full size).
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // Region-of-interest geometry filled in by Has_roi().
+    int src_roicols;
+    int src_roirows;
+    int dst_roicols;
+    int dst_roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+
+    // Host views restricted to the current region of interest.
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // Full-size OCL destination; gdst below is a view into it.
+    cv::ocl::oclMat gdst_whole;
+
+    // OCL source and destination used by the call under test.
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, resolves the destination size, creates random
+    // source/destination matrices and asserts that an OCL device exists.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        dsize = GET_PARAM(1);
+        fx = GET_PARAM(2);
+        fy = GET_PARAM(3);
+        interpolation = GET_PARAM(4);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        if(dsize == cv::Size() && !(fx > 0 && fy > 0))
+        {
+            // Neither an explicit size nor usable scale factors were given.
+            // Raise a fatal failure instead of returning silently: a silent
+            // return left mat1/dst empty while the test body still ran on them.
+            FAIL() << "invalid dsize and fx fy";
+        }
+
+        if(dsize == cv::Size())
+        {
+            dsize.width = (int)(size.width * fx);
+            dsize.height = (int)(size.height * fy);
+        }
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, dsize, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To benchmark a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Selects the region of interest: a non-zero b uses the sub-matrix that
+    // starts at (1, 1) and is one row and one column smaller; zero uses the
+    // whole matrix. Also refreshes mat1_roi and dst_roi.
+    void Has_roi(int b)
+    {
+        //cv::RNG& rng = TS::ptr()->get_rng();
+        if(b)
+        {
+            src_roicols = mat1.cols-1;
+            src_roirows = mat1.rows-1;
+            dst_roicols = dst.cols-1;
+            dst_roirows = dst.rows-1;
+            src1x = 1;
+            src1y = 1;
+            dstx = 1;
+            dsty = 1;
+        }
+        else
+        {
+            src_roicols = mat1.cols;
+            src_roirows = mat1.rows;
+            dst_roicols = dst.cols;
+            dst_roirows = dst.rows;
+            src1x = 0;
+            src1y = 0;
+            dstx = 0;
+            dsty = 0;
+        }
+        mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows));
+        dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows));
+    }
+
+};
+
+// Compares the run time of cv::resize with cv::ocl::resize after Has_roi(0)
+// and Has_roi(1). dsize is always the full destination size, so in the ROI
+// pass it is one row and one column larger than dst_roi.
+// NOTE(review): in that pass the output may be re-created by the callee
+// rather than written into the ROI view - confirm.
+TEST_P(Resize, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: host-to-device assignments, the call, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+        gmat1 = mat1_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+    }
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+//threshold
+
+// Fixture for the threshold benchmark.
+// Parameter 0 is the matrix type, parameter 1 the threshold operation.
+PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
+{
+    int type;
+    int threshOp;
+
+    // Host source and destination matrices (full size).
+    cv::Mat mat1;
+    cv::Mat dst;
+
+    // Region-of-interest geometry filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int src1x;
+    int src1y;
+    int dstx;
+    int dsty;
+
+    // Host views restricted to the current region of interest.
+    cv::Mat mat1_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // Full-size OCL destination; gdst below is a view into it.
+    cv::ocl::oclMat gdst_whole;
+
+    // OCL source and destination used by the call under test.
+    cv::ocl::oclMat gmat1;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, creates MWIDTH x MHEIGHT random matrices and
+    // asserts that at least one OCL device is reported.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        threshOp = GET_PARAM(1);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat1 = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To benchmark a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Selects the region of interest: a non-zero b uses the sub-matrix that
+    // starts at (1, 1) and is one row and one column smaller; zero uses the
+    // whole matrix. Also refreshes mat1_roi and dst_roi.
+    void Has_roi(int b)
+    {
+        const int offset = b ? 1 : 0;
+
+        roicols = mat1.cols - offset;
+        roirows = mat1.rows - offset;
+        src1x = offset;
+        src1y = offset;
+        dstx = offset;
+        dsty = offset;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+// Compares the run time of cv::threshold with cv::ocl::threshold after
+// Has_roi(0) and Has_roi(1). maxVal is drawn from [20, 127] and thresh from
+// [0, maxVal] with randomDouble() on every pass; both implementations get the
+// same pair within a pass.
+TEST_P(Threshold, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            const double maxVal = randomDouble(20.0, 127.0);
+            const double thresh = randomDouble(0.0, maxVal);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: host-to-device assignments, the call, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+            gmat1 = mat1_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        const double maxVal = randomDouble(20.0, 127.0);
+        const double thresh = randomDouble(0.0, maxVal);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+        gmat1 = mat1_roi;
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
+    }
+#endif
+
+}
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//meanShift
+
+// Shared fixture for the meanShiftFiltering / meanShiftProc benchmarks.
+// Parameters: image type, coordinate-matrix type, sp, sr and the termination
+// criteria handed to the OCL mean-shift calls.
+PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
+{
+    int type, typeCoor;
+    int sp, sr;
+    cv::TermCriteria crit;
+    // Host source, destination and coordinate matrices (full size).
+    cv::Mat src;
+    cv::Mat dst;
+    cv::Mat dstCoor;
+
+    // Region-of-interest geometry filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int dstx;
+    int dsty;
+
+    // Host views restricted to the current region of interest.
+    cv::Mat src_roi;
+    cv::Mat dst_roi;
+    cv::Mat dstCoor_roi;
+
+    // Full-size OCL destinations, re-assigned from the host on every Has_roi().
+    cv::ocl::oclMat gdst;
+    cv::ocl::oclMat gdstCoor;
+
+    std::vector<cv::ocl::Info> oclinfo;
+    // OCL matrices restricted to the region of interest.
+    cv::ocl::oclMat gsrc_roi;
+    cv::ocl::oclMat gdst_roi;
+    cv::ocl::oclMat gdstCoor_roi;
+
+    // Reads the parameters, creates MWIDTH x MHEIGHT random matrices and
+    // asserts that at least one OCL device is reported.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        typeCoor = GET_PARAM(1);
+        sp = GET_PARAM(2);
+        sr = GET_PARAM(3);
+        crit = GET_PARAM(4);
+
+        cv::RNG &rng = TS::ptr()->get_rng();
+
+        // MWIDTH and MHEIGHT come from utility.hpp (256 x 256 per the original note).
+        cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+        src = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+        dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To benchmark a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH);
+    }
+
+    // Selects the region of interest: a non-zero b uses the sub-matrix that
+    // starts at (1, 1) and is one row and one column smaller; zero uses the
+    // whole matrix. Refreshes the host ROI views and re-assigns the full-size
+    // OCL destinations from their host counterparts.
+    void Has_roi(int b)
+    {
+        const int offset = b ? 1 : 0;
+
+        roicols = src.cols - offset;
+        roirows = src.rows - offset;
+        srcx = offset;
+        srcy = offset;
+        dstx = offset;
+        dsty = offset;
+
+        src_roi = src(Rect(srcx, srcy, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+        dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows));
+
+        gdst = dst;
+        gdstCoor = dstCoor;
+    }
+};
+
+/////////////////////////meanShiftFiltering/////////////////////////////
+struct meanShiftFiltering : meanShiftTestBase {};
+
+// Times cv::ocl::meanShiftFiltering after Has_roi(0) and Has_roi(1).
+// Only the OCL implementation is measured; no CPU call is made here.
+TEST_P(meanShiftFiltering, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // Whole OCL path: source assignment, ROI view, call and download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+
+            gsrc_roi = src_roi;
+            gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));
+
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+
+            cv::Mat cpu_gdst;
+            gdst.download(cpu_gdst);
+
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+
+        gsrc_roi = src_roi;
+        gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+    }
+#endif
+
+}
+
+///////////////////////////meanShiftProc//////////////////////////////////
+struct meanShiftProc : meanShiftTestBase {};
+
+// Times cv::ocl::meanShiftProc after Has_roi(0) and Has_roi(1).
+// Only the OCL implementation is measured, and only the coordinate output
+// (gdstCoor) is downloaded inside the "gpu runtime" interval.
+TEST_P(meanShiftProc, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // Whole OCL path: source assignment, ROI views, call and download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+
+            gsrc_roi = src_roi;
+            gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));
+            gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+
+            cv::Mat cpu_gdstCoor;
+            gdstCoor.download(cpu_gdstCoor);
+
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+
+        gsrc_roi = src_roi;
+        gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows));
+        gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+    }
+#endif
+
+}
+
+
+
+//************test*******************
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( // NOTE(review): the six slots follow ImgprocTestBase's parameter list, which is not visible here - confirm their meaning there
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
+// ONE_TYPE(CV_8UC1),
+// NULL_TYPE,
+// ONE_TYPE(CV_8UC1),
+// NULL_TYPE,
+// NULL_TYPE,
+// Values(false))); // Values(false) is the reserved parameter
+//
+//
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
+// Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/),
+// NULL_TYPE,
+// Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/),
+// NULL_TYPE,
+// NULL_TYPE,
+// Values(false))); // Values(false) is the reserved parameter
+
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
+// Values(CV_8UC1,CV_32FC1),
+// NULL_TYPE,
+// ONE_TYPE(CV_32FC1),
+// NULL_TYPE,
+// NULL_TYPE,
+// Values(false))); // Values(false) is the reserved parameter
+//
+//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
+// Values(CV_8UC1,CV_32FC1),
+// NULL_TYPE,
+// ONE_TYPE(CV_32FC1),
+// NULL_TYPE,
+// NULL_TYPE,
+// Values(false))); // Values(false) is the reserved parameter
+
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( // the integral test fills two outputs (dst and dst1); presumably slots 3 and 4 are their types - confirm
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32SC1),
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( // (matrix type, interpolation flags), as read by WarpTestBase::SetUp
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+ (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+ (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+
+
+INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine // same (matrix type, interpolation flags) grid as WarpAffine
+ (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+ (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+ (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+
+
+INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( // (type, dsize, fx, fy, interpolation); the empty cv::Size() makes Resize::SetUp derive dsize from fx / fy
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
+ Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
+
+
+INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( // (matrix type, threshold operation)
+ Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY),
+ ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC),
+ ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))));
+
+INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( // (type, typeCoor, sp, sr, crit), as read by meanShiftTestBase::SetUp
+ ONE_TYPE(CV_8UC4),
+ ONE_TYPE(CV_16SC2),// coordinate-matrix type; the meanShiftFiltering test never passes that matrix to the OCL call
+ Values(5),
+ Values(6),
+ Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
+ ));
+
+INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( // (type, typeCoor, sp, sr, crit), as read by meanShiftTestBase::SetUp
+ ONE_TYPE(CV_8UC4),
+ ONE_TYPE(CV_16SC2),
+ Values(5),
+ Values(6),
+ Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1))
+ ));
+
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv::ocl;
+////////////////////////////////converto/////////////////////////////////////////////////
+// Fixture for the convertTo benchmark.
+// Parameter 0 is the source matrix type, parameter 1 the type requested from
+// convertTo().
+PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
+{
+    int type;
+    int dst_type;
+
+    // Host source and destination matrices (full size).
+    cv::Mat mat;
+    cv::Mat dst;
+
+    // Region-of-interest geometry filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int dstx;
+    int dsty;
+
+    // Host views restricted to the current region of interest.
+    cv::Mat mat_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+    // Full-size OCL destination; gdst below is a view into it.
+    cv::ocl::oclMat gdst_whole;
+
+    // OCL source and destination used by the call under test.
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gdst;
+
+    // Reads the parameters, creates MWIDTH x MHEIGHT random matrices and
+    // asserts that at least one OCL device is reported.
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        dst_type = GET_PARAM(1);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        // convertTo() yields the requested depth with the source's channel
+        // count (or the source type when the requested type is negative).
+        // Allocate dst with exactly that type: a dst of the source type made
+        // convertTo() re-create its output whenever the depths differed, so
+        // the prepared ROI view was not the buffer being written.
+        const int converted_type = dst_type < 0
+            ? type
+            : CV_MAKETYPE(CV_MAT_DEPTH(dst_type), CV_MAT_CN(type));
+
+        mat = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, converted_type, 5, 16, false);
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To benchmark a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Selects the region of interest: a non-zero b uses the sub-matrix that
+    // starts at (1, 1) and is one row and one column smaller; zero uses the
+    // whole matrix. Also refreshes mat_roi and dst_roi. The OCL matrices are
+    // set up by the test body, not here.
+    void Has_roi(int b)
+    {
+        //cv::RNG& rng = TS::ptr()->get_rng();
+        if(b)
+        {
+            roicols = mat.cols-1;
+            roirows = mat.rows-1;
+            srcx = 1;
+            srcy = 1;
+            dstx = 1;
+            dsty = 1;
+        }
+        else
+        {
+            roicols = mat.cols;
+            roirows = mat.rows;
+            srcx = 0;
+            srcy = 0;
+            dstx = 0;
+            dsty = 0;
+        }
+
+        mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
+        dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
+
+        //gdst_whole = dst;
+        //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+        //gmat = mat_roi;
+    }
+};
+
+
+struct ConvertTo :ConvertToTestBase {};
+
+// Compares the run time of cv::Mat::convertTo with cv::ocl::oclMat::convertTo
+// after Has_roi(0) and Has_roi(1). Despite the test name, only timings are
+// printed; no results are compared.
+TEST_P(ConvertTo, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        double cpuTicks = 0;
+        double gpuTicks = 0;
+        double kernelTicks = 0;
+        for (int pass = 0; pass < LOOP_TIMES + 1; ++pass)
+        {
+            Has_roi(roiMode);
+
+            // CPU reference.
+            const double cpuBegin = static_cast<double>(cvGetTickCount());
+            mat_roi.convertTo(dst_roi, dst_type);
+            const double cpuElapsed = static_cast<double>(cvGetTickCount()) - cpuBegin;
+
+            // OCL path: host-to-device assignments, the call, then the download.
+            const double gpuBegin = static_cast<double>(cvGetTickCount());
+            gdst_whole = dst;
+            gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+            gmat = mat_roi;
+            const double kernelBegin = static_cast<double>(cvGetTickCount());
+            gmat.convertTo(gdst, dst_type);
+            const double kernelElapsed = static_cast<double>(cvGetTickCount()) - kernelBegin;
+            cv::Mat cpu_dst;
+            gdst_whole.download(cpu_dst);
+            const double gpuElapsed = static_cast<double>(cvGetTickCount()) - gpuBegin;
+
+            // The first pass is executed but excluded from the totals.
+            if (pass != 0)
+            {
+                gpuTicks += gpuElapsed;
+                cpuTicks += cpuElapsed;
+                kernelTicks += kernelElapsed;
+            }
+        }
+        cout << (roiMode == 0 ? "no roi\n" : "with roi\n");
+        const double divisor = static_cast<double>(cvGetTickFrequency()) * LOOP_TIMES * 1000.;
+        cout << "average cpu runtime is " << cpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime is " << gpuTicks / divisor << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << kernelTicks / divisor << "ms" << endl;
+    }
+#else
+    // One untimed OCL call per ROI mode.
+    for (int roiMode = 0; roiMode < 2; ++roiMode)
+    {
+        Has_roi(roiMode);
+        gdst_whole = dst;
+        gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+        gmat = mat_roi;
+        cout << (roiMode == 0 ? "no roi:" : "\nwith roi:");
+        gmat.convertTo(gdst, dst_type);
+    }
+#endif
+
+}
+
+
+///////////////////////////////////////////copyto/////////////////////////////////////////////////////////////
+
+// Fixture for the copyTo benchmarks. Parameters: (matrix type, reserved bool).
+PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
+{
+    // Matrix type under test, read in SetUp().
+    int type;
+
+    // Full-size host matrices.
+    cv::Mat mat;
+    cv::Mat mask;
+    cv::Mat dst;
+
+    // ROI geometry, filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int dstx;
+    int dsty;
+    int maskx;
+    int masky;
+
+    // Host-side views into mat / mask / dst.
+    cv::Mat mat_roi;
+    cv::Mat mask_roi;
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+
+    // Device counterpart of the whole destination matrix.
+    cv::ocl::oclMat gdst_whole;
+
+    // Device-side source, destination and mask views.
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gdst;
+    cv::ocl::oclMat gmask;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat = randomMat(rng, size, type, 5, 16, false);
+        dst = randomMat(rng, size, type, 5, 16, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // Binarize the mask. The last argument of cv::threshold is the
+        // thresholding type; the matrix-type constant CV_8UC1 that used to be
+        // passed here only worked because it has the same numeric value.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To use a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Binds mat_roi / mask_roi / dst_roi either to the whole matrices (b == 0)
+    // or to views that start at (1,1) and are one row and one column smaller.
+    void Has_roi(int b)
+    {
+        if (b)
+        {
+            roicols = mat.cols - 1;
+            roirows = mat.rows - 1;
+            srcx = 1;
+            srcy = 1;
+            dstx = 1;
+            dsty = 1;
+            maskx = 1;
+            masky = 1;
+        }
+        else
+        {
+            roicols = mat.cols;
+            roirows = mat.rows;
+            srcx = 0;
+            srcy = 0;
+            dstx = 0;
+            dsty = 0;
+            maskx = 0;
+            masky = 0;
+        }
+
+        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+        mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+struct CopyTo :CopyToTestBase {};
+// Benchmark: times cv::Mat::copyTo against oclMat::copyTo (no mask) on the whole matrix (k == 0) and on a ROI (k == 1), printing average times in ms.
+TEST_P(CopyTo, Without_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0; // accumulated t0 (CPU call)
+ double totalgputick=0; // accumulated t1 (whole device path)
+ double totalgputick_kernel=0; // accumulated t2 (ocl call only)
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int k=0;k<2;k++){ // k is forwarded to Has_roi(): 0 = whole matrix, 1 = ROI
+ totalcputick=0;
+ totalgputick=0;
+ totalgputick_kernel=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ Has_roi(k);
+ // CPU reference, measured into t0.
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.copyTo(dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ // Device path: t1 spans host->device assignment, the ocl call and the download; t2 spans only the ocl call.
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ gmat = mat_roi;
+ t2=(double)cvGetTickCount();//kernel
+ gmat.copyTo(gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0) // iteration 0 is not accumulated: the loop runs LOOP_TIMES+1 times, the averages divide by LOOP_TIMES
+ continue;
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // ticks / (ticks-per-microsecond * 1000) -> ms
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++) // PRINT_KERNEL_RUN_TIME build: one untimed run per variant; presumably the ocl module prints the kernel time itself - confirm
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ gmat = mat_roi;
+
+ if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ gmat.copyTo(gdst);
+ };
+#endif
+}
+// Benchmark: times masked cv::Mat::copyTo against masked oclMat::copyTo on the whole matrix (k == 0) and on a ROI (k == 1), printing average times in ms.
+TEST_P(CopyTo, With_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0; // accumulated t0 (CPU call)
+ double totalgputick=0; // accumulated t1 (whole device path)
+ double totalgputick_kernel=0; // accumulated t2 (ocl call only)
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int k=0;k<2;k++){ // k is forwarded to Has_roi(): 0 = whole matrix, 1 = ROI
+ totalcputick=0;
+ totalgputick=0;
+ totalgputick_kernel=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ Has_roi(k);
+ // CPU reference, measured into t0.
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.copyTo(dst_roi,mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ // Device path: t1 spans host->device assignments (dst, src, mask), the ocl call and the download; t2 spans only the ocl call.
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ gmat = mat_roi;
+ gmask = mask_roi;
+ t2=(double)cvGetTickCount();//kernel
+ gmat.copyTo(gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0) // iteration 0 is not accumulated: the loop runs LOOP_TIMES+1 times, the averages divide by LOOP_TIMES
+ continue;
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // ticks / (ticks-per-microsecond * 1000) -> ms
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++) // PRINT_KERNEL_RUN_TIME build: one untimed run per variant; presumably the ocl module prints the kernel time itself - confirm
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ gmat = mat_roi;
+ gmask = mask_roi;
+
+ if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ gmat.copyTo(gdst, gmask);
+ };
+#endif
+}
+
+///////////////////////////////////////////setTo//////////////////////////////////////////////////////////////
+
+// Fixture for the setTo benchmarks. Parameters: (matrix type, reserved bool).
+PARAM_TEST_CASE(SetToTestBase, MatType, bool)
+{
+    // Matrix type under test and the fill value, both set in SetUp().
+    int type;
+    cv::Scalar val;
+
+    // Full-size host matrices.
+    cv::Mat mat;
+    cv::Mat mask;
+
+    // ROI geometry, filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int maskx;
+    int masky;
+
+    // Host-side views into mat / mask.
+    cv::Mat mat_roi;
+    cv::Mat mask_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+
+    // Device counterpart of the whole matrix being filled.
+    cv::ocl::oclMat gmat_whole;
+
+    // Device-side matrix and mask views.
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gmask;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat = randomMat(rng, size, type, 5, 16, false);
+        mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+        // Binarize the mask. The last argument of cv::threshold is the
+        // thresholding type; the matrix-type constant CV_8UC1 that used to be
+        // passed here only worked because it has the same numeric value.
+        cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY);
+        val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To use a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Binds mat_roi / mask_roi either to the whole matrices (b == 0) or to
+    // views that start at (1,1) and are one row and one column smaller.
+    void Has_roi(int b)
+    {
+        if (b)
+        {
+            roicols = mat.cols - 1;
+            roirows = mat.rows - 1;
+            srcx = 1;
+            srcy = 1;
+            maskx = 1;
+            masky = 1;
+        }
+        else
+        {
+            roicols = mat.cols;
+            roirows = mat.rows;
+            srcx = 0;
+            srcy = 0;
+            maskx = 0;
+            masky = 0;
+        }
+
+        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+        mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+    }
+};
+
+struct SetTo :SetToTestBase {};
+// Benchmark: times cv::Mat::setTo against oclMat::setTo (no mask) on the whole matrix (k == 0) and on a ROI (k == 1), printing average times in ms.
+TEST_P(SetTo, Without_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0; // accumulated t0 (CPU call)
+ double totalgputick=0; // accumulated t1 (whole device path)
+ double totalgputick_kernel=0; // accumulated t2 (ocl call only)
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int k=0;k<2;k++){ // k is forwarded to Has_roi(): 0 = whole matrix, 1 = ROI
+ totalcputick=0;
+ totalgputick=0;
+ totalgputick_kernel=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ Has_roi(k);
+ // CPU reference, measured into t0.
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.setTo(val);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ // Device path: t1 spans host->device assignment, the ocl call and the download; t2 spans only the ocl call.
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+ t2=(double)cvGetTickCount();//kernel
+ gmat.setTo(val);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gmat_whole.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0) // iteration 0 is not accumulated: the loop runs LOOP_TIMES+1 times, the averages divide by LOOP_TIMES
+ continue;
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // ticks / (ticks-per-microsecond * 1000) -> ms
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++) // PRINT_KERNEL_RUN_TIME build: one untimed run per variant; presumably the ocl module prints the kernel time itself - confirm
+ {
+ Has_roi(j);
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+ if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ gmat.setTo(val);
+ };
+#endif
+}
+// Benchmark: times masked cv::Mat::setTo against masked oclMat::setTo on the whole matrix (k == 0) and on a ROI (k == 1), printing average times in ms.
+TEST_P(SetTo, With_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0; // accumulated t0 (CPU call)
+ double totalgputick=0; // accumulated t1 (whole device path)
+ double totalgputick_kernel=0; // accumulated t2 (ocl call only)
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int k=0;k<2;k++){ // k is forwarded to Has_roi(): 0 = whole matrix, 1 = ROI
+ totalcputick=0;
+ totalgputick=0;
+ totalgputick_kernel=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ Has_roi(k);
+ // CPU reference, measured into t0.
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.setTo(val, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ // Device path: t1 spans host->device assignments (matrix, mask), the ocl call and the download; t2 spans only the ocl call.
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+ gmask = mask_roi;
+ t2=(double)cvGetTickCount();//kernel
+ gmat.setTo(val, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gmat_whole.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0) // iteration 0 is not accumulated: the loop runs LOOP_TIMES+1 times, the averages divide by LOOP_TIMES
+ continue;
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // ticks / (ticks-per-microsecond * 1000) -> ms
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++) // PRINT_KERNEL_RUN_TIME build: one untimed run per variant; presumably the ocl module prints the kernel time itself - confirm
+ {
+ Has_roi(j);
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+ gmask = mask_roi;
+
+ if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ gmat.setTo(val, gmask);
+ };
+#endif
+}
+
+//**********test************
+// ConvertTo: every listed source type is combined with every listed destination type.
+INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)));
+// CopyTo: one instance per matrix type; the bool parameter is not read by the fixture's SetUp().
+INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
+// SetTo: one instance per matrix type; the bool parameter is not read by the fixture's SetUp().
+INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
+#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Jia Haipeng, jiahaipeng95@gmail.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv::ocl;
+// Fixture for the merge benchmark. Parameters: (type, channel count of dst).
+PARAM_TEST_CASE(MergeTestBase, MatType, int)
+{
+    // Test parameters, read in SetUp().
+    int type;
+    int channels;
+
+    // Full-size single-channel host inputs.
+    cv::Mat mat1, mat2, mat3, mat4;
+
+    // Full-size host destination.
+    cv::Mat dst;
+
+    // ROI geometry, filled in by Has_roi().
+    int roicols, roirows;
+    int src1x, src1y;
+    int src2x, src2y;
+    int src3x, src3y;
+    int src4x, src4y;
+    int dstx, dsty;
+
+    // Host-side views into the inputs.
+    cv::Mat mat1_roi, mat2_roi, mat3_roi, mat4_roi;
+
+    // Host-side view into dst.
+    cv::Mat dst_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+
+    // Device counterpart of the whole destination matrix.
+    cv::ocl::oclMat gdst_whole;
+
+    // Device-side input and destination views.
+    cv::ocl::oclMat gmat1, gmat2, gmat3, gmat4;
+    cv::ocl::oclMat gdst;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        channels = GET_PARAM(1);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        const cv::Size fullSize(MWIDTH, MHEIGHT);
+        const int planeType = CV_MAKETYPE(type, 1);
+
+        mat1 = randomMat(rng, fullSize, planeType, 5, 16, false);
+        mat2 = randomMat(rng, fullSize, planeType, 5, 16, false);
+        mat3 = randomMat(rng, fullSize, planeType, 5, 16, false);
+        mat4 = randomMat(rng, fullSize, planeType, 5, 16, false);
+        dst = randomMat(rng, fullSize, CV_MAKETYPE(type, channels), 5, 16, false);
+
+        const int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To use a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Binds the *_roi views either to the whole matrices (b == 0) or to views
+    // that start at (1,1) and are one row and one column smaller (b != 0).
+    void Has_roi(int b)
+    {
+        const int offset = b ? 1 : 0;
+
+        roicols = mat1.cols - offset;
+        roirows = mat1.rows - offset;
+        src1x = src1y = offset;
+        src2x = src2y = offset;
+        src3x = src3y = offset;
+        src4x = src4y = offset;
+        dstx = dsty = offset;
+
+        mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+        mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+        mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows));
+        mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows));
+
+        dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+    }
+};
+
+struct Merge : MergeTestBase {};
+// Benchmark: times cv::merge against cv::ocl::merge of four planes on the whole matrices (k == 0) and on ROIs (k == 1), printing average times in ms.
+TEST_P(Merge, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick=0; // accumulated t0 (CPU call)
+ double totalgputick=0; // accumulated t1 (whole device path)
+ double totalgputick_kernel=0; // accumulated t2 (ocl call only)
+ double t0=0;
+ double t1=0;
+ double t2=0;
+ for(int k=0;k<2;k++){ // k is forwarded to Has_roi(): 0 = whole matrix, 1 = ROI
+ totalcputick=0;
+ totalgputick=0;
+ totalgputick_kernel=0;
+ for(int j = 0; j < LOOP_TIMES+1; j ++)
+ {
+ Has_roi(k);
+ std::vector<cv::Mat> dev_src; // host inputs; built outside the t0 window
+ dev_src.push_back(mat1_roi);
+ dev_src.push_back(mat2_roi);
+ dev_src.push_back(mat3_roi);
+ dev_src.push_back(mat4_roi);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::merge(dev_src, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ // Device path: t1 spans host->device assignments, building the input vector, the ocl call and the download; t2 spans only the ocl call.
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmat3 = mat3_roi;
+ gmat4 = mat4_roi;
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+ std::vector<cv::ocl::oclMat> dev_gsrc;
+ dev_gsrc.push_back(gmat1);
+ dev_gsrc.push_back(gmat2);
+ dev_gsrc.push_back(gmat3);
+ dev_gsrc.push_back(gmat4);
+ t2=(double)cvGetTickCount();//kernel
+ cv::ocl::merge(dev_gsrc, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0) // iteration 0 is not accumulated: the loop runs LOOP_TIMES+1 times, the averages divide by LOOP_TIMES
+ continue;
+
+ totalgputick=t1+totalgputick;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
+
+ }
+ if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+ cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; // ticks / (ticks-per-microsecond * 1000) -> ms
+ cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++) // PRINT_KERNEL_RUN_TIME build: one untimed run per variant; presumably the ocl module prints the kernel time itself - confirm
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmat3 = mat3_roi;
+ gmat4 = mat4_roi;
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+ std::vector<cv::ocl::oclMat> dev_gsrc;
+ dev_gsrc.push_back(gmat1);
+ dev_gsrc.push_back(gmat2);
+ dev_gsrc.push_back(gmat3);
+ dev_gsrc.push_back(gmat4);
+
+ if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ cv::ocl::merge(dev_gsrc, gdst);
+ };
+#endif
+}
+
+
+// Fixture for the split benchmark. Parameters: (depth, channel count of the source).
+PARAM_TEST_CASE(SplitTestBase, MatType, int)
+{
+    // Test parameters, read in SetUp().
+    int type;
+    int channels;
+
+    // Full-size host source.
+    cv::Mat mat;
+
+    // Full-size single-channel host destinations.
+    cv::Mat dst1;
+    cv::Mat dst2;
+    cv::Mat dst3;
+    cv::Mat dst4;
+
+    // ROI geometry, filled in by Has_roi().
+    int roicols;
+    int roirows;
+    int srcx;
+    int srcy;
+    int dst1x;
+    int dst1y;
+    int dst2x;
+    int dst2y;
+    int dst3x;
+    int dst3y;
+    int dst4x;
+    int dst4y;
+
+    // Host-side view into the source.
+    cv::Mat mat_roi;
+
+    // Host-side views into the destinations.
+    cv::Mat dst1_roi;
+    cv::Mat dst2_roi;
+    cv::Mat dst3_roi;
+    cv::Mat dst4_roi;
+    std::vector<cv::ocl::Info> oclinfo;
+
+    // Device counterparts of the whole destination matrices.
+    cv::ocl::oclMat gdst1_whole;
+    cv::ocl::oclMat gdst2_whole;
+    cv::ocl::oclMat gdst3_whole;
+    cv::ocl::oclMat gdst4_whole;
+
+    // Device-side source and destination views.
+    cv::ocl::oclMat gmat;
+    cv::ocl::oclMat gdst1;
+    cv::ocl::oclMat gdst2;
+    cv::ocl::oclMat gdst3;
+    cv::ocl::oclMat gdst4;
+
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        channels = GET_PARAM(1);
+
+        cv::RNG& rng = TS::ptr()->get_rng();
+        cv::Size size(MWIDTH, MHEIGHT);
+
+        mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+        dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+        dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums > 0);
+        // To use a non-default device, select it here:
+        //setDevice(oclinfo[0]);
+        setBinpath(CLBINPATH);
+    }
+
+    // Binds mat_roi and dstN_roi either to the whole matrices (b == 0) or to
+    // views that start at (1,1) and are one row and one column smaller.
+    void Has_roi(int b)
+    {
+        if (b)
+        {
+            roicols = mat.cols - 1;
+            roirows = mat.rows - 1;
+            srcx = 1;
+            // Was a second "srcx = 1": srcy kept whatever value it had before,
+            // so the source ROI did not start at (1,1) like the destinations.
+            srcy = 1;
+            dst1x = 1;
+            dst1y = 1;
+            dst2x = 1;
+            dst2y = 1;
+            dst3x = 1;
+            dst3y = 1;
+            dst4x = 1;
+            dst4y = 1;
+        }
+        else
+        {
+            roicols = mat.cols;
+            roirows = mat.rows;
+            srcx = 0;
+            srcy = 0;
+            dst1x = 0;
+            dst1y = 0;
+            dst2x = 0;
+            dst2y = 0;
+            dst3x = 0;
+            dst3y = 0;
+            dst4x = 0;
+            dst4y = 0;
+        }
+
+        mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+
+        dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
+        dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows));
+        dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows));
+        dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows));
+    }
+
+};
+
+struct Split :SplitTestBase {};
+
+// Benchmark: times cv::split against cv::ocl::split on the whole matrix
+// (k == 0) and on a ROI (k == 1), printing average times in ms.
+TEST_P(Split, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+    double totalcputick = 0;        // accumulated t0 (CPU call)
+    double totalgputick = 0;        // accumulated t1 (whole device path)
+    double totalgputick_kernel = 0; // accumulated t2 (ocl call only)
+    double t0 = 0;
+    double t1 = 0;
+    double t2 = 0;
+    for (int k = 0; k < 2; k++)
+    {
+        totalcputick = 0;
+        totalgputick = 0;
+        totalgputick_kernel = 0;
+        for (int j = 0; j < LOOP_TIMES + 1; j++)
+        {
+            Has_roi(k);
+            cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+
+            t0 = (double)cvGetTickCount(); //cpu start
+            cv::split(mat_roi, dev_dst);
+            t0 = (double)cvGetTickCount() - t0; //cpu end
+
+            t1 = (double)cvGetTickCount(); //gpu start1
+            gdst1_whole = dst1;
+            gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
+
+            gdst2_whole = dst2;
+            gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
+
+            gdst3_whole = dst3;
+            gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
+
+            gdst4_whole = dst4;
+            gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
+
+            gmat = mat_roi;
+
+            // The array holds copies of the oclMat headers, so it has to be
+            // built after gdst1..gdst4 are bound to this iteration's views.
+            // Built earlier, it carried the previous iteration's (or empty)
+            // headers and split() did not write into the current views.
+            cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+
+            t2 = (double)cvGetTickCount(); //kernel
+            cv::ocl::split(gmat, dev_gdst);
+            t2 = (double)cvGetTickCount() - t2; //kernel
+
+            cv::Mat cpu_dst1;
+            cv::Mat cpu_dst2;
+            cv::Mat cpu_dst3;
+            cv::Mat cpu_dst4;
+            gdst1_whole.download(cpu_dst1);
+            gdst2_whole.download(cpu_dst2);
+            gdst3_whole.download(cpu_dst3);
+            gdst4_whole.download(cpu_dst4);
+            t1 = (double)cvGetTickCount() - t1; //gpu end1
+
+            // Iteration 0 is not accumulated: the loop runs LOOP_TIMES+1
+            // times while the averages below divide by LOOP_TIMES.
+            if (j == 0)
+                continue;
+            totalgputick = t1 + totalgputick;
+            totalcputick = t0 + totalcputick;
+            totalgputick_kernel = t2 + totalgputick_kernel;
+        }
+        if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
+        // ticks / (ticks-per-microsecond * 1000) -> milliseconds
+        cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+        cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+    }
+#else
+    // PRINT_KERNEL_RUN_TIME build: one run per variant without host-side timing.
+    for (int j = 0; j < 2; j++)
+    {
+        Has_roi(j);
+        gdst1_whole = dst1;
+        gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
+
+        gdst2_whole = dst2;
+        gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
+
+        gdst3_whole = dst3;
+        gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
+
+        gdst4_whole = dst4;
+        gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
+        gmat = mat_roi;
+
+        // Built after the views above are bound, for the same reason as in
+        // the timed branch; the unused host-side array was dropped.
+        cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+
+        if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+        cv::ocl::split(gmat, dev_gdst);
+    }
+#endif
+}
+
+// Test instantiations. Merge: the first parameter is passed to CV_MAKETYPE(type, n) in SetUp(); presumably only its depth is kept - confirm.
+INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
+ Values(CV_8UC4, CV_32FC4), Values(1, 4)));
+// Split: the first parameter is a depth, the second the channel count of the source matrix.
+INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
+ Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
+
+#endif // HAVE_OPENCL
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#define VARNAME(A) #A
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+
+
+//std::string generateVarList(int first,...)
+//{
+// vector<std::string> varname;
+//
+// va_list argp;
+// string s;
+// stringstream ss;
+// va_start(argp,first);
+// int i=first;
+// while(i!=-1)
+// {
+// ss<<i<<",";
+// i=va_arg(argp,int);
+// };
+// s=ss.str();
+// va_end(argp);
+// return s;
+//};
+
+//std::string generateVarList(int& p1,int& p2)
+//{
+// stringstream ss;
+// ss<<VARNAME(p1)<<":"<<src1x<<","<<VARNAME(p2)<<":"<<src1y;
+// return ss.str();
+//};
+
+// Returns rng.uniform(minVal, maxVal) drawn from the test system's shared RNG.
+int randomInt(int minVal, int maxVal)
+{
+    return TS::ptr()->get_rng().uniform(minVal, maxVal);
+}
+
+// Returns rng.uniform(minVal, maxVal) drawn from the test system's shared RNG.
+double randomDouble(double minVal, double maxVal)
+{
+    return TS::ptr()->get_rng().uniform(minVal, maxVal);
+}
+
+// Returns a size whose width and height are each drawn with
+// randomInt(minVal, maxVal).
+Size randomSize(int minVal, int maxVal)
+{
+    // Draw into named locals: the evaluation order of function arguments is
+    // unspecified, so drawing inside the constructor call made the
+    // width/height pair depend on the compiler for a given RNG state.
+    const int width = randomInt(minVal, maxVal);
+    const int height = randomInt(minVal, maxVal);
+    return cv::Size(width, height);
+}
+
+// Returns a scalar whose four components are each drawn with
+// randomDouble(minVal, maxVal).
+Scalar randomScalar(double minVal, double maxVal)
+{
+    // Draw into named locals: the evaluation order of function arguments is
+    // unspecified, so drawing inside the constructor call made the component
+    // order depend on the compiler for a given RNG state.
+    const double v0 = randomDouble(minVal, maxVal);
+    const double v1 = randomDouble(minVal, maxVal);
+    const double v2 = randomDouble(minVal, maxVal);
+    const double v3 = randomDouble(minVal, maxVal);
+    return Scalar(v0, v1, v2, v3);
+}
+
+// Convenience overload: forwards to the six-argument randomMat with the test
+// system's shared RNG and false as the last argument.
+Mat randomMat(Size size, int type, double minVal, double maxVal)
+{
+    RNG& sharedRng = TS::ptr()->get_rng();
+    return randomMat(sharedRng, size, type, minVal, maxVal, false);
+}
+
+
+
+
+
+
+
+/*
+void showDiff(InputArray gold_, InputArray actual_, double eps)
+{
+ Mat gold;
+ if (gold_.kind() == _InputArray::MAT)
+ gold = gold_.getMat();
+ else
+ gold_.getGpuMat().download(gold);
+
+ Mat actual;
+ if (actual_.kind() == _InputArray::MAT)
+ actual = actual_.getMat();
+ else
+ actual_.getGpuMat().download(actual);
+
+ Mat diff;
+ absdiff(gold, actual, diff);
+ threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY);
+
+ namedWindow("gold", WINDOW_NORMAL);
+ namedWindow("actual", WINDOW_NORMAL);
+ namedWindow("diff", WINDOW_NORMAL);
+
+ imshow("gold", gold);
+ imshow("actual", actual);
+ imshow("diff", diff);
+
+ waitKey();
+}
+*/
+
+/*
+bool supportFeature(const DeviceInfo& info, FeatureSet feature)
+{
+ return TargetArchs::builtWith(feature) && info.supports(feature);
+}
+
+const vector<DeviceInfo>& devices()
+{
+ static vector<DeviceInfo> devs;
+ static bool first = true;
+
+ if (first)
+ {
+ int deviceCount = getCudaEnabledDeviceCount();
+
+ devs.reserve(deviceCount);
+
+ for (int i = 0; i < deviceCount; ++i)
+ {
+ DeviceInfo info(i);
+ if (info.isCompatible())
+ devs.push_back(info);
+ }
+
+ first = false;
+ }
+
+ return devs;
+}
+
+vector<DeviceInfo> devices(FeatureSet feature)
+{
+ const vector<DeviceInfo>& d = devices();
+
+ vector<DeviceInfo> devs_filtered;
+
+ if (TargetArchs::builtWith(feature))
+ {
+ devs_filtered.reserve(d.size());
+
+ for (size_t i = 0, size = d.size(); i < size; ++i)
+ {
+ const DeviceInfo& info = d[i];
+
+ if (info.supports(feature))
+ devs_filtered.push_back(info);
+ }
+ }
+
+ return devs_filtered;
+}
+*/
+
+// Builds the list of matrix types CV_MAKETYPE(depth, cn) for every depth in
+// [depth_start, depth_end] and every channel count in [cn_start, cn_end],
+// depth-major. Returns an empty list when either range is empty.
+vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
+{
+    vector<MatType> v;
+
+    const int depthCount = depth_end - depth_start + 1;
+    const int cnCount = cn_end - cn_start + 1;
+
+    // An inverted range used to hand reserve() a negative product, which
+    // converts to a huge size_t. The loops below would not run in that case
+    // anyway, so return the empty list directly.
+    if (depthCount <= 0 || cnCount <= 0)
+        return v;
+
+    v.reserve(static_cast<size_t>(depthCount) * static_cast<size_t>(cnCount));
+
+    for (int depth = depth_start; depth <= depth_end; ++depth)
+    {
+        for (int cn = cn_start; cn <= cn_end; ++cn)
+        {
+            v.push_back(CV_MAKETYPE(depth, cn));
+        }
+    }
+
+    return v;
+}
+
+// Returns the list produced by types(CV_8U, CV_64F, 1, 4); it is built once,
+// on first call, and kept in a function-local static.
+const vector<MatType>& all_types()
+{
+    static vector<MatType> cached(types(CV_8U, CV_64F, 1, 4));
+    return cached;
+}
+
+// Loads fileName, taken relative to the test system's data path, with imread.
+Mat readImage(const string& fileName, int flags)
+{
+    const string fullPath = string(cvtest::TS::ptr()->get_data_path()) + fileName;
+    return imread(fullPath, flags);
+}
+
+// Loads an image and adapts it to the requested matrix type: grayscale for a
+// single-channel type, colour otherwise, BGRA when four channels are asked
+// for, then converted to the depth of `type`.
+Mat readImageType(const string& fname, int type)
+{
+    const int channelCount = CV_MAT_CN(type);
+
+    Mat image = readImage(fname, channelCount == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
+
+    if (channelCount == 4)
+    {
+        Mat withAlpha;
+        cvtColor(image, withAlpha, cv::COLOR_BGR2BGRA);
+        swap(image, withAlpha);
+    }
+
+    image.convertTo(image, CV_MAT_DEPTH(type));
+    return image;
+}
+
+// Returns norm(m, NORM_INF).
+double checkNorm(const Mat& m)
+{
+    const double infNorm = norm(m, NORM_INF);
+    return infNorm;
+}
+
+// Returns norm(m1, m2, NORM_INF).
+double checkNorm(const Mat& m1, const Mat& m2)
+{
+    const double infNormOfDiff = norm(m1, m2, NORM_INF);
+    return infNormOfDiff;
+}
+
+// Runs matchTemplate with CV_TM_CCORR_NORMED on m1 and m2 and returns how far
+// the response at (0, 0) is from 1.
+double checkSimilarity(const Mat& m1, const Mat& m2)
+{
+    Mat response;
+    matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+
+    const float score = response.at<float>(0, 0);
+    return std::abs(score - 1.f);
+}
+
+/*
+void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os)
+{
+ (*os) << info.name();
+}
+*/
+
+// Printer for Inverse values: writes "inverse" when the value tests true and
+// "direct" otherwise.
+void PrintTo(const Inverse& inverse, std::ostream* os)
+{
+    (*os) << (inverse ? "inverse" : "direct");
+}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_UTILITY_HPP__
+#define __OPENCV_TEST_UTILITY_HPP__
+// Uncomment the next line to define PRINT_KERNEL_RUN_TIME.
+//#define PRINT_KERNEL_RUN_TIME
+// LOOP_TIMES is defined as 1 in both branches, so PRINT_KERNEL_RUN_TIME
+// currently does not change its value.
+#ifdef PRINT_KERNEL_RUN_TIME
+#define LOOP_TIMES 1
+#else
+#define LOOP_TIMES 1
+#endif
+// NOTE(review): MWIDTH/MHEIGHT look like default test-matrix dimensions -- confirm against their users.
+#define MWIDTH 2557
+#define MHEIGHT 2579
+// NOTE(review): presumably the directory used for OpenCL binaries (Windows-style current directory) -- confirm.
+#define CLBINPATH ".\\"
+// Random-value helpers; their definitions are not part of this header.
+// NOTE(review): whether maxVal is inclusive cannot be told from these declarations -- confirm.
+int randomInt(int minVal, int maxVal);
+double randomDouble(double minVal, double maxVal);
+
+//std::string generateVarList(int first,...);
+std::string generateVarList(int& p1,int& p2);
+cv::Size randomSize(int minVal, int maxVal);
+cv::Scalar randomScalar(double minVal, double maxVal);
+// minVal and maxVal default to 0.0 and 255.0 when omitted.
+cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
+
+// NOTE(review): presumably reports where gold and actual differ by more than eps -- confirm against the definition.
+void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
+
+//! return true if the device supports the specified feature and the gpu module was built with support for that feature.
+//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+
+//! return all devices compatible with current gpu module build.
+//const std::vector<cv::ocl::DeviceInfo>& devices();
+//! return all devices compatible with current gpu module build which support specified feature.
+//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
+
+//! read image from testdata folder.
+//! flags defaults to cv::IMREAD_COLOR.
+cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
+// NOTE(review): presumably reads fname and converts it to the given matrix type -- confirm against the definition.
+cv::Mat readImageType(const std::string& fname, int type);
+
+// Comparison metrics used by the EXPECT_MAT_* macros.
+// checkNorm: the definitions in this patch call norm(..., NORM_INF) on one matrix or on a pair.
+double checkNorm(const cv::Mat& m);
+double checkNorm(const cv::Mat& m1, const cv::Mat& m2);
+// checkSimilarity: the definition in this patch returns |r - 1|, where r is the
+// CV_TM_CCORR_NORMED matchTemplate response at (0, 0).
+double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
+
+// Expects checkNorm(cv::Mat(mat)) to be at most eps (non-fatal on failure).
+// The statement inside the braces is terminated with ';' so the expansion is
+// a well-formed compound statement, matching the other EXPECT_MAT_* macros.
+#define EXPECT_MAT_NORM(mat, eps) \
+{ \
+    EXPECT_LE(checkNorm(cv::Mat(mat)), eps); \
+}
+
+//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
+//{ \
+// ASSERT_EQ(mat1.type(), mat2.type()); \
+// ASSERT_EQ(mat1.size(), mat2.size()); \
+// EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \
+//}
+
+// Asserts that mat1 and mat2 have the same type and size, then expects
+// checkNorm(cv::Mat(mat1), cv::Mat(mat2)) to be at most eps; s is streamed into
+// the failure message. mat1/mat2 are parenthesized before member access so
+// that arbitrary argument expressions expand correctly. Each matrix argument
+// is evaluated more than once.
+#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \
+{ \
+    ASSERT_EQ((mat1).type(), (mat2).type()); \
+    ASSERT_EQ((mat1).size(), (mat2).size()); \
+    EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)<<s; \
+}
+
+// Asserts that mat1 and mat2 have the same type and size, then expects
+// checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)) to be at most eps.
+// mat1/mat2 are parenthesized before member access so that arbitrary argument
+// expressions expand correctly. Each matrix argument is evaluated more than once.
+#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) \
+{ \
+    ASSERT_EQ((mat1).type(), (mat2).type()); \
+    ASSERT_EQ((mat1).size(), (mat2).size()); \
+    EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
+}
+
+// The PrintTo overload for DeviceInfo is commented out, so this namespace
+// block currently declares nothing.
+namespace cv
+{
+ namespace ocl
+ {
+ // void PrintTo(const DeviceInfo& info, std::ostream* os);
+ }
+}
+
+// Make perf::MatDepth and perf::MatType usable without qualification.
+using perf::MatDepth;
+using perf::MatType;
+
+//! return vector with types from specified range.
+//! NOTE(review): presumably every depth in [depth_start, depth_end] combined with every
+//! channel count in [cn_start, cn_end] -- confirm the bounds against the definition.
+std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
+
+//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
+const std::vector<MatType>& all_types();
+
+// Thin wrapper around a bool that defaults to false, is implicitly
+// constructible from bool and implicitly converts back to bool.
+class Inverse
+{
+    bool val_;   // wrapped flag (class members are private by default)
+
+public:
+    Inverse(bool val = false) : val_(val) {}
+
+    operator bool() const { return val_; }
+};
+
+// Writes "inverse" to *os when the value is true and "direct" otherwise.
+// The parameter is named after what it holds (it was previously called useRoi).
+void PrintTo(const Inverse& inverse, std::ostream* os);
+
+// Parameter types declared through the CV_ENUM macro (defined outside this header),
+// each listing the values it may take.
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+
+CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
+
+// NOTE(review): cv::flip documents 0 as a flip around the x-axis, a positive code as a flip
+// around the y-axis and a negative code as a flip around both axes; these names do not
+// appear to match that mapping -- confirm before relying on the printed names.
+ enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
+CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
+
+CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+// Flag set over the cv::GEMM_*_T values, declared through CV_FLAGS. Written without a
+// trailing ';' like the other CV_ENUM/CV_FLAGS uses in this header, which avoids a stray
+// empty declaration at namespace scope.
+CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
+
+// Further parameter types: CV_ENUM lists the individual values, CV_FLAGS lists the flag values.
+CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
+
+CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+
+// Interpolation modes plus cv::WARP_INVERSE_MAP.
+CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+
+CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+// Defined outside this header.
+void run_perf_test();
+
+// Opens the definition of a fixture struct `name` derived from testing::TestWithParam
+// over a std::tr1::tuple of the given types; the caller supplies the struct body.
+#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
+
+// k-th element of the current test parameter tuple.
+#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+// NOTE(review): both macros call devices(), but the devices() declarations in this header
+// are commented out -- confirm a declaration exists elsewhere before using them.
+#define ALL_DEVICES testing::ValuesIn(devices())
+#define DEVICES(feature) testing::ValuesIn(devices(feature))
+
+// Parameter generators built on all_types() and types().
+#define ALL_TYPES testing::ValuesIn(all_types())
+#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
+
+// Two fixed sizes: 128x128 and 113x113.
+#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
+
+// Both Inverse values, false first.
+#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
+
+#endif // __OPENCV_TEST_UTILITY_HPP__