From 37ca6d378bd8fa9245a54f6a25af704dc20c884f Mon Sep 17 00:00:00 2001 From: niko Date: Wed, 8 Aug 2012 17:09:29 +0800 Subject: [PATCH] add performance test code for ocl module --- modules/ocl/CMakeLists.txt | 8 +- modules/ocl/perf/interpolation.hpp | 120 + modules/ocl/perf/main.cpp | 108 + modules/ocl/perf/perf_test_ocl.cpp | 1191 +++++++++ modules/ocl/perf/precomp.cpp | 45 + modules/ocl/perf/precomp.hpp | 72 + modules/ocl/perf/test_arithm.cpp | 3658 ++++++++++++++++++++++++++++ modules/ocl/perf/test_filters.cpp | 1096 +++++++++ modules/ocl/perf/test_haar.cpp | 198 ++ modules/ocl/perf/test_imgproc.cpp | 1551 ++++++++++++ modules/ocl/perf/test_matrix_operation.cpp | 616 +++++ modules/ocl/perf/test_split_merge.cpp | 455 ++++ modules/ocl/perf/utility.cpp | 265 ++ modules/ocl/perf/utility.hpp | 177 ++ 14 files changed, 9556 insertions(+), 4 deletions(-) create mode 100644 modules/ocl/perf/interpolation.hpp create mode 100644 modules/ocl/perf/main.cpp create mode 100644 modules/ocl/perf/perf_test_ocl.cpp create mode 100644 modules/ocl/perf/precomp.cpp create mode 100644 modules/ocl/perf/precomp.hpp create mode 100644 modules/ocl/perf/test_arithm.cpp create mode 100644 modules/ocl/perf/test_filters.cpp create mode 100644 modules/ocl/perf/test_haar.cpp create mode 100644 modules/ocl/perf/test_imgproc.cpp create mode 100644 modules/ocl/perf/test_matrix_operation.cpp create mode 100644 modules/ocl/perf/test_split_merge.cpp create mode 100644 modules/ocl/perf/utility.cpp create mode 100644 modules/ocl/perf/utility.hpp diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt index 2459577..a6496ae 100644 --- a/modules/ocl/CMakeLists.txt +++ b/modules/ocl/CMakeLists.txt @@ -64,8 +64,8 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs} ################################################################################################################ ################################ OpenCL Module Performance ################################################## 
################################################################################################################ -#file(GLOB perf_srcs "perf/*.cpp") -#file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h") +file(GLOB perf_srcs "perf/*.cpp") +file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h") -#ocv_add_perf_tests(FILES "Include" ${perf_hdrs} -# FILES "Src" ${perf_srcs}) +ocv_add_perf_tests(FILES "Include" ${perf_hdrs} + FILES "Src" ${perf_srcs}) diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp new file mode 100644 index 0000000..d918004 --- /dev/null +++ b/modules/ocl/perf/interpolation.hpp @@ -0,0 +1,120 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEST_INTERPOLATION_HPP__ +#define __OPENCV_TEST_INTERPOLATION_HPP__ + +template T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) +{ + if (border_type == cv::BORDER_CONSTANT) + return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? 
src.at(y, x * src.channels() + c) : cv::saturate_cast(borderVal.val[c]); + + return src.at(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c); +} + +template struct NearestInterpolator +{ + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + return readVal(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); + } +}; + +template struct LinearInterpolator +{ + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + x -= 0.5f; + y -= 0.5f; + + int x1 = cvFloor(x); + int y1 = cvFloor(y); + int x2 = x1 + 1; + int y2 = y1 + 1; + + float res = 0; + + res += readVal(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y)); + res += readVal(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y)); + res += readVal(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1)); + res += readVal(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1)); + + return cv::saturate_cast(res); + } +}; + +template struct CubicInterpolator +{ + static float getValue(float p[4], float x) + { + return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0]))); + } + + static float getValue(float p[4][4], float x, float y) + { + float arr[4]; + + arr[0] = getValue(p[0], x); + arr[1] = getValue(p[1], x); + arr[2] = getValue(p[2], x); + arr[3] = getValue(p[3], x); + + return getValue(arr, y); + } + + static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + { + int ix = cvRound(x); + int iy = cvRound(y); + + float vals[4][4] = + { + {readVal(src, iy - 2, ix - 2, c, border_type, borderVal), readVal(src, iy - 2, ix - 1, c, border_type, borderVal), readVal(src, iy - 2, ix, c, border_type, borderVal), readVal(src, iy - 2, ix + 1, c, border_type, 
borderVal)}, + {readVal(src, iy - 1, ix - 2, c, border_type, borderVal), readVal(src, iy - 1, ix - 1, c, border_type, borderVal), readVal(src, iy - 1, ix, c, border_type, borderVal), readVal(src, iy - 1, ix + 1, c, border_type, borderVal)}, + {readVal(src, iy , ix - 2, c, border_type, borderVal), readVal(src, iy , ix - 1, c, border_type, borderVal), readVal(src, iy , ix, c, border_type, borderVal), readVal(src, iy , ix + 1, c, border_type, borderVal)}, + {readVal(src, iy + 1, ix - 2, c, border_type, borderVal), readVal(src, iy + 1, ix - 1, c, border_type, borderVal), readVal(src, iy + 1, ix, c, border_type, borderVal), readVal(src, iy + 1, ix + 1, c, border_type, borderVal)}, + }; + + return cv::saturate_cast(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0)); + } +}; + +#endif // __OPENCV_TEST_INTERPOLATION_HPP__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp new file mode 100644 index 0000000..0d9d967 --- /dev/null +++ b/modules/ocl/perf/main.cpp @@ -0,0 +1,108 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace std; +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; + +void print_info() +{ + printf("\n"); +#if defined _WIN32 +# if defined _WIN64 + puts("OS: Windows 64"); +# else + puts("OS: Windows 32"); +# endif +#elif defined linux +# if defined _LP64 + puts("OS: Linux 64"); +# else + puts("OS: Linux 32"); +# endif +#elif defined __APPLE__ +# if defined _LP64 + puts("OS: Apple 64"); +# else + puts("OS: Apple 32"); +# endif +#endif + +} + +#if PERF_TEST_OCL +int main(int argc, char** argv) +{ + + static std::vector ocl_info; + ocl::getDevice(ocl_info); + + run_perf_test(); + return 0; +} +#else +int main(int argc, char** argv) +{ + TS::ptr()->init("ocl"); + InitGoogleTest(&argc, argv); + + print_info(); + + return RUN_ALL_TESTS(); +} +#endif // PERF_TEST_OCL + +#else // HAVE_OPENC + +int main() +{ + printf("OpenCV was built without OpenCL support\n"); + return 0; +} + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/perf_test_ocl.cpp b/modules/ocl/perf/perf_test_ocl.cpp new file mode 100644 index 0000000..67f20a3 --- /dev/null +++ b/modules/ocl/perf/perf_test_ocl.cpp @@ -0,0 +1,1191 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicore Ware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" +#include + +#if PERF_TEST_OCL + +#ifdef HAVE_OPENCL + +#define SHOW_CPU false +#define REPEAT 1000 +#define COUNT_U 0 // count the uploading execution time for ocl mat structures +#define COUNT_D 0 + + +// the following macro section tests the target function (kernel) performance +// upload is the code snippet for converting cv::mat to cv::ocl::oclMat +// downloading is the code snippet for converting cv::ocl::oclMat back to cv::mat +// change COUNT_U and COUNT_D to take downloading and uploading time into account +#define P_TEST_FULL( upload, kernel_call, download ) \ +{ \ + std::cout<< "\n" #kernel_call "\n----------------------"; \ + {upload;} \ + R_TEST( kernel_call, 15 ); \ + double t = (double)cvGetTickCount(); \ + R_T( { \ + if( COUNT_U ) {upload;} \ + kernel_call; \ + if( COUNT_D ) {download;} \ + } ); \ + t = (double)cvGetTickCount() - t; \ + std::cout << "runtime is " << t/((double)cvGetTickFrequency()* 1000.) << "ms" << std::endl; \ +} + + +#define R_T2( test ) \ +{ \ + std::cout<< "\n" #test "\n----------------------"; \ + R_TEST( test, 15 ) \ + clock_t st = clock(); \ + R_T( test ) \ + std::cout<< clock() - st << "ms\n"; \ +} +#define R_T( test ) \ + R_TEST( test, REPEAT ) +#define R_TEST( test, repeat ) \ + try{ \ + for( int i = 0; i < repeat; i ++ ) { test; } \ + } catch( ... ) { std::cout << "||||| Exception catched! 
|||||\n"; return; } + +#define FILTER_TEST_IMAGE "C:/Windows/Web/Wallpaper/Landscapes/img9.jpg" +#define WARN_NRUN( name ) \ + std::cout << "Warning: " #name " is not runnable!\n"; + + +void print_info(); + +// performance base class +struct PerfTest +{ + virtual void Run() = 0; + protected: + virtual void SetUp() = 0; +}; +/////////////////////////////////////// +// Arithm +struct ArithmTestP : PerfTest +{ + int type; + cv::Scalar val; + + cv::Size size; + cv::Mat mat1, mat2; + cv::Mat mask; + cv::Mat dst; + cv::ocl::oclMat oclRes, oclmat1, oclmat2; + cv::ocl::oclMat oclmask; + std::vector dstv; + protected: + ArithmTestP() : type( CV_8UC4 ) {} + virtual void SetUp() + { + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size( 3000, 3000 ); // big input image + mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + mat2 = cvtest::randomMat(rng, size, type, 1, 255, false); + mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + oclmask = cv::ocl::oclMat(mask); + } +}; + +struct AddArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::add(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct SubtractArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::subtract(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct MultiplyArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + clock_t start = clock(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::multiply(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } 
+}; + +struct DivideArrayP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::divide(oclmat1, oclmat2, oclRes), + oclRes.download(dst); + ); + } +}; + +struct ExpP : ArithmTestP +{ + void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::exp(oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct LogP : ArithmTestP +{ + void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::log(oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct CompareP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::compare(oclmat1, oclmat2, oclRes, cv::CMP_EQ), + oclRes.download(dst); + ); + } +}; + +struct FlipP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::flip(oclmat1, oclRes, 0), + oclRes.download(dst); + ); + } + protected: + virtual void SetUp() + { + type = CV_8UC4; + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size(3000, 3000); + mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct MagnitudeP : ArithmTestP +{ + virtual void Run() + { + type = CV_32F; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::magnitude(oclmat1, oclmat1, oclRes), + oclRes.download(dst); + ); + } +}; + +struct LUTP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);ocllut = cv::ocl::oclMat(lut), + cv::ocl::LUT(oclmat1, ocllut, oclRes), + oclRes.download(dst); + ); + } + protected: + cv::Mat lut; + cv::ocl::oclMat ocllut; + virtual void SetUp() + { + type = CV_8UC1; + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = cv::Size(3000, 3000); + 
mat1 = cvtest::randomMat(rng, size, type, 1, 255, false); + lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false); + oclmat1 = cv::ocl::oclMat(mat1); + ocllut = cv::ocl::oclMat(lut); + } +}; + +struct MinMaxP : ArithmTestP +{ + double minVal_gold, minVal; + double maxVal_gold, maxVal; + + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::minMax(oclmat1, &minVal, &maxVal, oclmat2), + {}; + ); + } + + protected: + virtual void SetUp() + { + type = CV_64F; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size(3000, 3000); + + mat1 = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); + mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); + + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct MinMaxLocP : MinMaxP +{ + cv::Point minLoc_gold; + cv::Point maxLoc_gold; + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::minMaxLoc(oclmat1, &minVal, &maxVal, &minLoc_gold, &maxLoc_gold, oclmat2), + {} + ); + } +}; + +struct CountNonZeroP : ArithmTestP +{ + int n; + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + n = cv::ocl::countNonZero(oclmat1), + {} + ); + } + protected: + virtual void SetUp() + { + type = 6; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size( 3000, 3000 ); + + cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 1.0, false); + matBase.convertTo(mat1, type); + + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct SumP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + cv::Scalar n; + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + n = cv::ocl::sum(oclmat1), + {} + ); + } +}; + +struct BitwiseP : ArithmTestP +{ + protected: + virtual void SetUp() + { + type = CV_8UC4; + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + + size = cv::Size( 3000, 3000 ); + + 
mat1.create(size, type); + mat2.create(size, type); + + for (int i = 0; i < mat1.rows; ++i) + { + cv::Mat row1(1, static_cast(mat1.cols * mat1.elemSize()), CV_8U, (void*)mat1.ptr(i)); + rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); + + cv::Mat row2(1, static_cast(mat2.cols * mat2.elemSize()), CV_8U, (void*)mat2.ptr(i)); + rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); + } + oclmat1 = cv::ocl::oclMat(mat1); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct BitwiseNotP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_not(oclmat1, oclRes), + oclRes.download(dst) + ); + } +}; + +struct BitwiseAndP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_and(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_and(oclmat1, val, oclRes), + oclRes.download(dst) + ); + } +}; + +struct BitwiseXorP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_xor(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_xor(oclmat1, val, oclRes), + oclRes.download(dst) + ); + + } +}; + +struct BitwiseOrP : BitwiseP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::bitwise_or(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::bitwise_or(oclmat1, val, oclRes), + oclRes.download(dst) + ); + } +}; + +struct TransposeP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::transpose(oclmat1, oclRes), + oclRes.download(dst) + ); + } +}; + +struct 
AbsdiffArrayP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::absdiff(oclmat1, oclmat2, oclRes), + oclRes.download(dst) + ); + } +}; + +struct PhaseP : ArithmTestP +{ + virtual void Run() + { + type = CV_32F; + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1);oclmat2 = cv::ocl::oclMat(mat2), + cv::ocl::phase(oclmat1,oclmat2,oclRes,1), + oclRes.download(dst) + ); + } +}; + +struct CartToPolarP : ArithmTestP +{ + cv::ocl::oclMat oclRes1; + virtual void Run() + { + type = CV_64FC4; + SetUp(); + clock_t start = clock(); + R_TEST( + cv::ocl::cartToPolar(oclmat1,oclmat2,oclRes, oclRes1, 1); + if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);} + , 5); + std::cout<< "ocl::CartToPolar -- " << clock() - start << "ms\n"; + } +}; + +struct PolarToCartP : ArithmTestP +{ + cv::ocl::oclMat oclRes1; + virtual void Run() + { + type = CV_64FC4; + SetUp(); + clock_t start = clock(); + R_TEST( + cv::ocl::polarToCart(oclmat1,oclmat2,oclRes, oclRes1, 1); + if( COUNT_D ) {oclRes.download(dst);oclRes1.download(dst);} + , 2); + std::cout<< "ocl::polarToCart -- " << clock() - start << "ms\n"; + } +}; + +/////////////////////////////////////// +// split & merge +struct SplitP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::split(oclmat1, dev_dst), + { + dstv.resize(dev_dst.size()); + for (size_t i = 0; i < dev_dst.size(); ++i) + { + dev_dst[i].download(dstv[i]); + } + } + ); + } + protected: + std::vector dev_dst; + virtual void SetUp() + { + size = cv::Size( 3000, 3000 ); + + mat1.create(size, type); + mat1.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); + + oclmat1 = cv::ocl::oclMat(mat1); + } +}; + +struct MergeP : SplitP +{ + virtual void Run() + { + SetUp(); + cv::ocl::split(oclmat1, dev_dst); + cv::split(mat1, dstv); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + cv::ocl::merge(dev_dst, 
oclmat2), + oclmat2.download(dst) + ); + } +}; + +struct SetToP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + static cv::Scalar s = cv::Scalar(1, 2, 3, 4); + P_TEST_FULL( + oclmat2 = cv::ocl::oclMat(mat2), + oclmat1.setTo( s, oclmat2 ), + oclmat1.download(dst); + ); + } + protected: + virtual void SetUp() + { + type = CV_32FC4; + size = cv::Size(3000, 3000); + + mat1.create(size, type); + oclmat1.create(size, type); + + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + mat2 = cvtest::randomMat(rng, size, CV_8UC1, 0.0, 1.5, false); + oclmat2 = cv::ocl::oclMat(mat2); + } +}; + +struct CopyToP : SetToP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + oclmat1.copyTo( oclRes, oclmat2 ), + oclRes.download(dst) + ); + } +}; + +struct ConvertToP : ArithmTestP +{ + virtual void Run() + { + type = CV_32FC1;; + SetUp(); + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + const double a = rng.uniform(0.0, 1.0); + const double b = rng.uniform(-10.0, 10.0); + + int type2 = CV_32FC4; + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat(mat1), + oclmat1.convertTo( oclRes, type2 /*, a, b */ ), // fails when scaling factors a and b are specified + oclRes.download(dst) + ); + } +}; + +//////////////////////////////////////////// +// Filters + +struct FilterTestP : PerfTest +{ + protected: + int ksize; + int dx, dy; + + cv::Mat img_rgba; + cv::Mat img_gray; + + cv::ocl::oclMat ocl_img_rgba; + cv::ocl::oclMat ocl_img_gray; + + cv::ocl::oclMat dev_dst_rgba; + cv::ocl::oclMat dev_dst_gray; + + cv::Mat dst_rgba; + cv::Mat dst_gray; + + cv::Mat kernel; + + int bordertype; + + virtual void SetUp() + { + bordertype = (int)cv::BORDER_DEFAULT; + ksize = 7; + dx = ksize/2; dy = ksize/2; + + kernel = cv::Mat::ones(ksize, ksize, CV_8U); + + cv::Mat img = readImage(FILTER_TEST_IMAGE); + ASSERT_FALSE(img.empty()); + + cv::cvtColor(img, img_rgba, CV_BGR2BGRA); + cv::cvtColor(img, img_gray, CV_BGR2GRAY); + + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray 
= cv::ocl::oclMat(img_gray); + } +}; + +struct BlurP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::blur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype); + cv::ocl::blur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), cv::Point(-1,-1), bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct SobelP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Sobel(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, ksize, 1, 0, bordertype); + cv::ocl::Sobel(ocl_img_gray, dev_dst_gray, -1, dx, dy, ksize, 1, 0, bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct ScharrP : FilterTestP +{ + virtual void Run() + { + SetUp(); + dx = 0; dy = 1; + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Scharr(ocl_img_rgba, dev_dst_rgba, -1, dx, dy, 1, 0, bordertype); + cv::ocl::Scharr(ocl_img_gray, dev_dst_gray, -1, dx, dy, 1, 0, bordertype); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct GaussianBlurP : FilterTestP +{ + virtual void Run() + { + double sigma1 = 3, sigma2 = 3; + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::GaussianBlur(ocl_img_rgba, dev_dst_rgba, cv::Size(ksize, ksize), sigma1, sigma2); + cv::ocl::GaussianBlur(ocl_img_gray, dev_dst_gray, cv::Size(ksize, ksize), sigma1, sigma2); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct DilateP : FilterTestP +{ + virtual void Run() + { + SetUp(); + 
P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::dilate(ocl_img_rgba, dev_dst_rgba, kernel); + cv::ocl::dilate(ocl_img_gray, dev_dst_gray, kernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct ErodeP : FilterTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::erode(ocl_img_rgba, dev_dst_rgba, kernel); + cv::ocl::erode(ocl_img_gray, dev_dst_gray, kernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct MorphExP : FilterTestP +{ + virtual void Run() + { + SetUp(); + cv::ocl::oclMat okernel; + P_TEST_FULL( + { + okernel = cv::ocl::oclMat(kernel); + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::morphologyEx(ocl_img_rgba, dev_dst_rgba, 3, okernel); + cv::ocl::morphologyEx(ocl_img_gray, dev_dst_gray, 3, okernel); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +struct LaplacianP : FilterTestP +{ + void Run() + { + SetUp(); + P_TEST_FULL( + { + ocl_img_rgba = cv::ocl::oclMat(img_rgba); + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::Laplacian(ocl_img_rgba, dev_dst_rgba, -1, 3 ); + cv::ocl::Laplacian(ocl_img_gray, dev_dst_gray, -1, 3 ); + }, + { + dev_dst_rgba.download(dst_rgba); + dev_dst_gray.download(dst_gray); + }); + } +}; + +//////////////////// +// histograms +struct CalcHistP : PerfTest +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::calcHist(oclmat, oclRes), + oclRes.download(hist) + ); + } + protected: + cv::Size size; + cv::Mat src, hist; + + cv::ocl::oclMat oclmat; + cv::ocl::oclMat oclRes; + + virtual void SetUp() + { + cv::RNG& rng = cvtest::TS::ptr()->get_rng(); + size = 
cv::Size(3000, 3000); + src = cvtest::randomMat(rng, size, CV_8UC1, 0, 255, false); + oclmat = cv::ocl::oclMat( src ); + } +}; + +struct EqualizeHistP : CalcHistP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::equalizeHist(oclmat, oclRes), + oclRes.download(hist) + ); + } +}; + +struct ThresholdP : CalcHistP +{ + virtual void Run() + { + SetUp(); + int threshOp = (int)cv::THRESH_TOZERO_INV;; + double maxVal = 200; + double thresh = 125; + + clock_t start = clock(); + + P_TEST_FULL( + oclmat = cv::ocl::oclMat( src ), + cv::ocl::threshold(oclmat, oclRes, thresh, maxVal, threshOp ), + oclRes.download(hist) + ); + } +}; + +struct ResizeP : ArithmTestP +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::resize(oclmat1, oclRes, cv::Size(), 2.0, 2.0), + oclRes.download(dst) + ); + } +}; + +struct CvtColorP : PerfTest +{ + virtual void Run() + { + SetUp(); + P_TEST_FULL( + oclmat = cv::ocl::oclMat( img ), + cv::ocl::cvtColor(oclmat, ocldst, cvtcode), + ocldst.download(dst) + ); + } + protected: + int type; + int cvtcode; + + cv::Mat img, dst; + cv::ocl::oclMat oclmat, ocldst; + virtual void SetUp() + { + type = CV_8U; + cvtcode = CV_BGR2GRAY; + cv::Mat imgBase = readImage(FILTER_TEST_IMAGE); + ASSERT_FALSE(imgBase.empty()); + + imgBase.convertTo(img, type, type == CV_32F ? 
1.0 / 255.0 : 1.0); + oclmat = cv::ocl::oclMat( img ); + }; +}; + + +struct WarpAffineP : ArithmTestP +{ + void Run() + { + SetUp(); + const double aplha = CV_PI / 4; + double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2}, + {std::sin(aplha), std::cos(aplha), 0}}; + cv::Mat M(2, 3, CV_64F, (void*) mat); + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::warpAffine( oclmat1, oclRes, M, cv::Size(1500, 1500) ), + oclRes.download(dst) + ); + } +}; + +struct WarpPerspectiveP : ArithmTestP +{ + void Run() + { + SetUp(); + const double aplha = CV_PI / 4; + double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), mat1.cols / 2}, + {std::sin(aplha), std::cos(aplha), 0}, + {0.0, 0.0, 1.0}}; + cv::Mat M(3, 3, CV_64F, (void*) mat); + + P_TEST_FULL( + oclmat1 = cv::ocl::oclMat( mat1 ), + cv::ocl::warpPerspective( oclmat1, oclRes, M, cv::Size(1500, 1500) ), + oclRes.download(dst) + ); + } +}; + + +struct CornerHarrisP : FilterTestP +{ + void Run() + { + SetUp(); + bordertype = 2; + P_TEST_FULL( + { + ocl_img_gray = cv::ocl::oclMat(img_gray); + }, + { + cv::ocl::cornerHarris(ocl_img_gray, dev_dst_gray, 3, ksize, 0.5, bordertype ); + }, + { + dev_dst_gray.download(dst_gray); + }); + } +}; + +void test() +{ + clock_t start = clock(); + std::cout << ">>>>>>>> Performance test started <<<<<<<<\n"; + /* + { + AddArrayP AddArrayP; + AddArrayP.Run(); + SubtractArrayP subarray; + subarray.Run(); + MultiplyArrayP MultiplyArrayP; + MultiplyArrayP.Run(); + DivideArrayP DivideArrayP; + DivideArrayP.Run(); + } + std::cout.flush(); + { + CompareP comp; + comp.Run(); + MagnitudeP magnitude; + magnitude.Run(); + LUTP lut; + lut.Run(); + FlipP FlipP; + FlipP.Run(); + MinMaxP minmax; + minmax.Run(); + MinMaxLocP minmaxloc; + minmaxloc.Run(); + CountNonZeroP cnz; + cnz.Run(); + SumP sum; + sum.Run(); + }*/ + /* std::cout.flush(); + { + BitwiseNotP bn; + bn.Run(); + BitwiseOrP bo; + bo.Run(); + BitwiseAndP ba; + ba.Run(); + BitwiseXorP bx; + bx.Run(); + }*/ + + 
std::cout.flush(); + { + // TransposeP transpose; + // transpose.Run(); + // AbsdiffArrayP absdiff; + // absdiff.Run(); + // SplitP split; + // split.Run(); + // MergeP merge; + // merge.Run(); + /* + SetToP setto; + setto.Run(); + CopyToP copyto; + copyto.Run(); + ConvertToP convertto; + convertto.Run(); + */ + } + /* + std::cout.flush(); + { + BlurP blur; + blur.Run(); + SobelP sobel; + sobel.Run(); + ScharrP scharr; + scharr.Run(); + GaussianBlurP gblur; + gblur.Run(); + DilateP dilate; + dilate.Run(); + ErodeP erode; + erode.Run(); + } + std::cout.flush(); + { + MorphExP morphex; + morphex.Run(); + CalcHistP calchist; + calchist.Run(); + EqualizeHistP eqhist; + eqhist.Run(); + ThresholdP threshold; + threshold.Run(); + ResizeP resize; + resize.Run(); + CvtColorP cvtcolor; + cvtcolor.Run(); + } + + { + LogP log; + log.Run(); + ExpP exp; + exp.Run(); + } + + std::cout.flush(); + { + //PhaseP phase; + //phase.Run(); + } + std::cout.flush(); + { + CartToPolarP ctop; + ctop.Run(); + } + std::cout.flush(); + { + PolarToCartP ptoc; + ptoc.Run(); + } + { + WarpAffineP warpA; + warpA.Run(); + WarpPerspectiveP warpP; + warpP.Run(); + } + + { + CornerHarrisP ch; + ch.Run(); + } + + { + LaplacianP laplacian; + laplacian.Run(); + } + + + */ + std::cout << ">>>>>>>> Performance test ended <<<<<<<<\ntotal - " << clock() - start << "ms\n"; + std::cout.flush(); +} + +void run_perf_test() +{ + print_info(); + cvtest::TS::ptr()->init("ocl"); + test(); +} + +#endif // WITH_OPENCL + +#endif // PREF_TEST_OCL diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp new file mode 100644 index 0000000..f505dac --- /dev/null +++ b/modules/ocl/perf/precomp.cpp @@ -0,0 +1,45 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + + + \ No newline at end of file diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp new file mode 100644 index 0000000..cad26fc --- /dev/null +++ b/modules/ocl/perf/precomp.hpp @@ -0,0 +1,72 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cvconfig.h" +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/video/video.hpp" +#include "opencv2/ts/ts.hpp" +#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/nonfree/nonfree.hpp" + +#include "utility.hpp" +#include "interpolation.hpp" +//#include "add_test_info.h" +//#define PERF_TEST_OCL 1 + +#endif + diff --git a/modules/ocl/perf/test_arithm.cpp b/modules/ocl/perf/test_arithm.cpp new file mode 100644 index 0000000..0e6cf6e --- /dev/null +++ b/modules/ocl/perf/test_arithm.cpp @@ -0,0 +1,3658 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. 
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Shengen Yan, yanshengen@gmail.com +// Jiang Liyuan,jlyuan001.good@163.com +// Rock Li, Rock.Li@amd.com +// Zailong Wu, bullet@yeah.net +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage.
+// +//M*/ + + +#include "precomp.hpp" +#include + +#ifdef HAVE_OPENCL +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; +using namespace std; +PARAM_TEST_CASE(ArithmTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + maskx =1; + masky =1; + }else + { + 
roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx =0; + masky =0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } + +}; +////////////////////////////////lut///////////////////////////////////////////////// + +struct Lut : ArithmTestBase {}; + +TEST_P(Lut, Mat) +{ + + cv::Mat mat2(3, 512, CV_8UC1); + cv::RNG& rng = TS::ptr()->get_rng(); + rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2_roi = mat2(Rect(src2x,src2y,256,1)); + + + t0 = (double)cvGetTickCount();//cpu start + cv::LUT(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::LUT(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + 
totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + // s=GetParam(); + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + // src2x = rng.uniform( 0,mat2.cols - 256); + // src2y = rng.uniform (0,mat2.rows - 1); + + // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2_roi = mat2(Rect(src2x,src2y,256,1)); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + // gdst1_whole = dst1; + // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + // gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::LUT(gmat1, gmat2, gdst); + }; +#endif + +} + + + +////////////////////////////////exp///////////////////////////////////////////////// + +struct Exp : ArithmTestBase {}; + +TEST_P(Exp, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::exp(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::exp(gmat1, gdst); + t2 = 
(double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + //EXPECT_MAT_NEAR(dst, cpu_dst, 0,""); + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::exp(gmat1, gdst); + }; +#endif + +} + + +////////////////////////////////log///////////////////////////////////////////////// + +struct Log : ArithmTestBase {}; + +TEST_P(Log, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::log(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::log(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + 
totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::log(gmat1, gdst); + }; +#endif + +} + + + + +////////////////////////////////add///////////////////////////////////////////////// + +struct Add : ArithmTestBase {}; + +TEST_P(Add, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + 
cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Add, Mat_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, gmat2, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, gmat2, gdst, gmask); + }; +#endif +} +TEST_P(Add, Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, val, gdst); + }; +#endif +} + +TEST_P(Add, Scalar_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::add(mat1_roi, val, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::add(gmat1, val, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::add(gmat1, val, gdst, gmask); + }; +#endif +} + + +////////////////////////////////sub///////////////////////////////////////////////// +struct Sub : ArithmTestBase {}; + +TEST_P(Sub, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, gmat2, gdst); + }; +#endif +} + +TEST_P(Sub, Mat_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, gmat2, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, gmat2, gdst, gmask); + }; +#endif +} +TEST_P(Sub, Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, val, gdst); + }; +#endif +} + +/* Sub.Scalar_Mask: times masked cv::subtract(mat, val) against masked cv::ocl::subtract(mat, val). For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Sub, Scalar_Mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::subtract(mat1_roi, val, dst_roi, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::subtract(gmat1, val, gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmask = mask_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::subtract(gmat1, val, gdst, gmask); + }; +#endif +} + + +////////////////////////////////Mul///////////////////////////////////////////////// +struct Mul : ArithmTestBase {}; + +/* Mul.Mat: times cv::multiply against cv::ocl::multiply on two matrices. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Mul, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::multiply(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::multiply(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::multiply(gmat1, gmat2, gdst); + }; +#endif +} + +/* Mul.Mat_Scalar: times cv::multiply against cv::ocl::multiply with a scale factor s drawn from rng.uniform(-10.0, 10.0) in every round. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Mul, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::multiply(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::multiply(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::multiply(gmat1, gmat2, gdst, s); + }; +#endif +} + + +struct Div : ArithmTestBase {}; + +/* Div.Mat: times cv::divide against cv::ocl::divide on two matrices. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Div, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::divide(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::divide(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::divide(gmat1, gmat2, gdst); + }; +#endif +} + +/* Div.Mat_Scalar: times cv::divide against cv::ocl::divide with a scale factor s drawn from rng.uniform(-10.0, 10.0) in every round. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Div, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::divide(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::divide(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::RNG& rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::divide(gmat1, gmat2, gdst, s); + }; +#endif +} + + +struct Absdiff : ArithmTestBase {}; + +/* Absdiff.Mat: times cv::absdiff against cv::ocl::absdiff on two matrices. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Absdiff, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::absdiff(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::absdiff(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::absdiff(gmat1, gmat2, gdst); + }; +#endif +} + +/* Absdiff.Mat_Scalar: times cv::absdiff(mat, val) against cv::ocl::absdiff(mat, val). For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Absdiff, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::absdiff(mat1_roi, val, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::absdiff(gmat1, val, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::absdiff(gmat1, val, gdst); + }; +#endif +} + + + +struct CartToPolar : ArithmTestBase {}; + +/* CartToPolar.angleInDegree: times cv::cartToPolar against cv::ocl::cartToPolar with the last argument set to 1; both outputs (dst and dst1) are downloaded inside the gpu timing. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(CartToPolar, angleInDegree) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); + }; +#endif +} + +/* CartToPolar.angleInRadians: times cv::cartToPolar against cv::ocl::cartToPolar with the last argument set to 0; both outputs (dst and dst1) are downloaded inside the gpu timing. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(CartToPolar, angleInRadians) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); + }; +#endif +} + + +struct PolarToCart : ArithmTestBase {}; + +/* PolarToCart.angleInDegree: times cv::polarToCart against cv::ocl::polarToCart with the last argument set to 1; both outputs (dst and dst1) are downloaded inside the gpu timing. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(PolarToCart, angleInDegree) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); + }; +#endif +} + +/* PolarToCart.angleInRadians: times cv::polarToCart against cv::ocl::polarToCart with the last argument set to 0; both outputs (dst and dst1) are downloaded inside the gpu timing. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(PolarToCart, angleInRadians) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + cv::Mat cpu_dst1; + gdst1_whole.download(cpu_dst1); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); + }; +#endif +} + + + +struct Magnitude : ArithmTestBase {}; + +/* Magnitude.Mat: times cv::magnitude against cv::ocl::magnitude on two matrices. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Magnitude, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::magnitude(mat1_roi, mat2_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::magnitude(gmat1, gmat2, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::magnitude(gmat1, gmat2, gdst); + }; +#endif +} + +struct Transpose : ArithmTestBase {}; + +/* Transpose.Mat: times cv::transpose against cv::ocl::transpose. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Transpose, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::transpose(mat1_roi, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::transpose(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::transpose(gmat1, gdst); + }; +#endif +} + + +struct Flip : ArithmTestBase {}; + +/* Flip.X: times cv::flip against cv::ocl::flip with flip code 0. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Flip, X) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, 0); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, 0); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, 0); + }; +#endif +} + +/* Flip.Y: times cv::flip against cv::ocl::flip with flip code 1. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Flip, Y) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, 1); + }; +#endif +} + +/* Flip.BOTH: times cv::flip against cv::ocl::flip with flip code -1. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(Flip, BOTH) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::flip(mat1_roi, dst_roi, -1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::flip(gmat1, gdst, -1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::flip(gmat1, gdst, -1); + }; +#endif +} + + + +struct MinMax : ArithmTestBase {}; + +/* MinMax.MAT: times a CPU min/max search against cv::ocl::minMax. The CPU side calls cv::minMaxLoc unless mat1.depth() is CV_8S, in which case it scans the signed-char elements by hand starting from +/- numeric_limits<double>::max(). For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(MinMax, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits<double>::max(); + maxVal = -std::numeric_limits<double>::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at<signed char>(i, j); + if (val < minVal) minVal = val; + if (val > maxVal) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + }; +#endif +} + +/* MinMax.MASK: masked variant of the min/max timing. The CPU side calls cv::minMaxLoc with mask_roi unless mat1.depth() is CV_8S, in which case it scans by hand and only accepts elements whose mask byte is non-zero; the ocl side calls cv::ocl::minMax with gmask. For k=0 ("no roi") and k=1 ("with roi") it runs LOOP_TIMES+1 rounds, leaves round j==0 out of the sums, and prints the average cpu, gpu and ocl-call-only times in ms. With PRINT_KERNEL_RUN_TIME defined it only issues the ocl call once per mode. */ +TEST_P(MinMax, MASK) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi); + } + else + { + minVal = std::numeric_limits<double>::max(); + maxVal = -std::numeric_limits<double>::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at<signed char>(i, j); + unsigned char m = mask_roi.at<unsigned char>(i, j); + if (val < minVal && m) minVal = val; + if (val > maxVal && m) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); + }; +#endif +} + + +struct MinMaxLoc : ArithmTestBase {}; + +TEST_P(MinMaxLoc, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + if (val < minVal) { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal) { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + t2=(double)cvGetTickCount();//kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + 
totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); + }; +#endif + +} + + +TEST_P(MinMaxLoc, MASK) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + unsigned char m = mask_roi.at(i ,j); + if (val < minVal && m) { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal && m) { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + 
t2=(double)cvGetTickCount();//kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); + }; +#endif +} + + +struct Sum : ArithmTestBase {}; + +TEST_P(Sum, MAT) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + Scalar cpures =cv::sum(mat1_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + Scalar gpures=cv::ocl::sum(gmat1); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no 
roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + Scalar gpures=cv::ocl::sum(gmat1); + }; +#endif +} + +//TEST_P(Sum, MASK) +//{ +// for(int j=0; j oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + //type = GET_PARAM(0); + type = CV_8UC1; + + cv::RNG& rng = TS::ptr()->get_rng(); + + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + maskx =1; + masky =1; + }else + { + roicols 
= mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx =0; + masky =0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } + +}; +struct Compare : CompareTestBase {}; + +TEST_P(Compare, Mat) +{ + if(mat1.type()==CV_8SC1) + { + cout << "\tUnsupported type\t\n"; + } + + int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; + //const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; + int cmp_num = sizeof(cmp_codes) / sizeof(int); + for (int i = 0; i < cmp_num; ++i) + { + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=1;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::compare(mat1_roi,mat2_roi,dst_roi,cmp_codes[i]); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; 
+ totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]); + }; +#endif + } + +} + +struct Pow : ArithmTestBase {}; + +TEST_P(Pow, Mat) +{ + if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F) + { + cout<<"\tUnsupported type\t\n"; + } + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double p=4.5; + t0 = (double)cvGetTickCount();//cpu start + cv::pow(mat1_roi,p,dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::pow(gmat1,p,gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << 
"average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double p=4.5; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::pow(gmat1,p,gdst); + }; +#endif +} + + +struct MagnitudeSqr : ArithmTestBase {}; + +TEST_P(MagnitudeSqr, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + for(int i = 0;i < mat1.rows;++i) + for(int j = 0;j < mat1.cols;++j) + { + float val1 = mat1.at(i,j); + float val2 = mat2.at(i,j); + + ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2; + + } + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + t2=(double)cvGetTickCount();//kernel + cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + cldst.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); + }; +#endif + +} + + +struct AddWeighted : ArithmTestBase {}; + +TEST_P(AddWeighted, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + double alpha=2.0,beta=1.0,gama=3.0; + + t0 = (double)cvGetTickCount();//cpu start + cv::addWeighted(mat1_roi,alpha,mat2_roi,beta,gama,dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::addWeighted(gmat1,alpha,gmat2,beta,gama, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; +} +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double alpha=2.0,beta=1.0,gama=3.0; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::addWeighted(gmat1,alpha, gmat2,beta,gama, gdst); + // double alpha=2.0,beta=1.0,gama=3.0; + // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + }; +#endif + +} +/* +struct AddWeighted : ArithmTestBase {}; + +TEST_P(AddWeighted, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + double alpha=2.0,beta=1.0,gama=3.0; + + t0 = (double)cvGetTickCount();//cpu start + cv::addWeighted(mat1,alpha,mat2,beta,gama,dst); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + cldst.download(cpu_dst); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + +#else + //for(int j = 0; j < 2; j ++) + // { + double alpha=2.0,beta=1.0,gama=3.0; + cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; + //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); + // }; +#endif + +} + +*/ +//********test**************** + +INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine( + Values(CV_8UC1, CV_8UC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine( + Values(CV_32FC1, CV_64FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( + Values(CV_32FC1, CV_64FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( + Values(CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Transpose, 
Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( + Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( + Values(CV_8UC1, CV_32FC1), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine( + Values(CV_8UC1, CV_32FC1), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine( + Values(CV_8U, CV_32S, CV_32F), + Values(false))); + +INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( + Values(CV_8U, CV_32S, CV_32F), + Values(false))); + + +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( + Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1,CV_16UC1,CV_16SC1,CV_32SC1,CV_32FC1,CV_64FC1), Values(false))); +//Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); +//Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine( + Values(CV_8UC1, CV_32SC1, CV_32FC1), + Values(false))); // Values(false) is the reserved parameter + + + + +#endif // 
HAVE_OPENCL diff --git a/modules/ocl/perf/test_filters.cpp b/modules/ocl/perf/test_filters.cpp new file mode 100644 index 0000000..ac9a865 --- /dev/null +++ b/modules/ocl/perf/test_filters.cpp @@ -0,0 +1,1096 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Zero Lin, Zero.Lin@amd.com +// Zhang Ying, zhangying913@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +//using namespace cv::ocl; + +PARAM_TEST_CASE(FilterTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 
= randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + void random_roi() + { + cv::RNG& rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + } + +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// +// blur + +PARAM_TEST_CASE(Blur, MatType, cv::Size, int) +{ + int type; + cv::Size ksize; + int bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG& rng = 
TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(Blur, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::blur(mat1_roi, dst_roi, ksize, Point(-1,-1), bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +//Laplacian + +PARAM_TEST_CASE(LaplacianTestBase, MatType, int) +{ + int type; + int ksize; + + //src mat + cv::Mat mat; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat.cols-1; + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +struct Laplacian : LaplacianTestBase {}; + +TEST_P(Laplacian, Accuracy) +{ + 
+#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat = mat_roi; + + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); + }; +#endif +} + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// erode & dilate + +PARAM_TEST_CASE(ErodeDilateBase, MatType, bool) +{ + int type; + //int iterations; + + //erode or dilate kernel + cv::Mat kernel; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + // iterations = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); + kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +// erode + +struct Erode : ErodeDilateBase{}; + +TEST_P(Erode, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + 
double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::erode(mat1_roi, dst_roi, kernel); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + + t2=(double)cvGetTickCount();//kernel + cv::ocl::erode(gmat1, gdst, kernel); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::erode(gmat1, gdst, kernel); + }; +#endif + +} + +// dilate + +struct Dilate : ErodeDilateBase{}; + +TEST_P(Dilate, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + t0 = (double)cvGetTickCount();//cpu start + cv::dilate(mat1_roi, dst_roi, kernel); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::dilate(gmat1, gdst, kernel); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::dilate(gmat1, gdst, kernel); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Sobel + +PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) +{ + int type; + int dx, dy, ksize, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + ksize = GET_PARAM(3); + bordertype = GET_PARAM(4); + dx = 2; dy=0; + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(Sobel, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + 
totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Scharr + +PARAM_TEST_CASE(Scharr, MatType, int, int, int) +{ + int type; + int dx, dy, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + bordertype = GET_PARAM(3); + dx = 1; dy=0; + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } +}; + +TEST_P(Scharr, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + 
totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// GaussianBlur + +PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) +{ + int type; + cv::Size ksize; + int bordertype; + + double sigma1, sigma2; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(2560, 2560); + + sigma1 = rng.uniform(0.1, 1.0); + sigma2 = rng.uniform(0.1, 1.0); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols-1; + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + } + +}; + +TEST_P(GaussianBlur, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double 
t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); + }; +#endif + +} + +//************test********** + +INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/), + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(1/*, 3*/))); + +//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); + +INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); + +//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); + +INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); + + +INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(1, 2), Values(0, 1), Values(3, 5, 7), Values((MatType)cv::BORDER_CONSTANT, + (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), + Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + +INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)), + Values((MatType)cv::BORDER_CONSTANT, 
(MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/test_haar.cpp b/modules/ocl/perf/test_haar.cpp new file mode 100644 index 0000000..8aabd67 --- /dev/null +++ b/modules/ocl/perf/test_haar.cpp @@ -0,0 +1,198 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/objdetect/objdetect.hpp"
+#include "precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+using namespace cvtest;
+using namespace testing;
+using namespace std;
+using namespace cv;
+
+struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; // functor: maps a CvAvgComp to its rect member
+
+PARAM_TEST_CASE(HaarTestBase, int, int) // fixture holding the OpenCL and CPU cascade classifiers used by the Haar perf test
+{
+    std::vector<cv::ocl::Info> oclinfo; // NOTE(review): element type restored after markup stripping - confirm against getDevice()
+    cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+    cv::CascadeClassifier cpucascade, cpunestedCascade;
+// Mat img;
+
+    double scale;
+    int index;
+
+    virtual void SetUp() // loads one cascade file into both classifiers, then queries the OpenCL devices
+    {
+        scale = 1.1;
+
+#if WIN32
+        string cascadeName="E:\\opencvbuffer\\trunk\\data\\haarcascades\\haarcascade_frontalface_alt.xml";
+#else
+        string cascadeName="../data/haarcascades/haarcascade_frontalface_alt.xml";
+#endif
+
+        if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+        {
+            cout << "ERROR: Could not load classifier cascade" << endl;
+            cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
+                " [--nested-cascade[=nested_cascade_path]]\n"
+                " [--scale[=<image scale>\n"
+                " [filename|camera_index]\n" << endl ;
+
+            return; // the fixture is left without a loaded cascade; the test body still runs
+        }
+        int devnums = getDevice(oclinfo);
+        CV_Assert(devnums>0);
+        //if you want to use a non-default device, set it here
+        //setDevice(oclinfo[0]);
+        cv::ocl::setBinpath(CLBINPATH); // same macro the other perf fixtures pass, instead of a hard-coded E:\ drive
+    }
+};
+
+////////////////////////////////faceDetect/////////////////////////////////////////////////
+
+struct Haar : HaarTestBase {};
+
+TEST_P(Haar, FaceDetect) // times 10 OpenCL detection passes per image, then writes the annotated image to disk
+{
+    for(int index = 1;index < 2; index++)
+    {
+        Mat img;
+        char buff[256];
+#if WIN32
+        sprintf(buff,"E:\\myDataBase\\%d.jpg",index);
+        img = imread( buff, 1 );
+#else
+        sprintf(buff,"%d.jpg",index);
+        img = imread( buff, 1 );
+        std::cout << "Now test " << index << ".jpg" <<std::endl;
+#endif
+        if(img.empty()) // NOTE(review): the lines from the #endif above through "vector<Rect> faces;" were lost to markup stripping and are restored to fit the 198-line hunk - confirm against upstream
+        {
+            std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
+            continue;
+        }
+
+        int i = 0;
+        double t = 0;
+        vector<Rect> faces;
+
+        const static Scalar colors[] = { CV_RGB(0,0,255),
+            CV_RGB(0,128,255),
+            CV_RGB(0,255,255),
+            CV_RGB(0,255,0),
+            CV_RGB(255,128,0),
+            CV_RGB(255,255,0),
+            CV_RGB(255,0,0),
+            CV_RGB(255,0,255)} ;
+
+        Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
+        MemStorage storage(cvCreateMemStorage(0));
+        cvtColor( img, gray, CV_BGR2GRAY );
+        resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+        equalizeHist( smallImg, smallImg );
+        CvMat _image = smallImg;
+
+        Mat tempimg(&_image, false);
+
+        cv::ocl::oclMat image(tempimg);
+        CvSeq* _objects;
+
+#if 1
+        for(int k= 0; k<10; k++) // ten timed detection passes over the same image
+        {
+            t = (double)cvGetTickCount();
+            _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+                    2, 0
+                    |CV_HAAR_SCALE_IMAGE
+                    , Size(30,30), Size(0, 0) );
+
+            t = (double)cvGetTickCount() - t ;
+            printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+        }
+
+#else
+        cpucascade.detectMultiScale( image, faces, 1.1,
+                2, 0
+                |CV_HAAR_SCALE_IMAGE
+                , Size(30,30), Size(0, 0) );
+
+#endif
+        vector<CvAvgComp> vecAvgComp;
+        Seq<CvAvgComp>(_objects).copyTo(vecAvgComp); // unpack the CvSeq result into a std::vector
+        faces.resize(vecAvgComp.size());
+        std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
+
+        for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
+        {
+            Mat smallImgROI;
+            vector<Rect> nestedObjects;
+            Point center;
+            Scalar color = colors[i%8];
+            int radius;
+            center.x = cvRound((r->x + r->width*0.5)*scale);
+            center.y = cvRound((r->y + r->height*0.5)*scale);
+            radius = cvRound((r->width + r->height)*0.25*scale);
+            circle( img, center, radius, color, 3, 8, 0 );
+        }
+
+#if WIN32
+        sprintf(buff,"E:\\result1\\%d.jpg",index);
+        imwrite(buff,img);
+#else
+        sprintf(buff,"testdet_%d.jpg",index);
+        imwrite(buff,img);
+#endif
+    }
+}
+
+
+//INSTANTIATE_TEST_CASE_P(HaarTestBase, Haar, Combine(Values(1),
+// Values(1)));
+
+
+#endif // HAVE_OPENCL
diff --git a/modules/ocl/perf/test_imgproc.cpp b/modules/ocl/perf/test_imgproc.cpp
new file mode 100644
index 0000000..e01e976
--- /dev/null
+++ b/modules/ocl/perf/test_imgproc.cpp
@@ -0,0 +1,1551 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Jia Haipeng, jiahaipeng95@gmail.com +// Shengen Yan, yanshengen@gmail.com +// Jiang Liyuan, lyuan001.good@163.com +// Rock Li, Rock.Li@amd.com +// Zailong Wu, bullet@yeah.net +// Xu Pang, pangxu010@163.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; + + +MatType nulltype = -1; + +#define ONE_TYPE(type) testing::ValuesIn(typeVector(type)) +#define NULL_TYPE testing::ValuesIn(typeVector(nulltype)) + + +vector typeVector(MatType type) +{ + vector v; + v.push_back(type); + return v; +} + + +PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool) +{ + int type1,type2,type3,type4,type5; + cv::Scalar val; + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int dst1x; + int dst1y; + int maskx; + int masky; + + //mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + //mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + std::vector oclinfo; + //ocl mat + cv::ocl::oclMat clmat1; + cv::ocl::oclMat clmat2; + cv::ocl::oclMat clmask; + cv::ocl::oclMat cldst; + cv::ocl::oclMat cldst1; //bak + + //ocl mat with roi + cv::ocl::oclMat clmat1_roi; + cv::ocl::oclMat clmat2_roi; + cv::ocl::oclMat clmask_roi; + cv::ocl::oclMat cldst_roi; + cv::ocl::oclMat cldst1_roi; + + virtual void SetUp() + { + type1 = GET_PARAM(0); + type2 = GET_PARAM(1); + type3 = GET_PARAM(2); + type4 = GET_PARAM(3); + type5 = GET_PARAM(4); + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + double min = 1,max = 20; + int devnums = getDevice(oclinfo); + CV_Assert(devnums>0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + if(type1!=nulltype) + { + mat1 = randomMat(rng, size, type1, min, max, false); + clmat1 = mat1; + } + if(type2!=nulltype) + { + mat2 = randomMat(rng, size, type2, min, max, false); + clmat2 = mat2; + } + if(type3!=nulltype) + { + dst = randomMat(rng, size, type3, min, max, false); + cldst = dst; + } + 
if(type4!=nulltype) + { + dst1 = randomMat(rng, size, type4, min, max, false); + cldst1 = dst1; + } + if(type5!=nulltype) + { + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + cv::threshold(mask, mask, 0.5, 255., type5); + clmask = mask; + } + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty =1; + dst1x = 1; + dst1y =1; + maskx =1; + masky =1; + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + dst1x =0; + dst1y =0; + maskx =0; + masky =0; + }; + + if(type1!=nulltype) + { + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2!=nulltype) + { + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3!=nulltype) + { + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4!=nulltype) + { + dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5!=nulltype) + { + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } + + void random_roi() + { + cv::RNG& rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - 
roirows); + dst1x = rng.uniform(0, dst1.cols - roicols); + dst1y = rng.uniform(0, dst1.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + if(type1!=nulltype) + { + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2!=nulltype) + { + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3!=nulltype) + { + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4!=nulltype) + { + dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5!=nulltype) + { + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } +}; +////////////////////////////////equalizeHist////////////////////////////////////////// + +struct equalizeHist : ImgprocTestBase {}; + +TEST_P(equalizeHist, MatType) +{ + if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) + { + cout<<"Unsupported type"< oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + //dsize = GET_PARAM(1); + interpolation = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols-1; //start + src_roirows = mat1.rows-1; + dst_roicols=dst.cols-1; + 
dst_roirows=dst.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols=dst.cols; + dst_roirows=dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); + dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + + } + +}; + +/////warpAffine + +struct WarpAffine : WarpTestBase{}; + +TEST_P(WarpAffine, Mat) +{ + static const double coeffs[2][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0} + }; + Mat M(2, 3, CV_64F, (void*)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + + +// warpPerspective + +struct WarpPerspective : WarpTestBase{}; + +TEST_P(WarpPerspective, Mat) +{ + static const double coeffs[3][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0}, + {0.0, 0.0, 1.0} + }; + Mat M(3, 3, CV_64F, (void*)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// resize: fixture for timing cv::resize against cv::ocl::resize + +PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) +{ + int type; + cv::Size dsize; + double fx, fy; + int interpolation; + + //host source and destination matrices + cv::Mat mat1; + cv::Mat dst; + + // ROI geometry, assigned by Has_roi() + int src_roicols; + int src_roirows; + int dst_roicols; + int dst_roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dsize = GET_PARAM(1); + fx = GET_PARAM(2); + fy = GET_PARAM(3); + interpolation = GET_PARAM(4); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + if(dsize == cv::Size() && !(fx > 0 && fy > 0)) + { + cout << "invalid dsize and fx fy" << endl; + return; + } + + if(dsize == cv::Size()) + { + dsize.width = (int)(size.width * fx); + dsize.height = (int)(size.height * fy); + } + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, dsize, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //to use a non-default device, select it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void
Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols-1; //start + src_roirows = mat1.rows-1; + dst_roicols=dst.cols-1; + dst_roirows=dst.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols=dst.cols; + dst_roirows=dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); + dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + + } + +}; + +TEST_P(Resize, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows)); + gmat1 = mat1_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +//threshold: fixture for timing cv::threshold against cv::ocl::threshold + +PARAM_TEST_CASE(Threshold, MatType, ThreshOp) +{ + int type; + int threshOp; + + //host source and destination matrices + cv::Mat mat1; + cv::Mat dst; + + // ROI geometry, assigned by Has_roi() + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + threshOp = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //to use a non-default device, select it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //fixed ROI: drop the first row and column + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty =1; + + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + + } +}; + +TEST_P(Threshold, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0;
+ double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); + t0 = (double)cvGetTickCount();//cpu start + cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + gmat1 = mat1_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); + }; +#endif + +} +/////////////////////////////////////////////////////////////////////////////////////////////////// +//meanShift: shared fixture for the meanShiftFiltering and meanShiftProc timing tests + +PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) +{ + int type, typeCoor; + int sp, sr; + cv::TermCriteria crit; + //host source and destination matrices + cv::Mat src; + cv::Mat dst; + cv::Mat dstCoor; + + //ROI geometry, assigned by Has_roi() + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat src_roi; + cv::Mat dst_roi; + cv::Mat dstCoor_roi; + + //ocl dst mat + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdstCoor; + + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl mat with roi + cv::ocl::oclMat gsrc_roi; + cv::ocl::oclMat gdst_roi; + cv::ocl::oclMat gdstCoor_roi; + + virtual void SetUp() + { + type = GET_PARAM(0); + typeCoor = GET_PARAM(1); + sp = GET_PARAM(2); + sr = GET_PARAM(3); + crit = GET_PARAM(4); + + cv::RNG &rng = TS::ptr()->get_rng(); + + // MWIDTH=256, MHEIGHT=256.
defined in utility.hpp + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + src = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); + + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + //randomize ROI + roicols = src.cols - 1; + roirows = src.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + }else + { + roicols = src.cols; + roirows = src.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + src_roi = src(Rect(srcx, srcy, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); + + gdst = dst; + gdstCoor = dstCoor; + } +}; + +/////////////////////////meanShiftFiltering///////////////////////////// +struct meanShiftFiltering : meanShiftTestBase {}; + +TEST_P(meanShiftFiltering, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double t1=0; + double t2=0; + for(int k=0;k<2;k++) + { + double totalgputick=0; + double totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t1 = (double)cvGetTickCount();//gpu start1 + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + + t2=(double)cvGetTickCount();//kernel + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_gdst; + gdst.download(cpu_gdst);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + }; +#endif + +} + +///////////////////////////meanShiftProc////////////////////////////////// +struct meanShiftProc : meanShiftTestBase {}; + +TEST_P(meanShiftProc, Mat) +{ + +#ifndef PRINT_KERNEL_RUN_TIME + double t1=0; + double t2=0; + for(int k=0;k<2;k++) + { + double totalgputick=0; + double totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t1 = (double)cvGetTickCount();//gpu start1 + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + + t2=(double)cvGetTickCount();//kernel + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_gdstCoor; + gdstCoor.download(cpu_gdstCoor);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + }; +#endif + +} + + + +//************test******************* + +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_8UC1), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( +// ONE_TYPE(CV_8UC1), +// NULL_TYPE, +// ONE_TYPE(CV_8UC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter +// +// +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( +// Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), +// NULL_TYPE, +// Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter + +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( +// Values(CV_8UC1,CV_32FC1), +// NULL_TYPE, +// ONE_TYPE(CV_32FC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter +// +//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine( +// Values(CV_8UC1,CV_32FC1), +// NULL_TYPE, +// ONE_TYPE(CV_32FC1), +// NULL_TYPE, +// NULL_TYPE, +// Values(false))); // Values(false) is the reserved parameter + + +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_32SC1), + ONE_TYPE(CV_32FC1), + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, + 
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine + (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, + (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), + Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); + + +INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( + Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), + ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), + ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) + )); + +INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2), + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1)) + )); + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/test_matrix_operation.cpp b/modules/ocl/perf/test_matrix_operation.cpp new file mode 100644 index 0000000..cc9a142 --- /dev/null +++ b/modules/ocl/perf/test_matrix_operation.cpp @@ -0,0 +1,616 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
+// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv::ocl; +////////////////////////////////convertTo//////////////////////////////////////////////// +PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType) +{ + int type; + int dst_type; + + //host source and destination matrices + cv::Mat mat; + cv::Mat dst; + + // ROI geometry, assigned by Has_roi() + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dst_type = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, CV_MAKETYPE(CV_MAT_DEPTH(dst_type), CV_MAT_CN(type)), 5, 16, false); //convertTo changes depth only and keeps the source channels; giving dst that type up front lets the ROI be written in place instead of reallocated + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //to use a non-default device, select it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //fixed ROI: drop the first row and column + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi =
mat(Rect(srcx,srcy,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat = mat_roi; + } +}; + + +struct ConvertTo :ConvertToTestBase {}; + +TEST_P(ConvertTo, Accuracy) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.convertTo(dst_roi, dst_type); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + gmat.convertTo(gdst, dst_type); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.convertTo(gdst, dst_type); + }; +#endif + +} + + +///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// + +PARAM_TEST_CASE(CopyToTestBase, MatType, bool) +{ + int type; + + cv::Mat mat; + cv::Mat mask; + cv::Mat dst; + + // ROI geometry, assigned by Has_roi() + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY); //binarize the mask: values above 0.5 become 255 + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //to use a non-default device, select it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //fixed ROI: drop the first row and column + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty =1; + maskx = 1; + masky = 1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + +
//gmat = mat_roi; + //gmask = mask_roi; + } +}; + +struct CopyTo :CopyToTestBase {}; + +TEST_P(CopyTo, Without_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.copyTo(dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + gmat.copyTo(gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.copyTo(gdst); + }; +#endif +} + +TEST_P(CopyTo, With_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.copyTo(dst_roi,mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + gmat.copyTo(gdst, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + gmat = mat_roi; + gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.copyTo(gdst, gmask); + }; +#endif +} + +///////////////////////////////////////////setto////////////////////////////////////////////////////////////// + +PARAM_TEST_CASE(SetToTestBase, MatType, bool) +{ + int type; + cv::Scalar val; + + cv::Mat mat; + cv::Mat mask; + + // ROI geometry, assigned by Has_roi() + int roicols; + int roirows; + int srcx; + int srcy; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + std::vector<cv::ocl::Info> oclinfo; //filled by getDevice() in SetUp() + //ocl dst mat for testing + cv::ocl::oclMat gmat_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., cv::THRESH_BINARY); //binarize the mask: values above 0.5 become 255 + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //to use a non-default device, select it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //fixed ROI: drop the first row and column + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcy = 1; + maskx = 1; + masky = 1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + mask_roi = mask(Rect(maskx,masky,roicols,roirows)); + + //gmat_whole = mat; + //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + //gmask = mask_roi; + } +}; + +struct SetTo :SetToTestBase {}; + +TEST_P(SetTo,
Without_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.setTo(val); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + t2=(double)cvGetTickCount();//kernel + gmat.setTo(val); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gmat_whole.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.setTo(val); + }; +#endif +} + +TEST_P(SetTo, With_mask) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + mat_roi.setTo(val, mask_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + gmask = mask_roi; + t2=(double)cvGetTickCount();//kernel + gmat.setTo(val, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gmat_whole.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat_whole = mat; + gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + gmask = mask_roi; + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + gmat.setTo(val, gmask); + }; +#endif +} + +//**********test************ + +INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); + +INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter +#endif diff --git a/modules/ocl/perf/test_split_merge.cpp b/modules/ocl/perf/test_split_merge.cpp new file mode 100644 index 0000000..e3e8ee4 --- /dev/null +++ b/modules/ocl/perf/test_split_merge.cpp @@ -0,0 +1,455 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_OPENCL + +using namespace cvtest; +using namespace testing; +using namespace std; +using namespace cv::ocl; +PARAM_TEST_CASE(MergeTestBase, MatType, int) +{ + int type; + int channels; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mat3; + cv::Mat mat4; + + //dst mat + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int src3x; + int src3y; + int src4x; + int src4y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mat3_roi; + cv::Mat mat4_roi; + + //dst mat with roi + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gmat3; + cv::ocl::oclMat gmat4; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + int devnums = getDevice(oclinfo); + CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols-1; //start + roirows = mat1.rows-1; + src1x = 1; + src1y = 1; + src2x = 1; + src2y = 1; + src3x = 1; + src3y = 1; + src4x = 1; + src4y = 1; + dstx = 1; + dsty =1; + + }else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + src2x = 0; + src2y = 0; + src3x = 0; + src3y = 0; + src4x = 0; + 
src4y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); + mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); + mat3_roi = mat3(Rect(src3x,src3y,roicols,roirows)); + mat4_roi = mat4(Rect(src4x,src4y,roicols,roirows)); + + + dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); + } + +}; + +struct Merge : MergeTestBase {}; + +TEST_P(Merge, Accuracy) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + std::vector dev_src; + dev_src.push_back(mat1_roi); + dev_src.push_back(mat2_roi); + dev_src.push_back(mat3_roi); + dev_src.push_back(mat4_roi); + t0 = (double)cvGetTickCount();//cpu start + cv::merge(dev_src, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 ] + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmat3 = mat3_roi; + gmat4 = mat4_roi; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + std::vector dev_gsrc; + dev_gsrc.push_back(gmat1); + dev_gsrc.push_back(gmat2); + dev_gsrc.push_back(gmat3); + dev_gsrc.push_back(gmat4); + t2=(double)cvGetTickCount();//kernel + cv::ocl::merge(dev_gsrc, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmat3 = mat3_roi; + gmat4 = mat4_roi; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + std::vector dev_gsrc; + dev_gsrc.push_back(gmat1); + dev_gsrc.push_back(gmat2); + dev_gsrc.push_back(gmat3); + dev_gsrc.push_back(gmat4); + + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::merge(dev_gsrc, gdst); + }; +#endif +} + + +PARAM_TEST_CASE(SplitTestBase, MatType, int) +{ + int type; + int channels; + + //src mat + cv::Mat mat; + + //dstmat + cv::Mat dst1; + cv::Mat dst2; + cv::Mat dst3; + cv::Mat dst4; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dst1x; + int dst1y; + int dst2x; + int dst2y; + int dst3x; + int dst3y; + int dst4x; + int dst4y; + + //src mat with roi + cv::Mat mat_roi; + + //dst mat with roi + cv::Mat dst1_roi; + cv::Mat dst2_roi; + cv::Mat dst3_roi; + cv::Mat dst4_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst1_whole; + cv::ocl::oclMat gdst2_whole; + cv::ocl::oclMat gdst3_whole; + cv::ocl::oclMat gdst4_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst1; + cv::ocl::oclMat gdst2; + cv::ocl::oclMat gdst3; + cv::ocl::oclMat gdst4; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + + cv::RNG& rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + int devnums = getDevice(oclinfo); + 
CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols-1; //start + roirows = mat.rows-1; + srcx = 1; + srcx = 1; + dst1x = 1; + dst1y =1; + dst2x = 1; + dst2y =1; + dst3x = 1; + dst3y =1; + dst4x = 1; + dst4y =1; + }else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dst1x = 0; + dst1y = 0; + dst2x = 0; + dst2y =0; + dst3x = 0; + dst3y =0; + dst4x = 0; + dst4y =0; + }; + + mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); + + dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); + dst2_roi = dst2(Rect(dst2x,dst2y,roicols,roirows)); + dst3_roi = dst3(Rect(dst3x,dst3y,roicols,roirows)); + dst4_roi = dst4(Rect(dst4x,dst4y,roicols,roirows)); + } + +}; + +struct Split :SplitTestBase {}; + +TEST_P(Split, Accuracy) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick=0; + double totalgputick=0; + double totalgputick_kernel=0; + double t0=0; + double t1=0; + double t2=0; + for(int k=0;k<2;k++){ + totalcputick=0; + totalgputick=0; + totalgputick_kernel=0; + for(int j = 0; j < LOOP_TIMES+1; j ++) + { + Has_roi(k); + cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; + cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; + t0 = (double)cvGetTickCount();//cpu start + cv::split(mat_roi, dev_dst); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows)); + + gdst2_whole = dst2; + gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows)); + + gdst3_whole = dst3; + gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows)); + + gdst4_whole = dst4; + gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows)); + + gmat = mat_roi; + t2=(double)cvGetTickCount();//kernel + cv::ocl::split(gmat, dev_gdst); + t2 = (double)cvGetTickCount() - 
t2;//kernel + cv::Mat cpu_dst1; + cv::Mat cpu_dst2; + cv::Mat cpu_dst3; + cv::Mat cpu_dst4; + gdst1_whole.download(cpu_dst1); + gdst2_whole.download(cpu_dst2); + gdst3_whole.download(cpu_dst3); + gdst4_whole.download(cpu_dst4); + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick=t1+totalgputick; + totalcputick=t0+totalcputick; + totalgputick_kernel=t2+totalgputick_kernel; + + } + if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; + cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; + cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows)); + + gdst2_whole = dst2; + gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows)); + + gdst3_whole = dst3; + gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows)); + + gdst4_whole = dst4; + gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows)); + gmat = mat_roi; + if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; + cv::ocl::split(gmat, dev_gdst); + }; +#endif +} + +//*************test***************** +INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( + Values(CV_8UC4, CV_32FC4), Values(1, 4))); + +INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( + Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); + +#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp new file mode 100644 index 0000000..417f72f --- /dev/null +++ b/modules/ocl/perf/utility.cpp @@ -0,0 +1,265 @@ 
+/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#define VARNAME(A) #A +using namespace std; +using namespace cv; +using namespace cv::gpu; +using namespace cvtest; + + +//std::string generateVarList(int first,...) +//{ +// vector varname; +// +// va_list argp; +// string s; +// stringstream ss; +// va_start(argp,first); +// int i=first; +// while(i!=-1) +// { +// ss<get_rng(); + return rng.uniform(minVal, maxVal); +} + +double randomDouble(double minVal, double maxVal) +{ + RNG& rng = TS::ptr()->get_rng(); + return rng.uniform(minVal, maxVal); +} + +Size randomSize(int minVal, int maxVal) +{ + return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); +} + +Scalar randomScalar(double minVal, double maxVal) +{ + return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); +} + +Mat randomMat(Size size, int type, double minVal, double maxVal) +{ + return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); +} + + + + + + + +/* +void showDiff(InputArray gold_, InputArray actual_, double eps) +{ + Mat gold; + if (gold_.kind() == _InputArray::MAT) + gold = gold_.getMat(); + else + gold_.getGpuMat().download(gold); + + Mat actual; + if (actual_.kind() == _InputArray::MAT) + actual = actual_.getMat(); + else + actual_.getGpuMat().download(actual); + + Mat diff; + absdiff(gold, actual, diff); + threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY); + 
+ namedWindow("gold", WINDOW_NORMAL); + namedWindow("actual", WINDOW_NORMAL); + namedWindow("diff", WINDOW_NORMAL); + + imshow("gold", gold); + imshow("actual", actual); + imshow("diff", diff); + + waitKey(); +} +*/ + +/* +bool supportFeature(const DeviceInfo& info, FeatureSet feature) +{ + return TargetArchs::builtWith(feature) && info.supports(feature); +} + +const vector& devices() +{ + static vector devs; + static bool first = true; + + if (first) + { + int deviceCount = getCudaEnabledDeviceCount(); + + devs.reserve(deviceCount); + + for (int i = 0; i < deviceCount; ++i) + { + DeviceInfo info(i); + if (info.isCompatible()) + devs.push_back(info); + } + + first = false; + } + + return devs; +} + +vector devices(FeatureSet feature) +{ + const vector& d = devices(); + + vector devs_filtered; + + if (TargetArchs::builtWith(feature)) + { + devs_filtered.reserve(d.size()); + + for (size_t i = 0, size = d.size(); i < size; ++i) + { + const DeviceInfo& info = d[i]; + + if (info.supports(feature)) + devs_filtered.push_back(info); + } + } + + return devs_filtered; +} +*/ + +vector types(int depth_start, int depth_end, int cn_start, int cn_end) +{ + vector v; + + v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1)); + + for (int depth = depth_start; depth <= depth_end; ++depth) + { + for (int cn = cn_start; cn <= cn_end; ++cn) + { + v.push_back(CV_MAKETYPE(depth, cn)); + } + } + + return v; +} + +const vector& all_types() +{ + static vector v = types(CV_8U, CV_64F, 1, 4); + + return v; +} + +Mat readImage(const string& fileName, int flags) +{ + return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags); +} + +Mat readImageType(const string& fname, int type) +{ + Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? 
IMREAD_GRAYSCALE : IMREAD_COLOR); + if (CV_MAT_CN(type) == 4) + { + Mat temp; + cvtColor(src, temp, cv::COLOR_BGR2BGRA); + swap(src, temp); + } + src.convertTo(src, CV_MAT_DEPTH(type)); + return src; +} + +double checkNorm(const Mat& m) +{ + return norm(m, NORM_INF); +} + +double checkNorm(const Mat& m1, const Mat& m2) +{ + return norm(m1, m2, NORM_INF); +} + +double checkSimilarity(const Mat& m1, const Mat& m2) +{ + Mat diff; + matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED); + return std::abs(diff.at(0, 0) - 1.f); +} + +/* +void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os) +{ + (*os) << info.name(); +} +*/ + +void PrintTo(const Inverse& inverse, std::ostream* os) +{ + if (inverse) + (*os) << "inverse"; + else + (*os) << "direct"; +} diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp new file mode 100644 index 0000000..0a0bfba --- /dev/null +++ b/modules/ocl/perf/utility.hpp @@ -0,0 +1,177 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEST_UTILITY_HPP__ +#define __OPENCV_TEST_UTILITY_HPP__ +//#define PRINT_KERNEL_RUN_TIME +#ifdef PRINT_KERNEL_RUN_TIME +#define LOOP_TIMES 1 +#else +#define LOOP_TIMES 1 +#endif +#define MWIDTH 2557 +#define MHEIGHT 2579 +#define CLBINPATH ".\\" +int randomInt(int minVal, int maxVal); +double randomDouble(double minVal, double maxVal); + +//std::string generateVarList(int first,...); +std::string generateVarList(int& p1,int& p2); +cv::Size randomSize(int minVal, int maxVal); +cv::Scalar randomScalar(double minVal, double maxVal); +cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); + +void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); + +//! 
return true if device supports specified feature and gpu module was built with support the feature. +//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); + +//! return all devices compatible with current gpu module build. +//const std::vector& devices(); +//! return all devices compatible with current gpu module build which support specified feature. +//std::vector devices(cv::gpu::FeatureSet feature); + +//! read image from testdata folder. +cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); +cv::Mat readImageType(const std::string& fname, int type); + +double checkNorm(const cv::Mat& m); +double checkNorm(const cv::Mat& m1, const cv::Mat& m2); +double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2); + +#define EXPECT_MAT_NORM(mat, eps) \ +{ \ + EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ +} + +//#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ +//{ \ +// ASSERT_EQ(mat1.type(), mat2.type()); \ +// ASSERT_EQ(mat1.size(), mat2.size()); \ +// EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ +//} + +#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ +{ \ + ASSERT_EQ(mat1.type(), mat2.type()); \ + ASSERT_EQ(mat1.size(), mat2.size()); \ + EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)< types(int depth_start, int depth_end, int cn_start, int cn_end); + +//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). 
+const std::vector& all_types(); + +class Inverse +{ + public: + inline Inverse(bool val = false) : val_(val) {} + + inline operator bool() const { return val_; } + + private: + bool val_; +}; + +void PrintTo(const Inverse& useRoi, std::ostream* os); + +CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) + +CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) + + enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) + +CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) + + CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); + +CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT) + +CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) + +CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC) + +CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) + +CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP) + +CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED) + +CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) + +void run_perf_test(); + +#define PARAM_TEST_CASE(name, ...) 
struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > + +#define GET_PARAM(k) std::tr1::get< k >(GetParam()) + +#define ALL_DEVICES testing::ValuesIn(devices()) +#define DEVICES(feature) testing::ValuesIn(devices(feature)) + +#define ALL_TYPES testing::ValuesIn(all_types()) +#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) + +#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) + +#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) + +#endif // __OPENCV_TEST_UTILITY_HPP__ -- 2.7.4