From 5bc9f15dda83d1974aff629c8070498e6a302490 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 11 Dec 2013 22:58:05 -0200 Subject: [PATCH] OCL: included sqrt matrix operation. --- modules/ocl/doc/operations_on_matrices.rst | 12 ++++ modules/ocl/include/opencv2/ocl.hpp | 4 ++ modules/ocl/perf/perf_arithm.cpp | 34 +++++++++ modules/ocl/src/arithm.cpp | 11 ++- modules/ocl/src/opencl/arithm_sqrt.cl | 111 +++++++++++++++++++++++++++++ modules/ocl/test/test_arithm.cpp | 17 +++++ 6 files changed, 186 insertions(+), 3 deletions(-) create mode 100644 modules/ocl/src/opencl/arithm_sqrt.cl diff --git a/modules/ocl/doc/operations_on_matrices.rst b/modules/ocl/doc/operations_on_matrices.rst index 7efd719..1763d33 100644 --- a/modules/ocl/doc/operations_on_matrices.rst +++ b/modules/ocl/doc/operations_on_matrices.rst @@ -557,6 +557,18 @@ Returns void The functions split split multi-channel array into separate single-channel arrays. Supports all data types. +ocl::sqrt +------------------ +Returns void + +.. ocv:function:: void ocl::sqrt(const oclMat &src, oclMat &dst) + + :param src: the source array. + + :param dst: the dst array; must have the same size and same type as ``src``. + +The function ``sqrt`` calculates the square root of each input array element. Supports only ``CV_32FC1`` and ``CV_64F`` data types. + ocl::subtract ------------------ Returns void diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index 1f03170..6a972bc 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -559,6 +559,10 @@ namespace cv // supports only CV_32FC1, CV_64FC1 type CV_EXPORTS void log(const oclMat &src, oclMat &dst); + //! computes square root of each matrix element + // supports only CV_32FC1, CV_64FC1 type + CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst); + //! 
computes magnitude of each (x(i), y(i)) vector // supports only CV_32F, CV_64F type CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude); diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index 2699b44..592c652 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -162,6 +162,40 @@ PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES) SANITY_CHECK(dst, eps, ERROR_RELATIVE); } +///////////// SQRT /////////////////////// + +typedef TestBaseWithParam SqrtFixture; + +PERF_TEST_P(SqrtFixture, Sqrt, OCL_TYPICAL_MAT_SIZES) +{ + // getting params + const Size srcSize = GetParam(); + const double eps = 1e-6; + + // creating src data + Mat src(srcSize, CV_32F), dst(srcSize, src.type()); + randu(src, 0, 10); + declare.in(src).out(dst); + + // select implementation + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(srcSize, src.type()); + + OCL_TEST_CYCLE() cv::ocl::sqrt(oclSrc, oclDst); + + oclDst.download(dst); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() cv::sqrt(src, dst); + } + else + OCL_PERF_ELSE + + SANITY_CHECK(dst, eps, ERROR_RELATIVE); +} + ///////////// Add //////////////////////// typedef Size_MatType AddFixture; diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 08f9dfc..d008e8b 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -839,7 +839,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) //////////////////////////////// exp log ///////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) +static void arithmetic_exp_log_sqrt_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source) { Context *clCxt = src.clCxt; if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == 
CV_64F) @@ -882,12 +882,17 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernel void cv::ocl::exp(const oclMat &src, oclMat &dst) { - arithmetic_exp_log_run(src, dst, "arithm_exp", &arithm_exp); + arithmetic_exp_log_sqrt_run(src, dst, "arithm_exp", &arithm_exp); } void cv::ocl::log(const oclMat &src, oclMat &dst) { - arithmetic_exp_log_run(src, dst, "arithm_log", &arithm_log); + arithmetic_exp_log_sqrt_run(src, dst, "arithm_log", &arithm_log); +} + +void cv::ocl::sqrt(const oclMat &src, oclMat &dst) +{ + arithmetic_exp_log_sqrt_run(src, dst, "arithm_sqrt", &arithm_sqrt); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_sqrt.cl b/modules/ocl/src/opencl/arithm_sqrt.cl new file mode 100644 index 0000000..142a52a --- /dev/null +++ b/modules/ocl/src/opencl/arithm_sqrt.cl @@ -0,0 +1,111 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peter Andreas Entschev, peter@entschev.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////SQRT//////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +__kernel void arithm_sqrt_C1(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x + srcOffset1); + int dstIdx = mad24(y, dstStep1, x + dstOffset1); + + dst[dstIdx] = sqrt(src[srcIdx]); + } +} + +__kernel void arithm_sqrt_C2(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) +{ + int x1 = get_global_id(0) << 1; + int y = get_global_id(1); + + if(x1 < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); + + dst[dstIdx] = sqrt(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? sqrt(src[srcIdx + 1]) : dst[dstIdx + 1]; + } +} + +__kernel void arithm_sqrt_C4(__global srcT *src, __global srcT *dst, + int cols1, int rows, + int srcOffset1, int dstOffset1, + int srcStep1, int dstStep1) +{ + int x1 = get_global_id(0) << 2; + int y = get_global_id(1); + + if(x1 < cols1 && y < rows) + { + int srcIdx = mad24(y, srcStep1, x1 + srcOffset1); + int dstIdx = mad24(y, dstStep1, x1 + dstOffset1); + + dst[dstIdx] = sqrt(src[srcIdx]); + dst[dstIdx + 1] = x1 + 1 < cols1 ? sqrt(src[srcIdx + 1]) : dst[dstIdx + 1]; + dst[dstIdx + 2] = x1 + 2 < cols1 ? sqrt(src[srcIdx + 2]) : dst[dstIdx + 2]; + dst[dstIdx + 3] = x1 + 3 < cols1 ? 
sqrt(src[srcIdx + 3]) : dst[dstIdx + 3]; + } +} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 1726058..bf37afd 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -278,6 +278,22 @@ OCL_TEST_P(Log, Mat) } } +//////////////////////////////// Sqrt //////////////////////////////////////////////// + +typedef ArithmTestBase Sqrt; + +OCL_TEST_P(Sqrt, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::sqrt(src1_roi, dst1_roi); + cv::ocl::sqrt(gsrc1_roi, gdst1_roi); + Near(1); + } +} + //////////////////////////////// Add ///////////////////////////////////////////////// typedef ArithmTestBase Add; @@ -1569,6 +1585,7 @@ OCL_TEST_P(Repeat, Mat) INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool(), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool())); -- 2.7.4