From 6ad4823f0c421fd3458817d347b8af10867a587a Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 22 Jan 2014 19:13:29 +0400 Subject: [PATCH] ported superres to T-API --- modules/core/include/opencv2/core/mat.hpp | 2 +- .../core/include/opencv2/core/opencl/ocl_defs.hpp | 2 + modules/core/src/matrix.cpp | 12 +- modules/superres/CMakeLists.txt | 3 +- modules/superres/include/opencv2/superres.hpp | 2 + modules/superres/perf/perf_superres.cpp | 67 +- modules/superres/perf/perf_superres_ocl.cpp | 143 ---- modules/superres/src/btv_l1.cpp | 611 +++++++++++++++-- modules/superres/src/btv_l1_ocl.cpp | 725 --------------------- modules/superres/src/frame_source.cpp | 15 +- modules/superres/src/input_array_utility.cpp | 149 ++--- modules/superres/src/input_array_utility.hpp | 9 +- modules/superres/src/opencl/superres_btvl1.cl | 189 +++--- modules/superres/src/optical_flow.cpp | 342 ++-------- modules/superres/src/precomp.hpp | 4 - modules/superres/src/super_resolution.cpp | 5 + modules/superres/test/test_superres.cpp | 51 +- modules/ts/include/opencv2/ts/ocl_perf.hpp | 4 + 18 files changed, 861 insertions(+), 1474 deletions(-) delete mode 100644 modules/superres/perf/perf_superres_ocl.cpp delete mode 100644 modules/superres/src/btv_l1_ocl.cpp diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index dcbac6b..d9f06cb 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -217,7 +217,7 @@ public: virtual void createSameSize(const _InputArray& arr, int mtype) const; virtual void release() const; virtual void clear() const; - virtual void setTo(const _InputArray& value) const; + virtual void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const; }; diff --git a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp index 55abd7c..cec5846 100644 --- a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp +++ b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp @@ -5,6 +5,8 @@ // Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. +//#define CV_OPENCL_RUN_VERBOSE + #ifdef HAVE_OPENCL #ifdef CV_OPENCL_RUN_VERBOSE diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index af2ca7d..510b178 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -2560,7 +2560,7 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const return *(cuda::CudaMem*)obj; } -void _OutputArray::setTo(const _InputArray& arr) const +void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const { int k = kind(); @@ -2569,10 +2569,16 @@ void _OutputArray::setTo(const _InputArray& arr) const else if( k == MAT || k == MATX || k == STD_VECTOR ) { Mat m = getMat(); - m.setTo(arr); + m.setTo(arr, mask); } else if( k == UMAT ) - ((UMat*)obj)->setTo(arr); + ((UMat*)obj)->setTo(arr, mask); + else if( k == GPU_MAT ) + { + Mat value = arr.getMat(); + CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::GPU_MAT) ); + ((cuda::GpuMat*)obj)->setTo(Scalar(Vec((double *)value.data)), mask); + } else CV_Error(Error::StsNotImplemented, ""); } diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 1182a3c..7514833 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -5,5 +5,4 @@ endif() set(the_description "Super Resolution") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef) ocv_define_module(superres opencv_imgproc opencv_video - OPTIONAL opencv_highgui opencv_ocl - opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc opencv_cudaoptflow opencv_cudacodec) + OPTIONAL opencv_highgui opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc opencv_cudaoptflow opencv_cudacodec) diff --git a/modules/superres/include/opencv2/superres.hpp b/modules/superres/include/opencv2/superres.hpp index 26de781..3d96e0f 100644 --- a/modules/superres/include/opencv2/superres.hpp +++ b/modules/superres/include/opencv2/superres.hpp @@ -83,6 +83,8 @@ namespace cv virtual void initImpl(Ptr& frameSource) = 0; virtual void processImpl(Ptr& frameSource, OutputArray output) = 0; + bool isUmat_; + private: Ptr frameSource_; bool firstCall_; diff --git a/modules/superres/perf/perf_superres.cpp b/modules/superres/perf/perf_superres.cpp index 810460b..e8b3ef7 100644 --- a/modules/superres/perf/perf_superres.cpp +++ b/modules/superres/perf/perf_superres.cpp @@ -41,6 +41,7 @@ //M*/ #include "perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" using namespace std; using namespace std::tr1; @@ -91,37 +92,26 @@ namespace class ZeroOpticalFlow : public DenseOpticalFlowExt { public: - void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2) + virtual void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2) { cv::Size size = frame0.size(); if (!flow2.needed()) { flow1.create(size, CV_32FC2); - - if (flow1.kind() == cv::_InputArray::GPU_MAT) - flow1.getGpuMatRef().setTo(cv::Scalar::all(0)); - else - flow1.getMatRef().setTo(cv::Scalar::all(0)); + flow1.setTo(cv::Scalar::all(0)); } else { flow1.create(size, CV_32FC1); flow2.create(size, CV_32FC1); - if (flow1.kind() == cv::_InputArray::GPU_MAT) - flow1.getGpuMatRef().setTo(cv::Scalar::all(0)); - else - flow1.getMatRef().setTo(cv::Scalar::all(0)); - - if (flow2.kind() == cv::_InputArray::GPU_MAT) - flow2.getGpuMatRef().setTo(cv::Scalar::all(0)); - else - flow2.getMatRef().setTo(cv::Scalar::all(0)); + flow1.setTo(cv::Scalar::all(0)); + flow2.setTo(cv::Scalar::all(0)); } } - void collectGarbage() + virtual void collectGarbage() { } }; @@ -181,3 +171,48 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1, CPU_SANITY_CHECK(dst); } } + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +typedef Size_MatType SuperResolution_BTVL1; + +OCL_PERF_TEST_P(SuperResolution_BTVL1 ,BTVL1, + Combine(Values(szSmall64, szSmall128), + Values(MatType(CV_8UC1), MatType(CV_8UC3)))) +{ + Size_MatType_t params = GetParam(); + const Size size = get<0>(params); + const int type = get<1>(params); + + Mat frame(size, type); + UMat dst(1, 1, 0); + declare.in(frame, WARMUP_RNG); + + const int scale = 2; + const int iterations = 50; + const int temporalAreaRadius = 1; + + Ptr opticalFlow(new ZeroOpticalFlow); + Ptr superRes = createSuperResolution_BTVL1(); + + superRes->set("scale", scale); + superRes->set("iterations", iterations); + superRes->set("temporalAreaRadius", temporalAreaRadius); + superRes->set("opticalFlow", opticalFlow); + + superRes->setInput(makePtr(frame)); + + // skip first frame + superRes->nextFrame(dst); + + OCL_TEST_CYCLE_N(10) superRes->nextFrame(dst); + + SANITY_CHECK_NOTHING(); +} + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp deleted file mode 100644 index 04a3f7e..0000000 --- a/modules/superres/perf/perf_superres_ocl.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "perf_precomp.hpp" - -#ifdef HAVE_OPENCV_OCL - -#include "opencv2/ocl.hpp" -using namespace std; -using namespace testing; -using namespace perf; -using namespace cv; -using namespace cv::superres; - -namespace -{ - class OneFrameSource_OCL : public FrameSource - { - public: - explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {} - - void nextFrame(OutputArray frame) - { - ocl::getOclMatRef(frame) = frame_; - } - void reset() - { - } - - private: - ocl::oclMat frame_; - }; - - - class ZeroOpticalFlowOCL : public DenseOpticalFlowExt - { - public: - void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2) - { - ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0); - ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1); - ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2); - - cv::Size size = frame0_.size(); - - if(!flow2.needed()) - { - flow1_.create(size, CV_32FC2); - flow1_.setTo(Scalar::all(0)); - } - else - { - flow1_.create(size, CV_32FC1); - flow2_.create(size, CV_32FC1); - - flow1_.setTo(Scalar::all(0)); - flow2_.setTo(Scalar::all(0)); - } - } - - void collectGarbage() - { - } - }; -} - -PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, - Combine(Values(szSmall64, szSmall128), - Values(MatType(CV_8UC1), MatType(CV_8UC3)))) -{ - declare.time(5 * 60); - - const Size size = std::tr1::get<0>(GetParam()); - const int type = std::tr1::get<1>(GetParam()); - - Mat frame(size, type); - declare.in(frame, WARMUP_RNG); - - ocl::oclMat frame_ocl; - frame_ocl.upload(frame); - - - const int scale = 2; - const int iterations = 50; - const int temporalAreaRadius = 1; - Ptr opticalFlowOcl(new ZeroOpticalFlowOCL); - - Ptr superRes_ocl = createSuperResolution_BTVL1_OCL(); - - superRes_ocl->set("scale", scale); - superRes_ocl->set("iterations", iterations); - superRes_ocl->set("temporalAreaRadius", temporalAreaRadius); - superRes_ocl->set("opticalFlow", opticalFlowOcl); - - superRes_ocl->setInput(makePtr(frame_ocl)); - - ocl::oclMat dst_ocl; - superRes_ocl->nextFrame(dst_ocl); - - TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl); - frame_ocl.release(); - CPU_SANITY_CHECK(dst_ocl); -} -#endif diff --git a/modules/superres/src/btv_l1.cpp b/modules/superres/src/btv_l1.cpp index bafce91..1e4aa48 100644 --- a/modules/superres/src/btv_l1.cpp +++ b/modules/superres/src/btv_l1.cpp @@ -44,6 +44,7 @@ // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::superres; @@ -51,10 +52,17 @@ using namespace cv::superres::detail; namespace { - void calcRelativeMotions(const std::vector& forwardMotions, const std::vector& backwardMotions, - std::vector& relForwardMotions, std::vector& relBackwardMotions, - int baseIdx, Size size) +#ifdef HAVE_OPENCL + + bool ocl_calcRelativeMotions(InputArrayOfArrays _forwardMotions, InputArrayOfArrays _backwardMotions, + OutputArrayOfArrays _relForwardMotions, OutputArrayOfArrays _relBackwardMotions, + int baseIdx, const Size & size) { + std::vector & forwardMotions = *(std::vector *)_forwardMotions.getObj(), + & backwardMotions = *(std::vector *)_backwardMotions.getObj(), + & relForwardMotions = *(std::vector *)_relForwardMotions.getObj(), + & relBackwardMotions = *(std::vector *)_relBackwardMotions.getObj(); + const int count = static_cast(forwardMotions.size()); relForwardMotions.resize(count); @@ -68,20 +76,84 @@ namespace for (int i = baseIdx - 1; i >= 0; --i) { add(relForwardMotions[i + 1], forwardMotions[i], relForwardMotions[i]); - add(relBackwardMotions[i + 1], backwardMotions[i + 1], relBackwardMotions[i]); } for (int i = baseIdx + 1; i < count; ++i) { add(relForwardMotions[i - 1], backwardMotions[i], relForwardMotions[i]); + add(relBackwardMotions[i - 1], forwardMotions[i - 1], relBackwardMotions[i]); + } + + return true; + } + +#endif + + void calcRelativeMotions(InputArrayOfArrays _forwardMotions, InputArrayOfArrays _backwardMotions, + OutputArrayOfArrays _relForwardMotions, OutputArrayOfArrays _relBackwardMotions, + int baseIdx, const Size & size) + { + CV_OCL_RUN(_forwardMotions.isUMatVector() && _backwardMotions.isUMatVector() && + _relForwardMotions.isUMatVector() && _relBackwardMotions.isUMatVector(), + ocl_calcRelativeMotions(_forwardMotions, _backwardMotions, _relForwardMotions, + _relBackwardMotions, baseIdx, size)) + + std::vector & forwardMotions = *(std::vector *)_forwardMotions.getObj(), + & backwardMotions = *(std::vector *)_backwardMotions.getObj(), + & relForwardMotions = *(std::vector *)_relForwardMotions.getObj(), + & relBackwardMotions = *(std::vector *)_relBackwardMotions.getObj(); + + const int count = static_cast(forwardMotions.size()); + + relForwardMotions.resize(count); + relForwardMotions[baseIdx].create(size, CV_32FC2); + relForwardMotions[baseIdx].setTo(Scalar::all(0)); + + relBackwardMotions.resize(count); + relBackwardMotions[baseIdx].create(size, CV_32FC2); + relBackwardMotions[baseIdx].setTo(Scalar::all(0)); + + for (int i = baseIdx - 1; i >= 0; --i) + { + add(relForwardMotions[i + 1], forwardMotions[i], relForwardMotions[i]); + add(relBackwardMotions[i + 1], backwardMotions[i + 1], relBackwardMotions[i]); + } + for (int i = baseIdx + 1; i < count; ++i) + { + add(relForwardMotions[i - 1], backwardMotions[i], relForwardMotions[i]); add(relBackwardMotions[i - 1], forwardMotions[i - 1], relBackwardMotions[i]); } } +#ifdef HAVE_OPENCL - void upscaleMotions(const std::vector& lowResMotions, std::vector& highResMotions, int scale) + bool ocl_upscaleMotions(InputArrayOfArrays _lowResMotions, OutputArrayOfArrays _highResMotions, int scale) { + std::vector & lowResMotions = *(std::vector *)_lowResMotions.getObj(), + & highResMotions = *(std::vector *)_highResMotions.getObj(); + + highResMotions.resize(lowResMotions.size()); + + for (size_t i = 0; i < lowResMotions.size(); ++i) + { + resize(lowResMotions[i], highResMotions[i], Size(), scale, scale, INTER_LINEAR); // TODO + multiply(highResMotions[i], Scalar::all(scale), highResMotions[i]); + } + + return true; + } + +#endif + + void upscaleMotions(InputArrayOfArrays _lowResMotions, OutputArrayOfArrays _highResMotions, int scale) + { + CV_OCL_RUN(_lowResMotions.isUMatVector() && _highResMotions.isUMatVector(), + ocl_upscaleMotions(_lowResMotions, _highResMotions, scale)) + + std::vector & lowResMotions = *(std::vector *)_lowResMotions.getObj(), + & highResMotions = *(std::vector *)_highResMotions.getObj(); + highResMotions.resize(lowResMotions.size()); for (size_t i = 0; i < lowResMotions.size(); ++i) @@ -91,10 +163,47 @@ namespace } } - void buildMotionMaps(const Mat& forwardMotion, const Mat& backwardMotion, Mat& forwardMap, Mat& backwardMap) +#ifdef HAVE_OPENCL + + bool ocl_buildMotionMaps(InputArray _forwardMotion, InputArray _backwardMotion, + OutputArray _forwardMap, OutputArray _backwardMap) { - forwardMap.create(forwardMotion.size(), CV_32FC2); - backwardMap.create(forwardMotion.size(), CV_32FC2); + ocl::Kernel k("buildMotionMaps", ocl::superres::superres_btvl1_oclsrc); + if (k.empty()) + return false; + + UMat forwardMotion = _forwardMotion.getUMat(), backwardMotion = _backwardMotion.getUMat(); + Size size = forwardMotion.size(); + + _forwardMap.create(size, CV_32FC2); + _backwardMap.create(size, CV_32FC2); + + UMat forwardMap = _forwardMap.getUMat(), backwardMap = _backwardMap.getUMat(); + + k.args(ocl::KernelArg::ReadOnlyNoSize(forwardMotion), + ocl::KernelArg::ReadOnlyNoSize(backwardMotion), + ocl::KernelArg::WriteOnlyNoSize(forwardMap), + ocl::KernelArg::WriteOnly(backwardMap)); + + size_t globalsize[2] = { size.width, size.height }; + return k.run(2, globalsize, NULL, false); + } + +#endif + + void buildMotionMaps(InputArray _forwardMotion, InputArray _backwardMotion, + OutputArray _forwardMap, OutputArray _backwardMap) + { + CV_OCL_RUN(_forwardMap.isUMat() && _backwardMap.isUMat(), + ocl_buildMotionMaps(_forwardMotion, _backwardMotion, _forwardMap, + _backwardMap)); + + Mat forwardMotion = _forwardMotion.getMat(), backwardMotion = _backwardMotion.getMat(); + + _forwardMap.create(forwardMotion.size(), CV_32FC2); + _backwardMap.create(forwardMotion.size(), CV_32FC2); + + Mat forwardMap = _forwardMap.getMat(), backwardMap = _backwardMap.getMat(); for (int y = 0; y < forwardMotion.rows; ++y) { @@ -114,40 +223,73 @@ namespace } template - void upscaleImpl(const Mat& src, Mat& dst, int scale) + void upscaleImpl(InputArray _src, OutputArray _dst, int scale) { - dst.create(src.rows * scale, src.cols * scale, src.type()); - dst.setTo(Scalar::all(0)); + Mat src = _src.getMat(); + _dst.create(src.rows * scale, src.cols * scale, src.type()); + _dst.setTo(Scalar::all(0)); + Mat dst = _dst.getMat(); for (int y = 0, Y = 0; y < src.rows; ++y, Y += scale) { - const T* srcRow = src.ptr(y); - T* dstRow = dst.ptr(Y); + const T * const srcRow = src.ptr(y); + T * const dstRow = dst.ptr(Y); for (int x = 0, X = 0; x < src.cols; ++x, X += scale) dstRow[X] = srcRow[x]; } } - void upscale(const Mat& src, Mat& dst, int scale) +#ifdef HAVE_OPENCL + + static bool ocl_upscale(InputArray _src, OutputArray _dst, int scale) + { + int type = _src.type(), cn = CV_MAT_CN(type); + ocl::Kernel k("upscale", ocl::superres::superres_btvl1_oclsrc, + format("-D cn=%d", cn)); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(src.rows * scale, src.cols * scale, type); + _dst.setTo(Scalar::all(0)); + UMat dst = _dst.getUMat(); + + k.args(ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::ReadWriteNoSize(dst), scale); + + size_t globalsize[2] = { src.cols, src.rows }; + return k.run(2, globalsize, NULL, false); + } + +#endif + + typedef struct _Point4f { float ar[4]; } Point4f; + + void upscale(InputArray _src, OutputArray _dst, int scale) { - typedef void (*func_t)(const Mat& src, Mat& dst, int scale); + int cn = _src.channels(); + CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_upscale(_src, _dst, scale)) + + typedef void (*func_t)(InputArray src, OutputArray dst, int scale); static const func_t funcs[] = { - 0, upscaleImpl, 0, upscaleImpl + 0, upscaleImpl, 0, upscaleImpl, upscaleImpl }; - CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - - const func_t func = funcs[src.channels()]; - - func(src, dst, scale); + const func_t func = funcs[cn]; + CV_Assert(func != 0); + func(_src, _dst, scale); } - float diffSign(float a, float b) + inline float diffSign(float a, float b) { return a > b ? 1.0f : a < b ? -1.0f : 0.0f; } + Point3f diffSign(Point3f a, Point3f b) { return Point3f( @@ -157,16 +299,44 @@ namespace ); } - void diffSign(const Mat& src1, const Mat& src2, Mat& dst) +#ifdef HAVE_OPENCL + + static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst) { - const int count = src1.cols * src1.channels(); + ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc); + if (k.empty()) + return false; + + UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); + _dst.create(src1.size(), src1.type()); + UMat dst = _dst.getUMat(); - dst.create(src1.size(), src1.type()); + int cn = src1.channels(); + k.args(ocl::KernelArg::ReadOnlyNoSize(src1), + ocl::KernelArg::ReadOnlyNoSize(src2), + ocl::KernelArg::WriteOnly(dst, cn)); + + size_t globalsize[2] = { src1.cols * cn, src1.rows }; + return k.run(2, globalsize, NULL, false); + } + +#endif + + void diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst) + { + CV_OCL_RUN(_dst.isUMat(), + ocl_diffSign(_src1, _src2, _dst)) + + Mat src1 = _src1.getMat(), src2 = _src2.getMat(); + _dst.create(src1.size(), src1.type()); + Mat dst = _dst.getMat(); + + const int count = src1.cols * src1.channels(); for (int y = 0; y < src1.rows; ++y) { - const float* src1Ptr = src1.ptr(y); - const float* src2Ptr = src2.ptr(y); + const float * const src1Ptr = src1.ptr(y); + const float * const src2Ptr = src2.ptr(y); float* dstPtr = dst.ptr(y); for (int x = 0; x < count; ++x) @@ -206,8 +376,8 @@ namespace { for (int i = range.start; i < range.end; ++i) { - const T* srcRow = src.ptr(i); - T* dstRow = dst.ptr(i); + const T * const srcRow = src.ptr(i); + T * const dstRow = dst.ptr(i); for(int j = ksize; j < src.cols - ksize; ++j) { @@ -219,19 +389,20 @@ namespace const T* srcRow3 = src.ptr(i + m); for (int l = ksize; l + m >= 0; --l, ++ind) - { - dstRow[j] += btvWeights[ind] * (diffSign(srcVal, srcRow3[j + l]) - diffSign(srcRow2[j - l], srcVal)); - } + dstRow[j] += btvWeights[ind] * (diffSign(srcVal, srcRow3[j + l]) + - diffSign(srcRow2[j - l], srcVal)); } } } } template - void calcBtvRegularizationImpl(const Mat& src, Mat& dst, int btvKernelSize, const std::vector& btvWeights) + void calcBtvRegularizationImpl(InputArray _src, OutputArray _dst, int btvKernelSize, const std::vector& btvWeights) { - dst.create(src.size(), src.type()); - dst.setTo(Scalar::all(0)); + Mat src = _src.getMat(); + _dst.create(src.size(), src.type()); + _dst.setTo(Scalar::all(0)); + Mat dst = _dst.getMat(); const int ksize = (btvKernelSize - 1) / 2; @@ -245,17 +416,48 @@ namespace parallel_for_(Range(ksize, src.rows - ksize), body); } - void calcBtvRegularization(const Mat& src, Mat& dst, int btvKernelSize, const std::vector& btvWeights) +#ifdef HAVE_OPENCL + + static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights) + { + int cn = _src.channels(); + ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc, + format("-D cn=%d", cn)); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(src.size(), src.type()); + _dst.setTo(Scalar::all(0)); + UMat dst = _dst.getUMat(); + + const int ksize = (btvKernelSize - 1) / 2; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), + ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights)); + + size_t globalsize[2] = { src.cols, src.rows }; + return k.run(2, globalsize, NULL, false); + } + +#endif + + void calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, + const std::vector& btvWeights, const UMat & ubtvWeights) { - typedef void (*func_t)(const Mat& src, Mat& dst, int btvKernelSize, const std::vector& btvWeights); + CV_OCL_RUN(_dst.isUMat(), + ocl_calcBtvRegularization(_src, _dst, btvKernelSize, ubtvWeights)) + (void)ubtvWeights; + + typedef void (*func_t)(InputArray _src, OutputArray _dst, int btvKernelSize, const std::vector& btvWeights); static const func_t funcs[] = { - 0, calcBtvRegularizationImpl, 0, calcBtvRegularizationImpl + 0, calcBtvRegularizationImpl, 0, calcBtvRegularizationImpl, 0 }; - const func_t func = funcs[src.channels()]; - - func(src, dst, btvKernelSize, btvWeights); + const func_t func = funcs[_src.channels()]; + CV_Assert(func != 0); + func(_src, _dst, btvKernelSize, btvWeights); } class BTVL1_Base @@ -263,9 +465,8 @@ namespace public: BTVL1_Base(); - void process(const std::vector& src, Mat& dst, - const std::vector& forwardMotions, const std::vector& backwardMotions, - int baseIdx); + void process(InputArrayOfArrays src, OutputArray dst, InputArrayOfArrays forwardMotions, + InputArrayOfArrays backwardMotions, int baseIdx); void collectGarbage(); @@ -281,15 +482,21 @@ namespace Ptr opticalFlow_; private: + bool ocl_process(InputArrayOfArrays src, OutputArray dst, InputArrayOfArrays forwardMotions, + InputArrayOfArrays backwardMotions, int baseIdx); + Ptr filter_; int curBlurKernelSize_; double curBlurSigma_; int curSrcType_; std::vector btvWeights_; + UMat ubtvWeights_; + int curBtvKernelSize_; double curAlpha_; + // Mat std::vector lowResForwardMotions_; std::vector lowResBackwardMotions_; @@ -303,6 +510,23 @@ namespace Mat diffTerm_, regTerm_; Mat a_, b_, c_; + +#ifdef HAVE_OPENCL + // UMat + std::vector ulowResForwardMotions_; + std::vector ulowResBackwardMotions_; + + std::vector uhighResForwardMotions_; + std::vector uhighResBackwardMotions_; + + std::vector uforwardMaps_; + std::vector ubackwardMaps_; + + UMat uhighRes_; + + UMat udiffTerm_, uregTerm_; + UMat ua_, ub_, uc_; +#endif }; BTVL1_Base::BTVL1_Base() @@ -325,7 +549,101 @@ namespace curAlpha_ = -1.0; } - void BTVL1_Base::process(const std::vector& src, Mat& dst, const std::vector& forwardMotions, const std::vector& backwardMotions, int baseIdx) +#ifdef HAVE_OPENCL + + bool BTVL1_Base::ocl_process(InputArrayOfArrays _src, OutputArray _dst, InputArrayOfArrays _forwardMotions, + InputArrayOfArrays _backwardMotions, int baseIdx) + { + std::vector & src = *(std::vector *)_src.getObj(), + & forwardMotions = *(std::vector *)_forwardMotions.getObj(), + & backwardMotions = *(std::vector *)_backwardMotions.getObj(); + + // update blur filter and btv weights + if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_) + { + filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); + curBlurKernelSize_ = blurKernelSize_; + curBlurSigma_ = blurSigma_; + curSrcType_ = src[0].type(); + } + + if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_) + { + calcBtvWeights(btvKernelSize_, alpha_, btvWeights_); + Mat(btvWeights_, true).copyTo(ubtvWeights_); + + curBtvKernelSize_ = btvKernelSize_; + curAlpha_ = alpha_; + } + + // calc high res motions + calcRelativeMotions(forwardMotions, backwardMotions, ulowResForwardMotions_, ulowResBackwardMotions_, baseIdx, src[0].size()); + + upscaleMotions(ulowResForwardMotions_, uhighResForwardMotions_, scale_); + upscaleMotions(ulowResBackwardMotions_, uhighResBackwardMotions_, scale_); + + uforwardMaps_.resize(uhighResForwardMotions_.size()); + ubackwardMaps_.resize(uhighResForwardMotions_.size()); + for (size_t i = 0; i < uhighResForwardMotions_.size(); ++i) + buildMotionMaps(uhighResForwardMotions_[i], uhighResBackwardMotions_[i], uforwardMaps_[i], ubackwardMaps_[i]); + + // initial estimation + const Size lowResSize = src[0].size(); + const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_); + + resize(src[baseIdx], uhighRes_, highResSize, 0, 0, INTER_LINEAR); // TODO + + // iterations + udiffTerm_.create(highResSize, uhighRes_.type()); + ua_.create(highResSize, uhighRes_.type()); + ub_.create(highResSize, uhighRes_.type()); + uc_.create(lowResSize, uhighRes_.type()); + + for (int i = 0; i < iterations_; ++i) + { + udiffTerm_.setTo(Scalar::all(0)); + + for (size_t k = 0; k < src.size(); ++k) + { + // a = M * Ih + remap(uhighRes_, ua_, ubackwardMaps_[k], noArray(), INTER_NEAREST); + // b = HM * Ih + GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_); + // c = DHM * Ih + resize(ub_, uc_, lowResSize, 0, 0, INTER_NEAREST); + + diffSign(src[k], uc_, uc_); + + // a = Dt * diff + upscale(uc_, ua_, scale_); + + // b = HtDt * diff + GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_); + // a = MtHtDt * diff + remap(ub_, ua_, uforwardMaps_[k], noArray(), INTER_NEAREST); + + add(udiffTerm_, ua_, udiffTerm_); + } + + if (lambda_ > 0) + { + calcBtvRegularization(uhighRes_, uregTerm_, btvKernelSize_, btvWeights_, ubtvWeights_); + addWeighted(udiffTerm_, 1.0, uregTerm_, -lambda_, 0.0, udiffTerm_); + } + + addWeighted(uhighRes_, 1.0, udiffTerm_, tau_, 0.0, uhighRes_); + } + + Rect inner(btvKernelSize_, btvKernelSize_, uhighRes_.cols - 2 * btvKernelSize_, uhighRes_.rows - 2 * btvKernelSize_); + uhighRes_(inner).copyTo(_dst); + + return true; + } + +#endif + + void BTVL1_Base::process(InputArrayOfArrays _src, OutputArray _dst, InputArrayOfArrays _forwardMotions, + InputArrayOfArrays _backwardMotions, int baseIdx) { CV_Assert( scale_ > 1 ); CV_Assert( iterations_ > 0 ); @@ -335,8 +653,15 @@ namespace CV_Assert( blurKernelSize_ > 0 ); CV_Assert( blurSigma_ >= 0.0 ); - // update blur filter and btv weights + CV_OCL_RUN(_src.isUMatVector() && _dst.isUMat() && _forwardMotions.isUMatVector() && + _backwardMotions.isUMatVector(), + ocl_process(_src, _dst, _forwardMotions, _backwardMotions, baseIdx)) + + std::vector & src = *(std::vector *)_src.getObj(), + & forwardMotions = *(std::vector *)_forwardMotions.getObj(), + & backwardMotions = *(std::vector *)_backwardMotions.getObj(); + // update blur filter and btv weights if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_) { filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); @@ -353,7 +678,6 @@ namespace } // calc high res motions - calcRelativeMotions(forwardMotions, backwardMotions, lowResForwardMotions_, lowResBackwardMotions_, baseIdx, src[0].size()); upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_); @@ -365,14 +689,12 @@ namespace buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]); // initial estimation - const Size lowResSize = src[0].size(); const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_); resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC); // iterations - diffTerm_.create(highResSize, highRes_.type()); a_.create(highResSize, highRes_.type()); b_.create(highResSize, highRes_.type()); @@ -405,7 +727,7 @@ namespace if (lambda_ > 0) { - calcBtvRegularization(highRes_, regTerm_, btvKernelSize_, btvWeights_); + calcBtvRegularization(highRes_, regTerm_, btvKernelSize_, btvWeights_, ubtvWeights_); addWeighted(diffTerm_, 1.0, regTerm_, -lambda_, 0.0, diffTerm_); } @@ -413,13 +735,14 @@ namespace } Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_); - highRes_(inner).copyTo(dst); + highRes_(inner).copyTo(_dst); } void BTVL1_Base::collectGarbage() { filter_.release(); + // Mat lowResForwardMotions_.clear(); lowResBackwardMotions_.clear(); @@ -436,11 +759,32 @@ namespace a_.release(); b_.release(); c_.release(); + +#ifdef HAVE_OPENCL + // UMat + ulowResForwardMotions_.clear(); + ulowResBackwardMotions_.clear(); + + uhighResForwardMotions_.clear(); + uhighResBackwardMotions_.clear(); + + uforwardMaps_.clear(); + ubackwardMaps_.clear(); + + uhighRes_.release(); + + udiffTerm_.release(); + uregTerm_.release(); + ua_.release(); + ub_.release(); + uc_.release(); +#endif } //////////////////////////////////////////////////////////////////// - class BTVL1 : public SuperResolution, private BTVL1_Base + class BTVL1 : + public SuperResolution, private BTVL1_Base { public: AlgorithmInfo* info() const; @@ -451,14 +795,25 @@ namespace protected: void initImpl(Ptr& frameSource); + bool ocl_initImpl(Ptr& frameSource); + void processImpl(Ptr& frameSource, OutputArray output); + bool ocl_processImpl(Ptr& frameSource, OutputArray output); private: int temporalAreaRadius_; void readNextFrame(Ptr& frameSource); + bool ocl_readNextFrame(Ptr& frameSource); + void processFrame(int idx); + bool ocl_processFrame(int idx); + + int storePos_; + int procPos_; + int outPos_; + // Mat Mat curFrame_; Mat prevFrame_; @@ -467,14 +822,25 @@ namespace std::vector backwardMotions_; std::vector outputs_; - int storePos_; - int procPos_; - int outPos_; - std::vector srcFrames_; std::vector srcForwardMotions_; std::vector srcBackwardMotions_; Mat finalOutput_; + +#ifdef HAVE_OPENCL + // UMat + UMat ucurFrame_; + UMat uprevFrame_; + + std::vector uframes_; + std::vector uforwardMotions_; + std::vector ubackwardMotions_; + std::vector uoutputs_; + + std::vector usrcFrames_; + std::vector usrcForwardMotions_; + std::vector usrcBackwardMotions_; +#endif }; CV_INIT_ALGORITHM(BTVL1, "SuperResolution.BTVL1", @@ -496,6 +862,7 @@ namespace void BTVL1::collectGarbage() { + // Mat curFrame_.release(); prevFrame_.release(); @@ -509,10 +876,52 @@ namespace srcBackwardMotions_.clear(); finalOutput_.release(); +#ifdef HAVE_OPENCL + // UMat + ucurFrame_.release(); + uprevFrame_.release(); + + uframes_.clear(); + uforwardMotions_.clear(); + ubackwardMotions_.clear(); + uoutputs_.clear(); + + usrcFrames_.clear(); + usrcForwardMotions_.clear(); + usrcBackwardMotions_.clear(); +#endif + SuperResolution::collectGarbage(); BTVL1_Base::collectGarbage(); } +#ifdef HAVE_OPENCL + + bool BTVL1::ocl_initImpl(Ptr& frameSource) + { + const int cacheSize = 2 * temporalAreaRadius_ + 1; + + uframes_.resize(cacheSize); + uforwardMotions_.resize(cacheSize); + ubackwardMotions_.resize(cacheSize); + uoutputs_.resize(cacheSize); + + storePos_ = -1; + + for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t) + readNextFrame(frameSource); + + for (int i = 0; i <= temporalAreaRadius_; ++i) + processFrame(i); + + procPos_ = temporalAreaRadius_; + outPos_ = -1; + + return true; + } + +#endif + void BTVL1::initImpl(Ptr& frameSource) { const int cacheSize = 2 * temporalAreaRadius_ + 1; @@ -522,6 +931,9 @@ namespace backwardMotions_.resize(cacheSize); outputs_.resize(cacheSize); + CV_OCL_RUN(isUmat_, + ocl_initImpl(frameSource)) + storePos_ = -1; for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t) @@ -534,6 +946,18 @@ namespace outPos_ = -1; } +#ifdef HAVE_OPENCL + + bool BTVL1::ocl_processImpl(Ptr& /*frameSource*/, OutputArray _output) + { + const UMat& curOutput = at(outPos_, uoutputs_); + curOutput.convertTo(_output, CV_8U); + + return true; + } + +#endif + void BTVL1::processImpl(Ptr& frameSource, OutputArray _output) { if (outPos_ >= storePos_) @@ -549,11 +973,14 @@ namespace ++procPos_; processFrame(procPos_); } - ++outPos_; + + CV_OCL_RUN(isUmat_, + ocl_processImpl(frameSource, _output)) + const Mat& curOutput = at(outPos_, outputs_); - if (_output.kind() < _InputArray::OPENGL_BUFFER) + if (_output.kind() < _InputArray::OPENGL_BUFFER || _output.isUMat()) curOutput.convertTo(_output, CV_8U); else { @@ -562,14 +989,41 @@ namespace } } +#ifdef HAVE_OPENCL + + bool BTVL1::ocl_readNextFrame(Ptr& /*frameSource*/) + { + ucurFrame_.convertTo(at(storePos_, uframes_), CV_32F); + + if (storePos_ > 0) + { + opticalFlow_->calc(uprevFrame_, ucurFrame_, at(storePos_ - 1, uforwardMotions_)); + opticalFlow_->calc(ucurFrame_, uprevFrame_, at(storePos_, ubackwardMotions_)); + } + + ucurFrame_.copyTo(uprevFrame_); + return true; + } + +#endif + void BTVL1::readNextFrame(Ptr& frameSource) { frameSource->nextFrame(curFrame_); - if (curFrame_.empty()) return; +#ifdef HAVE_OPENCL + if (isUmat_ && curFrame_.channels() == 1) + curFrame_.copyTo(ucurFrame_); + else + isUmat_ = false; +#endif ++storePos_; + + CV_OCL_RUN(isUmat_, + ocl_readNextFrame(frameSource)) + curFrame_.convertTo(at(storePos_, frames_), CV_32F); if (storePos_ > 0) @@ -581,8 +1035,47 @@ namespace curFrame_.copyTo(prevFrame_); } +#ifdef HAVE_OPENCL + + bool BTVL1::ocl_processFrame(int idx) + { + const int startIdx = std::max(idx - temporalAreaRadius_, 0); + const int procIdx = idx; + const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_); + + const int count = endIdx - startIdx + 1; + + usrcFrames_.resize(count); + usrcForwardMotions_.resize(count); + usrcBackwardMotions_.resize(count); + + int baseIdx = -1; + + for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k) + { + if (i == procIdx) + baseIdx = k; + + usrcFrames_[k] = at(i, uframes_); + + if (i < endIdx) + usrcForwardMotions_[k] = at(i, uforwardMotions_); + if (i > startIdx) + usrcBackwardMotions_[k] = at(i, ubackwardMotions_); + } + + process(usrcFrames_, at(idx, uoutputs_), usrcForwardMotions_, usrcBackwardMotions_, baseIdx); + + return true; + } + +#endif + void BTVL1::processFrame(int idx) { + CV_OCL_RUN(isUmat_, + ocl_processFrame(idx)) + const int startIdx = std::max(idx - temporalAreaRadius_, 0); const int procIdx = idx; const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_); diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp deleted file mode 100644 index cfaf583..0000000 --- a/modules/superres/src/btv_l1_ocl.cpp +++ /dev/null @@ -1,725 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jin Ma, jin@multicorewareinc.com -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution. -// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. - -#include "precomp.hpp" - -#if !defined(HAVE_OPENCL) || !defined(HAVE_OPENCV_OCL) - -cv::Ptr cv::superres::createSuperResolution_BTVL1_OCL() -{ - CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); - return Ptr(); -} - -#else -#include "opencl_kernels.hpp" - -using namespace std; -using namespace cv; -using namespace cv::ocl; -using namespace cv::superres; -using namespace cv::superres::detail; - -static ProgramEntry superres_btvl1 = cv::ocl::superres::superres_btvl1; - -namespace cv -{ - namespace ocl - { - float* btvWeights_ = NULL; - size_t btvWeights_size = 0; - oclMat c_btvRegWeights; - } -} - -namespace btv_l1_device_ocl -{ - void buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, - const oclMat& backwardMotionX, const oclMat& bacwardMotionY, - oclMat& forwardMapX, oclMat& forwardMapY, - oclMat& backwardMapX, oclMat& backwardMapY); - - void upscale(const oclMat& src, oclMat& dst, int scale); - - void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst); - - void calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize); -} - -void btv_l1_device_ocl::buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, - const oclMat& backwardMotionX, const oclMat& backwardMotionY, - oclMat& forwardMapX, oclMat& forwardMapY, - oclMat& backwardMapX, oclMat& backwardMapY) -{ - Context* clCxt = Context::getContext(); - - size_t local_thread[] = {32, 8, 1}; - size_t global_thread[] = {forwardMapX.cols, forwardMapX.rows, 1}; - - int forwardMotionX_step = (int)(forwardMotionX.step/forwardMotionX.elemSize()); - int forwardMotionY_step = (int)(forwardMotionY.step/forwardMotionY.elemSize()); - int backwardMotionX_step = (int)(backwardMotionX.step/backwardMotionX.elemSize()); - int backwardMotionY_step = (int)(backwardMotionY.step/backwardMotionY.elemSize()); - int forwardMapX_step = (int)(forwardMapX.step/forwardMapX.elemSize()); - int forwardMapY_step = (int)(forwardMapY.step/forwardMapY.elemSize()); - int backwardMapX_step = (int)(backwardMapX.step/backwardMapX.elemSize()); - int backwardMapY_step = (int)(backwardMapY.step/backwardMapY.elemSize()); - - String kernel_name = "buildMotionMapsKernel"; - vector< pair > args; - - args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionX.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionY.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionX.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionY.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapX.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapY.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapX.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapY.data)); - - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX.rows)); - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY.cols)); - - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionX_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionY_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapX_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapY_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapX_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapY_step)); - - openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); -} - -void btv_l1_device_ocl::upscale(const oclMat& src, oclMat& dst, int scale) -{ - Context* clCxt = Context::getContext(); - - size_t local_thread[] = {32, 8, 1}; - size_t global_thread[] = {src.cols, src.rows, 1}; - - int src_step = (int)(src.step/src.elemSize()); - int dst_step = (int)(dst.step/dst.elemSize()); - - String kernel_name = "upscaleKernel"; - vector< pair > args; - - int cn = src.oclchannels(); - - args.push_back(make_pair(sizeof(cl_mem), (void*)&src.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); - args.push_back(make_pair(sizeof(cl_int), (void*)&scale)); - args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); - - openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); - -} - -void btv_l1_device_ocl::diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) -{ - Context* clCxt = Context::getContext(); - - oclMat src1_ = src1.reshape(1); - oclMat src2_ = src2.reshape(1); - oclMat dst_ = dst.reshape(1); - - int src1_step = (int)(src1_.step/src1_.elemSize()); - int src2_step = (int)(src2_.step/src2_.elemSize()); - int dst_step = (int)(dst_.step/dst_.elemSize()); - - size_t local_thread[] = {32, 8, 1}; - size_t global_thread[] = {src1_.cols, src1_.rows, 1}; - - String kernel_name = "diffSignKernel"; - vector< pair > args; - - args.push_back(make_pair(sizeof(cl_mem), (void*)&src1_.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&src2_.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); - - args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.rows)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.cols)); - args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src1_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src2_step)); - - openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); -} - -void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize) -{ - Context* clCxt = Context::getContext(); - - oclMat src_ = src.reshape(1); - oclMat dst_ = dst.reshape(1); - - size_t local_thread[] = {32, 8, 1}; - size_t global_thread[] = {src.cols, src.rows, 1}; - - int src_step = (int)(src_.step/src_.elemSize()); - int dst_step = (int)(dst_.step/dst_.elemSize()); - - String kernel_name = "calcBtvRegularizationKernel"; - vector< pair > args; - - int cn = src.oclchannels(); - - args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); - args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); - args.push_back(make_pair(sizeof(cl_int), (void*)&ksize)); - args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); - args.push_back(make_pair(sizeof(cl_mem), (void*)&c_btvRegWeights.data)); - - openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); -} - -namespace -{ - void calcRelativeMotions(const vector >& forwardMotions, const vector >& backwardMotions, - vector >& relForwardMotions, vector >& relBackwardMotions, - int baseIdx, Size size) - { - const int count = static_cast(forwardMotions.size()); - - relForwardMotions.resize(count); - relForwardMotions[baseIdx].first.create(size, CV_32FC1); - relForwardMotions[baseIdx].first.setTo(Scalar::all(0)); - relForwardMotions[baseIdx].second.create(size, CV_32FC1); - relForwardMotions[baseIdx].second.setTo(Scalar::all(0)); - - relBackwardMotions.resize(count); - relBackwardMotions[baseIdx].first.create(size, CV_32FC1); - relBackwardMotions[baseIdx].first.setTo(Scalar::all(0)); - relBackwardMotions[baseIdx].second.create(size, CV_32FC1); - relBackwardMotions[baseIdx].second.setTo(Scalar::all(0)); - - for (int i = baseIdx - 1; i >= 0; --i) - { - ocl::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first); - ocl::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second); - - ocl::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first); - ocl::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second); - } - - for (int i = baseIdx + 1; i < count; ++i) - { - ocl::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first); - ocl::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second); - - ocl::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first); - ocl::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second); - } - } - - void upscaleMotions(const vector >& lowResMotions, vector >& highResMotions, int scale) - { - highResMotions.resize(lowResMotions.size()); - - for (size_t i = 0; i < lowResMotions.size(); ++i) - { - ocl::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_LINEAR); - ocl::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_LINEAR); - - ocl::multiply(scale, highResMotions[i].first, highResMotions[i].first); - ocl::multiply(scale, highResMotions[i].second, highResMotions[i].second); - } - } - - void buildMotionMaps(const pair& forwardMotion, const pair& backwardMotion, - pair& forwardMap, pair& backwardMap) - { - forwardMap.first.create(forwardMotion.first.size(), CV_32FC1); - forwardMap.second.create(forwardMotion.first.size(), CV_32FC1); - - backwardMap.first.create(forwardMotion.first.size(), CV_32FC1); - backwardMap.second.create(forwardMotion.first.size(), CV_32FC1); - - btv_l1_device_ocl::buildMotionMaps(forwardMotion.first, forwardMotion.second, - backwardMotion.first, backwardMotion.second, - forwardMap.first, forwardMap.second, - backwardMap.first, backwardMap.second); - } - - void upscale(const oclMat& src, oclMat& dst, int scale) - { - CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - - btv_l1_device_ocl::upscale(src, dst, scale); - } - - void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) - { - dst.create(src1.size(), src1.type()); - - btv_l1_device_ocl::diffSign(src1, src2, dst); - } - - void calcBtvWeights(int btvKernelSize, double alpha, vector& btvWeights) - { - const size_t size = btvKernelSize * btvKernelSize; - - btvWeights.resize(size); - - const int ksize = (btvKernelSize - 1) / 2; - const float alpha_f = static_cast(alpha); - - for (int m = 0, ind = 0; m <= ksize; ++m) - { - for (int l = ksize; l + m >= 0; --l, ++ind) - btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l)); - } - - btvWeights_ = &btvWeights[0]; - btvWeights_size = size; - Mat btvWeights_mheader(1, static_cast(size), CV_32FC1, btvWeights_); - c_btvRegWeights = btvWeights_mheader; - } - - void calcBtvRegularization(const oclMat& src, oclMat& dst, int btvKernelSize) - { - dst.create(src.size(), src.type()); - - const int ksize = (btvKernelSize - 1) / 2; - - btv_l1_device_ocl::calcBtvRegularization(src, dst, ksize); - } - - class BTVL1_OCL_Base - { - public: - BTVL1_OCL_Base(); - - void process(const vector& src, oclMat& dst, - const vector >& forwardMotions, const vector >& backwardMotions, - int baseIdx); - - void collectGarbage(); - - protected: - int scale_; - int iterations_; - double lambda_; - double tau_; - double alpha_; - int btvKernelSize_; - int blurKernelSize_; - double blurSigma_; - Ptr opticalFlow_; - - private: - vector > filters_; - int curBlurKernelSize_; - double curBlurSigma_; - int curSrcType_; - - vector btvWeights_; - int curBtvKernelSize_; - double curAlpha_; - - vector > lowResForwardMotions_; - vector > lowResBackwardMotions_; - - vector > highResForwardMotions_; - vector > highResBackwardMotions_; - - vector > forwardMaps_; - vector > backwardMaps_; - - oclMat highRes_; - - vector diffTerms_; - oclMat a_, b_, c_, d_; - oclMat regTerm_; - }; - - BTVL1_OCL_Base::BTVL1_OCL_Base() - { - scale_ = 4; - iterations_ = 180; - lambda_ = 0.03; - tau_ = 1.3; - alpha_ = 0.7; - btvKernelSize_ = 7; - blurKernelSize_ = 5; - blurSigma_ = 0.0; - opticalFlow_ = createOptFlow_Farneback_OCL(); - - curBlurKernelSize_ = -1; - curBlurSigma_ = -1.0; - curSrcType_ = -1; - - curBtvKernelSize_ = -1; - curAlpha_ = -1.0; - } - - void BTVL1_OCL_Base::process(const vector& src, oclMat& dst, - const vector >& forwardMotions, const vector >& backwardMotions, - int baseIdx) - { - CV_Assert( scale_ > 1 ); - CV_Assert( iterations_ > 0 ); - CV_Assert( tau_ > 0.0 ); - CV_Assert( alpha_ > 0.0 ); - CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 ); - CV_Assert( blurKernelSize_ > 0 ); - CV_Assert( blurSigma_ >= 0.0 ); - - // update blur filter and btv weights - - if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_) - { - filters_.resize(src.size()); - for (size_t i = 0; i < src.size(); ++i) - filters_[i] = cv::ocl::createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); - curBlurKernelSize_ = blurKernelSize_; - curBlurSigma_ = blurSigma_; - curSrcType_ = src[0].type(); - } - - if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_) - { - calcBtvWeights(btvKernelSize_, alpha_, btvWeights_); - curBtvKernelSize_ = btvKernelSize_; - curAlpha_ = alpha_; - } - - // calc motions between input frames - - calcRelativeMotions(forwardMotions, backwardMotions, - lowResForwardMotions_, lowResBackwardMotions_, - baseIdx, src[0].size()); - - upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_); - upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_); - - forwardMaps_.resize(highResForwardMotions_.size()); - backwardMaps_.resize(highResForwardMotions_.size()); - for (size_t i = 0; i < highResForwardMotions_.size(); ++i) - { - buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]); - } - // initial estimation - - const Size lowResSize = src[0].size(); - const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_); - - ocl::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_LINEAR); - - // iterations - - diffTerms_.resize(src.size()); - bool d_inited = false; - a_.create(highRes_.size(), highRes_.type()); - b_.create(highRes_.size(), highRes_.type()); - c_.create(lowResSize, highRes_.type()); - d_.create(highRes_.rows, highRes_.cols, highRes_.type()); - for (int i = 0; i < iterations_; ++i) - { - if(!d_inited) - { - d_.setTo(0); - d_inited = true; - } - for (size_t k = 0; k < src.size(); ++k) - { - diffTerms_[k].create(highRes_.size(), highRes_.type()); - // a = M * Ih - ocl::remap(highRes_, a_, backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); - // b = HM * Ih - filters_[k]->apply(a_, b_, Rect(0,0,-1,-1)); - // c = DHF * Ih - ocl::resize(b_, c_, lowResSize, 0, 0, INTER_NEAREST); - - diffSign(src[k], c_, c_); - - // a = Dt * diff - upscale(c_, d_, scale_); - // b = HtDt * diff - filters_[k]->apply(d_, b_, Rect(0,0,-1,-1)); - // diffTerm = MtHtDt * diff - ocl::remap(b_, diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); - } - - if (lambda_ > 0) - { - calcBtvRegularization(highRes_, regTerm_, btvKernelSize_); - ocl::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_); - } - - for (size_t k = 0; k < src.size(); ++k) - { - ocl::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_); - } - } - - Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_); - highRes_(inner).copyTo(dst); - } - - void BTVL1_OCL_Base::collectGarbage() - { - filters_.clear(); - - lowResForwardMotions_.clear(); - lowResBackwardMotions_.clear(); - - highResForwardMotions_.clear(); - highResBackwardMotions_.clear(); - - forwardMaps_.clear(); - backwardMaps_.clear(); - - highRes_.release(); - - diffTerms_.clear(); - a_.release(); - b_.release(); - c_.release(); - regTerm_.release(); - c_btvRegWeights.release(); - } - - //////////////////////////////////////////////////////////// - - class BTVL1_OCL : public SuperResolution, private BTVL1_OCL_Base - { - public: - AlgorithmInfo* info() const; - - BTVL1_OCL(); - - void collectGarbage(); - - protected: - void initImpl(Ptr& frameSource); - void processImpl(Ptr& frameSource, OutputArray output); - - private: - int temporalAreaRadius_; - - void readNextFrame(Ptr& frameSource); - void processFrame(int idx); - - oclMat curFrame_; - oclMat prevFrame_; - - vector frames_; - vector > forwardMotions_; - vector > backwardMotions_; - vector outputs_; - - int storePos_; - int procPos_; - int outPos_; - - vector srcFrames_; - vector > srcForwardMotions_; - vector > srcBackwardMotions_; - oclMat finalOutput_; - }; - - CV_INIT_ALGORITHM(BTVL1_OCL, "SuperResolution.BTVL1_OCL", - obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor."); - obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count."); - obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method."); - obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term."); - obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV."); - obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter."); - obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size."); - obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma."); - obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area."); - obj.info()->addParam(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm.")) - - BTVL1_OCL::BTVL1_OCL() - { - temporalAreaRadius_ = 4; - } - - void BTVL1_OCL::collectGarbage() - { - curFrame_.release(); - prevFrame_.release(); - - frames_.clear(); - forwardMotions_.clear(); - backwardMotions_.clear(); - outputs_.clear(); - - srcFrames_.clear(); - srcForwardMotions_.clear(); - srcBackwardMotions_.clear(); - finalOutput_.release(); - - SuperResolution::collectGarbage(); - BTVL1_OCL_Base::collectGarbage(); - } - - void BTVL1_OCL::initImpl(Ptr& frameSource) - { - const int cacheSize = 2 * temporalAreaRadius_ + 1; - - frames_.resize(cacheSize); - forwardMotions_.resize(cacheSize); - backwardMotions_.resize(cacheSize); - outputs_.resize(cacheSize); - - storePos_ = -1; - - for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t) - readNextFrame(frameSource); - - for (int i = 0; i <= temporalAreaRadius_; ++i) - processFrame(i); - - procPos_ = temporalAreaRadius_; - outPos_ = -1; - } - - void BTVL1_OCL::processImpl(Ptr& frameSource, OutputArray _output) - { - if (outPos_ >= storePos_) - { - if(_output.kind() == _InputArray::OCL_MAT) - { - getOclMatRef(_output).release(); - } - else - { - _output.release(); - } - return; - } - - readNextFrame(frameSource); - - if (procPos_ < storePos_) - { - ++procPos_; - processFrame(procPos_); - } - - ++outPos_; - const oclMat& curOutput = at(outPos_, outputs_); - - if (_output.kind() == _InputArray::OCL_MAT) - curOutput.convertTo(getOclMatRef(_output), CV_8U); - else - { - curOutput.convertTo(finalOutput_, CV_8U); - arrCopy(finalOutput_, _output); - } - } - - void BTVL1_OCL::readNextFrame(Ptr& frameSource) - { - curFrame_.release(); - frameSource->nextFrame(curFrame_); - - if (curFrame_.empty()) - return; - - ++storePos_; - curFrame_.convertTo(at(storePos_, frames_), CV_32F); - - if (storePos_ > 0) - { - pair& forwardMotion = at(storePos_ - 1, forwardMotions_); - pair& backwardMotion = at(storePos_, backwardMotions_); - - opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second); - opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second); - } - - curFrame_.copyTo(prevFrame_); - } - - void BTVL1_OCL::processFrame(int idx) - { - const int startIdx = max(idx - temporalAreaRadius_, 0); - const int procIdx = idx; - const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_); - - const int count = endIdx - startIdx + 1; - - srcFrames_.resize(count); - srcForwardMotions_.resize(count); - srcBackwardMotions_.resize(count); - - int baseIdx = -1; - - for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k) - { - if (i == procIdx) - baseIdx = k; - - srcFrames_[k] = at(i, frames_); - - if (i < endIdx) - srcForwardMotions_[k] = at(i, forwardMotions_); - if (i > startIdx) - srcBackwardMotions_[k] = at(i, backwardMotions_); - } - - process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx); - } -} - -Ptr cv::superres::createSuperResolution_BTVL1_OCL() -{ - return makePtr(); -} -#endif diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index 14481b8..c572c09 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -115,25 +115,18 @@ namespace void CaptureFrameSource::nextFrame(OutputArray _frame) { if (_frame.kind() == _InputArray::MAT) - { vc_ >> _frame.getMatRef(); - } else if(_frame.kind() == _InputArray::GPU_MAT) { vc_ >> frame_; arrCopy(frame_, _frame); } - else if(_frame.kind() == _InputArray::OCL_MAT) - { - vc_ >> frame_; - if(!frame_.empty()) - { - arrCopy(frame_, _frame); - } - } + else if (_frame.isUMat()) + vc_ >> *(UMat *)_frame.getObj(); else { - //should never get here + // should never get here + CV_Assert(0); } } diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp index 5b87267..6b306d2 100644 --- a/modules/superres/src/input_array_utility.cpp +++ b/modules/superres/src/input_array_utility.cpp @@ -62,6 +62,23 @@ Mat cv::superres::arrGetMat(InputArray arr, Mat& buf) } } +UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf) +{ + switch (arr.kind()) + { + case _InputArray::GPU_MAT: + arr.getGpuMat().download(buf); + return buf; + + case _InputArray::OPENGL_BUFFER: + arr.getOGlBuffer().copyTo(buf); + return buf; + + default: + return arr.getUMat(); + } +} + GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf) { switch (arr.kind()) @@ -108,62 +125,39 @@ namespace { src.getGpuMat().copyTo(dst.getGpuMatRef()); } -#ifdef HAVE_OPENCV_OCL - void ocl2mat(InputArray src, OutputArray dst) - { - dst.getMatRef() = (Mat)ocl::getOclMatRef(src); - } - void mat2ocl(InputArray src, OutputArray dst) - { - Mat m = src.getMat(); - ocl::getOclMatRef(dst) = (ocl::oclMat)m; - } - void ocl2ocl(InputArray src, OutputArray dst) - { - ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst)); - } -#else - void ocl2mat(InputArray, OutputArray) - { - CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");; - } - void mat2ocl(InputArray, OutputArray) - { - CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");; - } - void ocl2ocl(InputArray, OutputArray) - { - CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); - } -#endif } void cv::superres::arrCopy(InputArray src, OutputArray dst) { + if (dst.isUMat() || src.isUMat()) + { + src.copyTo(dst); + return; + } + typedef void (*func_t)(InputArray src, OutputArray dst); - static const func_t funcs[11][11] = + static const func_t funcs[10][10] = { - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl}, - {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 /*buf2arr*/, buf2arr, 0 }, - {0, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0}, - {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 /*arr2tex*/, gpu2gpu, 0 }, - {0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0, 0, 0, ocl2ocl} + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu }, + { 0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0, buf2arr }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 , gpu2gpu }, }; const int src_kind = src.kind() >> _InputArray::KIND_SHIFT; const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT; - CV_DbgAssert( src_kind >= 0 && src_kind < 11 ); - CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 ); + CV_Assert( src_kind >= 0 && src_kind < 10 ); + CV_Assert( dst_kind >= 0 && dst_kind < 10 ); const func_t func = funcs[src_kind][dst_kind]; - CV_DbgAssert( func != 0 ); + CV_Assert( func != 0 ); func(src, dst); } @@ -172,20 +166,21 @@ namespace { void convertToCn(InputArray src, OutputArray dst, int cn) { - CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); + int scn = src.channels(); + CV_Assert( scn == 1 || scn == 3 || scn == 4 ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); static const int codes[5][5] = { - {-1, -1, -1, -1, -1}, - {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA}, - {-1, -1, -1, -1, -1}, - {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA}, - {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1}, + { -1, -1, -1, -1, -1 }, + { -1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA }, + { -1, -1, -1, -1, -1 }, + { -1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA }, + { -1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1 } }; - const int code = codes[src.channels()][cn]; - CV_DbgAssert( code >= 0 ); + const int code = codes[scn][cn]; + CV_Assert( code >= 0 ); switch (src.kind()) { @@ -202,6 +197,7 @@ namespace break; } } + void convertToDepth(InputArray src, OutputArray dst, int depth) { CV_Assert( src.depth() <= CV_64F ); @@ -226,6 +222,11 @@ namespace src.getGpuMat().convertTo(dst.getGpuMatRef(), depth, scale); break; + case _InputArray::UMAT: + case _InputArray::UEXPR: + src.getUMat().convertTo(dst, depth, scale); + break; + default: src.getMat().convertTo(dst, depth, scale); break; @@ -258,7 +259,7 @@ Mat cv::superres::convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1) return buf1; } -GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1) +UMat cv::superres::convertToType(const UMat& src, int type, UMat& buf0, UMat& buf1) { if (src.type() == type) return src; @@ -282,49 +283,8 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp convertToDepth(buf0, buf1, depth); return buf1; } -#ifdef HAVE_OPENCV_OCL -namespace -{ - // TODO(pengx17): remove these overloaded functions until IntputArray fully supports oclMat - void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn) - { - CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - CV_Assert( cn == 1 || cn == 3 || cn == 4 ); - - static const int codes[5][5] = - { - {-1, -1, -1, -1, -1}, - {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA}, - {-1, -1, -1, -1, -1}, - {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA}, - {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1}, - }; - - const int code = codes[src.channels()][cn]; - CV_DbgAssert( code >= 0 ); - ocl::cvtColor(src, dst, code, cn); - } - void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth) - { - CV_Assert( src.depth() <= CV_64F ); - CV_Assert( depth == CV_8U || depth == CV_32F ); - - static const double maxVals[] = - { - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max(), - 1.0, - 1.0, - }; - const double scale = maxVals[depth] / maxVals[src.depth()]; - src.convertTo(dst, depth, scale); - } -} -ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1) +GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1) { if (src.type() == type) return src; @@ -348,4 +308,3 @@ ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::o convertToDepth(buf0, buf1, depth); return buf1; } -#endif diff --git a/modules/superres/src/input_array_utility.hpp b/modules/superres/src/input_array_utility.hpp index 6f17da0..3a858fb 100644 --- a/modules/superres/src/input_array_utility.hpp +++ b/modules/superres/src/input_array_utility.hpp @@ -45,25 +45,20 @@ #include "opencv2/core.hpp" #include "opencv2/core/cuda.hpp" -#ifdef HAVE_OPENCV_OCL -#include "opencv2/ocl.hpp" -#endif namespace cv { namespace superres { CV_EXPORTS Mat arrGetMat(InputArray arr, Mat& buf); + CV_EXPORTS UMat arrGetUMat(InputArray arr, UMat& buf); CV_EXPORTS cuda::GpuMat arrGetGpuMat(InputArray arr, cuda::GpuMat& buf); CV_EXPORTS void arrCopy(InputArray src, OutputArray dst); CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1); + CV_EXPORTS UMat convertToType(const UMat& src, int type, UMat& buf0, UMat& buf1); CV_EXPORTS cuda::GpuMat convertToType(const cuda::GpuMat& src, int type, cuda::GpuMat& buf0, cuda::GpuMat& buf1); - -#ifdef HAVE_OPENCV_OCL - CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1); -#endif } } diff --git a/modules/superres/src/opencl/superres_btvl1.cl b/modules/superres/src/opencl/superres_btvl1.cl index 3c0cff8..b0e11aa 100644 --- a/modules/superres/src/opencl/superres_btvl1.cl +++ b/modules/superres/src/opencl/superres_btvl1.cl @@ -43,160 +43,137 @@ // //M*/ -__kernel void buildMotionMapsKernel(__global float* forwardMotionX, - __global float* forwardMotionY, - __global float* backwardMotionX, - __global float* backwardMotionY, - __global float* forwardMapX, - __global float* forwardMapY, - __global float* backwardMapX, - __global float* backwardMapY, - int forwardMotionX_row, - int forwardMotionX_col, - int forwardMotionX_step, - int forwardMotionY_step, - int backwardMotionX_step, - int backwardMotionY_step, - int forwardMapX_step, - int forwardMapY_step, - int backwardMapX_step, - int backwardMapY_step - ) +#ifndef cn +#define cn 1 +#endif + +#define sz (int)sizeof(float) +#define src_elem_at(_src, y, step, x) *(__global const float *)(_src + mad24(y, step, (x) * sz)) +#define dst_elem_at(_dst, y, step, x) *(__global float *)(_dst + mad24(y, step, (x) * sz)) + +__kernel void buildMotionMaps(__global const uchar * forwardMotionPtr, int forwardMotion_step, int forwardMotion_offset, + __global const uchar * backwardMotionPtr, int backwardMotion_step, int backwardMotion_offset, + __global const uchar * forwardMapPtr, int forwardMap_step, int forwardMap_offset, + __global const uchar * backwardMapPtr, int backwardMap_step, int backwardMap_offset, + int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); - if(x < forwardMotionX_col && y < forwardMotionX_row) + if (x < cols && y < rows) { - float fx = forwardMotionX[y * forwardMotionX_step + x]; - float fy = forwardMotionY[y * forwardMotionY_step + x]; + int forwardMotion_index = mad24(forwardMotion_step, y, (int)sizeof(float2) * x + forwardMotion_offset); + int backwardMotion_index = mad24(backwardMotion_step, y, (int)sizeof(float2) * x + backwardMotion_offset); + int forwardMap_index = mad24(forwardMap_step, y, (int)sizeof(float2) * x + forwardMap_offset); + int backwardMap_index = mad24(backwardMap_step, y, (int)sizeof(float2) * x + backwardMap_offset); - float bx = backwardMotionX[y * backwardMotionX_step + x]; - float by = backwardMotionY[y * backwardMotionY_step + x]; + float2 forwardMotion = *(__global const float2 *)(forwardMotionPtr + forwardMotion_index); + float2 backwardMotion = *(__global const float2 *)(backwardMotionPtr + backwardMotion_index); + __global float2 * forwardMap = (__global float2 *)(forwardMapPtr + forwardMap_index); + __global float2 * backwardMap = (__global float2 *)(backwardMapPtr + backwardMap_index); - forwardMapX[y * forwardMapX_step + x] = x + bx; - forwardMapY[y * forwardMapY_step + x] = y + by; + float2 basePoint = (float2)(x, y); - backwardMapX[y * backwardMapX_step + x] = x + fx; - backwardMapY[y * backwardMapY_step + x] = y + fy; + forwardMap[0] = basePoint + backwardMotion; + backwardMap[0] = basePoint + forwardMotion; } } -__kernel void upscaleKernel(__global float* src, - __global float* dst, - int src_step, - int dst_step, - int src_row, - int src_col, - int scale, - int channels - ) +__kernel void upscale(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int scale) { int x = get_global_id(0); int y = get_global_id(1); - if(x < src_col && y < src_row) + if (x < src_cols && y < src_rows) { - if(channels == 1) - { - dst[y * scale * dst_step + x * scale] = src[y * src_step + x]; - } - else - { - vstore4(vload4(0, src + y * channels * src_step + 4 * x), 0, dst + y * channels * scale * dst_step + 4 * x * scale); - } + int src_index = mad24(y, src_step, sz * x * cn + src_offset); + int dst_index = mad24(y * scale, dst_step, sz * x * scale * cn + dst_offset); + + __global const float * src = (__global const float *)(srcptr + src_index); + __global float * dst = (__global float *)(dstptr + dst_index); + + #pragma unroll + for (int c = 0; c < cn; ++c) + dst[c] = src[c]; } } -float diffSign(float a, float b) +inline float diffSign1(float a, float b) { return a > b ? 1.0f : a < b ? -1.0f : 0.0f; } -float4 diffSign4(float4 a, float4 b) +inline float3 diffSign3(float3 a, float3 b) { - float4 pos; + float3 pos; pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f; pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f; pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f; - pos.w = 0.0f; return pos; } -__kernel void diffSignKernel(__global float* src1, - __global float* src2, - __global float* dst, - int src1_row, - int src1_col, - int dst_step, - int src1_step, - int src2_step) +__kernel void diffSign(__global const uchar * src1, int src1_step, int src1_offset, + __global const uchar * src2, int src2_step, int src2_offset, + __global uchar * dst, int dst_step, int dst_offset, int rows, int cols) { int x = get_global_id(0); int y = get_global_id(1); - if(x < src1_col && y < src1_row) - { - dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]); - } + if (x < cols && y < rows) + *(__global float *)(dst + mad24(y, dst_step, sz * x + dst_offset)) = + diffSign1(*(__global const float *)(src1 + mad24(y, src1_step, sz * x + src1_offset)), + *(__global const float *)(src2 + mad24(y, src2_step, sz * x + src2_offset))); } -__kernel void calcBtvRegularizationKernel(__global float* src, - __global float* dst, - int src_step, - int dst_step, - int src_row, - int src_col, - int ksize, - int channels, - __constant float* c_btvRegWeights - ) +__kernel void calcBtvRegularization(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + int ksize, __constant float * c_btvRegWeights) { int x = get_global_id(0) + ksize; int y = get_global_id(1) + ksize; - if ((y < src_row - ksize) && (x < src_col - ksize)) + if (y < dst_rows - ksize && x < dst_cols - ksize) { - if(channels == 1) - { - const float srcVal = src[y * src_step + x]; - float dstVal = 0.0f; + src += src_offset; - for (int m = 0, count = 0; m <= ksize; ++m) +#if cn == 1 + const float srcVal = src_elem_at(src, y, src_step, x); + float dstVal = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + for (int l = ksize; l + m >= 0; --l, ++count) { - for (int l = ksize; l + m >= 0; --l, ++count) - { - dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal)); - } + dstVal += c_btvRegWeights[count] * (diffSign1(srcVal, src_elem_at(src, y + m, src_step, x + l)) + - diffSign1(src_elem_at(src, y - m, src_step, x - l), srcVal)); } - dst[y * dst_step + x] = dstVal; - } - else - { - float4 srcVal = vload4(0, src + y * src_step + 4 * x); - float4 dstVal = 0.f; - for (int m = 0, count = 0; m <= ksize; ++m) + dst_elem_at(dst, y, dst_step, x) = dstVal; +#elif cn == 3 + __global const float * src0ptr = (__global const float *)(src + mad24(y, src_step, 3 * sz * x + src_offset)); + float3 srcVal = (float3)(src0ptr[0], src0ptr[1], src0ptr[2]), dstVal = 0.f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) { - for (int l = ksize; l + m >= 0; --l, ++count) - { - float4 src1; - src1.x = src[(y + m) * src_step + 4 * (x + l) + 0]; - src1.y = src[(y + m) * src_step + 4 * (x + l) + 1]; - src1.z = src[(y + m) * src_step + 4 * (x + l) + 2]; - src1.w = src[(y + m) * src_step + 4 * (x + l) + 3]; - - float4 src2; - src2.x = src[(y - m) * src_step + 4 * (x - l) + 0]; - src2.y = src[(y - m) * src_step + 4 * (x - l) + 1]; - src2.z = src[(y - m) * src_step + 4 * (x - l) + 2]; - src2.w = src[(y - m) * src_step + 4 * (x - l) + 3]; - - dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal)); - } + __global const float * src1ptr = (__global const float *)(src + mad24(y + m, src_step, 3 * sz * (x + l) + src_offset)); + __global const float * src2ptr = (__global const float *)(src + mad24(y - m, src_step, 3 * sz * (x - l) + src_offset)); + + float3 src1 = (float3)(src1ptr[0], src1ptr[1], src1ptr[2]); + float3 src2 = (float3)(src2ptr[0], src2ptr[1], src2ptr[2]); + + dstVal += c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal)); } - vstore4(dstVal, 0, dst + y * dst_step + 4 * x); } + + __global float * dstptr = (__global float *)(dst + mad24(y, dst_step, 3 * sz * x + dst_offset + 0)); + dstptr[0] = dstVal.x; + dstptr[1] = dstVal.y; + dstptr[2] = dstVal.z; +#else +#error "Number of channels should be either 1 of 3" +#endif } } diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index 30c27c2..2f77cd7 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencv2/core/opencl/ocl_defs.hpp" using namespace cv; using namespace cv::cuda; @@ -61,21 +62,66 @@ namespace void collectGarbage(); protected: - virtual void impl(const Mat& input0, const Mat& input1, OutputArray dst) = 0; + virtual void impl(InputArray input0, InputArray input1, OutputArray dst) = 0; private: + bool ocl_calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2); + int work_type_; + + // Mat Mat buf_[6]; Mat flow_; Mat flows_[2]; + + // UMat + UMat ubuf_[6]; + UMat uflow_; + std::vector uflows_; }; - CpuOpticalFlow::CpuOpticalFlow(int work_type) : work_type_(work_type) + CpuOpticalFlow::CpuOpticalFlow(int work_type) : + work_type_(work_type) { } + bool CpuOpticalFlow::ocl_calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2) + { + UMat frame0 = arrGetUMat(_frame0, ubuf_[0]); + UMat frame1 = arrGetUMat(_frame1, ubuf_[1]); + + CV_Assert( frame1.type() == frame0.type() ); + CV_Assert( frame1.size() == frame0.size() ); + + UMat input0 = convertToType(frame0, work_type_, ubuf_[2], ubuf_[3]); + UMat input1 = convertToType(frame1, work_type_, ubuf_[4], ubuf_[5]); + + if (!_flow2.needed()) + { + impl(input0, input1, _flow1); + return true; + } + + impl(input0, input1, uflow_); + + if (!_flow2.needed()) + arrCopy(uflow_, _flow1); + else + { + split(uflow_, uflows_); + + arrCopy(uflows_[0], _flow1); + arrCopy(uflows_[1], _flow2); + } + + return true; + } + void CpuOpticalFlow::calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2) { + CV_OCL_RUN(_flow1.isUMat() && (_flow2.isUMat() || !_flow2.needed()), + ocl_calc(_frame0, _frame1, _flow1, _flow2)) + Mat frame0 = arrGetMat(_frame0, buf_[0]); Mat frame1 = arrGetMat(_frame1, buf_[1]); @@ -94,9 +140,7 @@ namespace impl(input0, input1, flow_); if (!_flow2.needed()) - { arrCopy(flow_, _flow1); - } else { split(flow_, flows_); @@ -108,11 +152,19 @@ namespace void CpuOpticalFlow::collectGarbage() { + // Mat for (int i = 0; i < 6; ++i) buf_[i].release(); flow_.release(); flows_[0].release(); flows_[1].release(); + + // UMat + for (int i = 0; i < 6; ++i) + ubuf_[i].release(); + uflow_.release(); + uflows_[0].release(); + uflows_[1].release(); } } @@ -129,7 +181,7 @@ namespace Farneback(); protected: - void impl(const Mat& input0, const Mat& input1, OutputArray dst); + void impl(InputArray input0, InputArray input1, OutputArray dst); private: double pyrScale_; @@ -161,7 +213,7 @@ namespace flags_ = 0; } - void Farneback::impl(const Mat& input0, const Mat& input1, OutputArray dst) + void Farneback::impl(InputArray input0, InputArray input1, OutputArray dst) { calcOpticalFlowFarneback(input0, input1, (InputOutputArray)dst, pyrScale_, numLevels_, winSize_, numIters_, @@ -187,7 +239,7 @@ namespace Simple(); protected: - void impl(const Mat& input0, const Mat& input1, OutputArray dst); + void impl(InputArray input0, InputArray input1, OutputArray dst); private: int layers_; @@ -237,11 +289,9 @@ namespace speedUpThr_ = 10; } - void Simple::impl(const Mat& _input0, const Mat& _input1, OutputArray dst) + void Simple::impl(InputArray _input0, InputArray _input1, OutputArray _dst) { - Mat input0 = _input0; - Mat input1 = _input1; - calcOpticalFlowSF(input0, input1, dst.getMatRef(), + calcOpticalFlowSF(_input0, _input1, _dst, layers_, averagingBlockSize_, maxFlow_, @@ -278,7 +328,7 @@ namespace void collectGarbage(); protected: - void impl(const Mat& input0, const Mat& input1, OutputArray dst); + void impl(InputArray input0, InputArray input1, OutputArray dst); private: double tau_; @@ -316,7 +366,7 @@ namespace useInitialFlow_ = alg_->getBool("useInitialFlow"); } - void DualTVL1::impl(const Mat& input0, const Mat& input1, OutputArray dst) + void DualTVL1::impl(InputArray input0, InputArray input1, OutputArray dst) { alg_->set("tau", tau_); alg_->set("lambda", lambda_); @@ -720,269 +770,3 @@ Ptr cv::superres::createOptFlow_DualTVL1_CUDA() } #endif // HAVE_OPENCV_CUDAOPTFLOW -#ifdef HAVE_OPENCV_OCL - -namespace -{ - class oclOpticalFlow : public DenseOpticalFlowExt - { - public: - explicit oclOpticalFlow(int work_type); - - void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2); - void collectGarbage(); - - protected: - virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0; - - private: - int work_type_; - cv::ocl::oclMat buf_[6]; - cv::ocl::oclMat u_, v_, flow_; - }; - - oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type) - { - } - - void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2) - { - ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0); - ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1); - ocl::oclMat& _flow1 = ocl::getOclMatRef(flow1); - ocl::oclMat& _flow2 = ocl::getOclMatRef(flow2); - - CV_Assert( _frame1.type() == _frame0.type() ); - CV_Assert( _frame1.size() == _frame0.size() ); - - cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]); - cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]); - - impl(input0_, input1_, u_, v_);//go to tvl1 algorithm - - u_.copyTo(_flow1); - v_.copyTo(_flow2); - } - - void oclOpticalFlow::collectGarbage() - { - for (int i = 0; i < 6; ++i) - buf_[i].release(); - u_.release(); - v_.release(); - flow_.release(); - } -} -/////////////////////////////////////////////////////////////////// -// PyrLK_OCL - -namespace -{ - class PyrLK_OCL : public oclOpticalFlow - { - public: - AlgorithmInfo* info() const; - - PyrLK_OCL(); - - void collectGarbage(); - - protected: - void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2); - - private: - int winSize_; - int maxLevel_; - int iterations_; - - ocl::PyrLKOpticalFlow alg_; - }; - - CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL", - obj.info()->addParam(obj, "winSize", obj.winSize_); - obj.info()->addParam(obj, "maxLevel", obj.maxLevel_); - obj.info()->addParam(obj, "iterations", obj.iterations_)) - - PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1) - { - winSize_ = alg_.winSize.width; - maxLevel_ = alg_.maxLevel; - iterations_ = alg_.iters; - } - - void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) - { - alg_.winSize.width = winSize_; - alg_.winSize.height = winSize_; - alg_.maxLevel = maxLevel_; - alg_.iters = iterations_; - - alg_.dense(input0, input1, dst1, dst2); - } - - void PyrLK_OCL::collectGarbage() - { - alg_.releaseMemory(); - oclOpticalFlow::collectGarbage(); - } -} - -Ptr cv::superres::createOptFlow_PyrLK_OCL() -{ - return makePtr(); -} - -/////////////////////////////////////////////////////////////////// -// DualTVL1_OCL - -namespace -{ - class DualTVL1_OCL : public oclOpticalFlow - { - public: - AlgorithmInfo* info() const; - - DualTVL1_OCL(); - - void collectGarbage(); - - protected: - void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2); - - private: - double tau_; - double lambda_; - double theta_; - int nscales_; - int warps_; - double epsilon_; - int iterations_; - bool useInitialFlow_; - - ocl::OpticalFlowDual_TVL1_OCL alg_; - }; - - CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL", - obj.info()->addParam(obj, "tau", obj.tau_); - obj.info()->addParam(obj, "lambda", obj.lambda_); - obj.info()->addParam(obj, "theta", obj.theta_); - obj.info()->addParam(obj, "nscales", obj.nscales_); - obj.info()->addParam(obj, "warps", obj.warps_); - obj.info()->addParam(obj, "epsilon", obj.epsilon_); - obj.info()->addParam(obj, "iterations", obj.iterations_); - obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_)) - - DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1) - { - tau_ = alg_.tau; - lambda_ = alg_.lambda; - theta_ = alg_.theta; - nscales_ = alg_.nscales; - warps_ = alg_.warps; - epsilon_ = alg_.epsilon; - iterations_ = alg_.iterations; - useInitialFlow_ = alg_.useInitialFlow; - } - - void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) - { - alg_.tau = tau_; - alg_.lambda = lambda_; - alg_.theta = theta_; - alg_.nscales = nscales_; - alg_.warps = warps_; - alg_.epsilon = epsilon_; - alg_.iterations = iterations_; - alg_.useInitialFlow = useInitialFlow_; - - alg_(input0, input1, dst1, dst2); - - } - - void DualTVL1_OCL::collectGarbage() - { - alg_.collectGarbage(); - oclOpticalFlow::collectGarbage(); - } -} - -Ptr cv::superres::createOptFlow_DualTVL1_OCL() -{ - return makePtr(); -} - -/////////////////////////////////////////////////////////////////// -// FarneBack - -namespace -{ - class FarneBack_OCL : public oclOpticalFlow - { - public: - AlgorithmInfo* info() const; - - FarneBack_OCL(); - - void collectGarbage(); - - protected: - void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2); - - private: - double pyrScale_; - int numLevels_; - int winSize_; - int numIters_; - int polyN_; - double polySigma_; - int flags_; - - ocl::FarnebackOpticalFlow alg_; - }; - - CV_INIT_ALGORITHM(FarneBack_OCL, "DenseOpticalFlowExt.FarneBack_OCL", - obj.info()->addParam(obj, "pyrScale", obj.pyrScale_); - obj.info()->addParam(obj, "numLevels", obj.numLevels_); - obj.info()->addParam(obj, "winSize", obj.winSize_); - obj.info()->addParam(obj, "numIters", obj.numIters_); - obj.info()->addParam(obj, "polyN", obj.polyN_); - obj.info()->addParam(obj, "polySigma", obj.polySigma_); - obj.info()->addParam(obj, "flags", obj.flags_)) - - FarneBack_OCL::FarneBack_OCL() : oclOpticalFlow(CV_8UC1) - { - pyrScale_ = alg_.pyrScale; - numLevels_ = alg_.numLevels; - winSize_ = alg_.winSize; - numIters_ = alg_.numIters; - polyN_ = alg_.polyN; - polySigma_ = alg_.polySigma; - flags_ = alg_.flags; - } - - void FarneBack_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) - { - alg_.pyrScale = pyrScale_; - alg_.numLevels = numLevels_; - alg_.winSize = winSize_; - alg_.numIters = numIters_; - alg_.polyN = polyN_; - alg_.polySigma = polySigma_; - alg_.flags = flags_; - - alg_(input0, input1, dst1, dst2); - } - - void FarneBack_OCL::collectGarbage() - { - alg_.releaseMemory(); - oclOpticalFlow::collectGarbage(); - } -} - -Ptr cv::superres::createOptFlow_Farneback_OCL() -{ - return makePtr(); -} - -#endif diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp index 0681bfa..c3aeb66 100644 --- a/modules/superres/src/precomp.hpp +++ b/modules/superres/src/precomp.hpp @@ -82,10 +82,6 @@ # include "opencv2/cudacodec.hpp" #endif -#ifdef HAVE_OPENCV_OCL - #include "opencv2/ocl/private/util.hpp" -#endif - #ifdef HAVE_OPENCV_HIGHGUI #include "opencv2/highgui.hpp" #endif diff --git a/modules/superres/src/super_resolution.cpp b/modules/superres/src/super_resolution.cpp index 031a59b..215416d 100644 --- a/modules/superres/src/super_resolution.cpp +++ b/modules/superres/src/super_resolution.cpp @@ -54,16 +54,20 @@ cv::superres::SuperResolution::SuperResolution() { frameSource_ = createFrameSource_Empty(); firstCall_ = true; + isUmat_ = false; } void cv::superres::SuperResolution::setInput(const Ptr& frameSource) { frameSource_ = frameSource; firstCall_ = true; + isUmat_ = false; } void cv::superres::SuperResolution::nextFrame(OutputArray frame) { + isUmat_ = frame.isUMat(); + if (firstCall_) { initImpl(frameSource_); @@ -77,6 +81,7 @@ void cv::superres::SuperResolution::reset() { frameSource_->reset(); firstCall_ = true; + isUmat_ = false; } void cv::superres::SuperResolution::collectGarbage() diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp index 92d51fd..980c8ed 100644 --- a/modules/superres/test/test_superres.cpp +++ b/modules/superres/test/test_superres.cpp @@ -41,6 +41,7 @@ //M*/ #include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" class AllignedFrameSource : public cv::superres::FrameSource { @@ -52,6 +53,7 @@ public: private: cv::Ptr base_; + cv::Mat origFrame_; int scale_; }; @@ -67,9 +69,7 @@ void AllignedFrameSource::nextFrame(cv::OutputArray frame) base_->nextFrame(origFrame_); if (origFrame_.rows % scale_ == 0 && origFrame_.cols % scale_ == 0) - { cv::superres::arrCopy(origFrame_, frame); - } else { cv::Rect ROI(0, 0, (origFrame_.cols / scale_) * scale_, (origFrame_.rows / scale_) * scale_); @@ -92,6 +92,7 @@ public: private: cv::Ptr base_; + cv::Mat origFrame_; cv::Mat blurred_; cv::Mat deg_; @@ -104,28 +105,25 @@ DegradeFrameSource::DegradeFrameSource(const cv::Ptr& CV_Assert( base_ ); } -void addGaussNoise(cv::Mat& image, double sigma) +static void addGaussNoise(cv::OutputArray _image, double sigma) { - cv::Mat noise(image.size(), CV_32FC(image.channels())); + int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + cv::Mat noise(_image.size(), CV_32FC(cn)); cvtest::TS::ptr()->get_rng().fill(noise, cv::RNG::NORMAL, 0.0, sigma); - cv::addWeighted(image, 1.0, noise, 1.0, 0.0, image, image.depth()); + cv::addWeighted(_image, 1.0, noise, 1.0, 0.0, _image, depth); } -void addSpikeNoise(cv::Mat& image, int frequency) +static void addSpikeNoise(cv::OutputArray _image, int frequency) { - cv::Mat_ mask(image.size(), 0); + cv::Mat_ mask(_image.size(), 0); for (int y = 0; y < mask.rows; ++y) - { for (int x = 0; x < mask.cols; ++x) - { if (cvtest::TS::ptr()->get_rng().uniform(0, frequency) < 1) mask(y, x) = 255; - } - } - image.setTo(cv::Scalar::all(255), mask); + _image.setTo(cv::Scalar::all(255), mask); } void DegradeFrameSource::nextFrame(cv::OutputArray frame) @@ -146,7 +144,7 @@ void DegradeFrameSource::reset() base_->reset(); } -double MSSIM(const cv::Mat& i1, const cv::Mat& i2) +double MSSIM(cv::InputArray _i1, cv::InputArray _i2) { const double C1 = 6.5025; const double C2 = 58.5225; @@ -154,8 +152,8 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2) const int depth = CV_32F; cv::Mat I1, I2; - i1.convertTo(I1, depth); - i2.convertTo(I2, depth); + _i1.getMat().convertTo(I1, depth); + _i2.getMat().convertTo(I2, depth); cv::Mat I2_2 = I2.mul(I2); // I2^2 cv::Mat I1_2 = I1.mul(I1); // I1^2 @@ -201,7 +199,7 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2) // mssim = average of ssim map cv::Scalar mssim = cv::mean(ssim_map); - if (i1.channels() == 1) + if (_i1.channels() == 1) return mssim[0]; return (mssim[0] + mssim[1] + mssim[3]) / 3; @@ -210,9 +208,11 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2) class SuperResolution : public testing::Test { public: + template void RunTest(cv::Ptr superRes); }; +template void SuperResolution::RunTest(cv::Ptr superRes) { const std::string inputVideoName = cvtest::TS::ptr()->get_data_path() + "car.avi"; @@ -245,7 +245,8 @@ void SuperResolution::RunTest(cv::Ptr superRes) double srAvgMSSIM = 0.0; const int count = 10; - cv::Mat goldFrame, superResFrame; + cv::Mat goldFrame; + T superResFrame; for (int i = 0; i < count; ++i) { goldSource->nextFrame(goldFrame); @@ -266,24 +267,28 @@ void SuperResolution::RunTest(cv::Ptr superRes) TEST_F(SuperResolution, BTVL1) { - RunTest(cv::superres::createSuperResolution_BTVL1()); + RunTest(cv::superres::createSuperResolution_BTVL1()); } #if defined(HAVE_CUDA) && defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) && defined(HAVE_OPENCV_CUDAFILTERS) TEST_F(SuperResolution, BTVL1_CUDA) { - RunTest(cv::superres::createSuperResolution_BTVL1_CUDA()); + RunTest(cv::superres::createSuperResolution_BTVL1_CUDA()); } #endif -#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL) +#ifdef HAVE_OPENCL -TEST_F(SuperResolution, BTVL1_OCL) +namespace cvtest { +namespace ocl { + +OCL_TEST_F(SuperResolution, BTVL1) { - if (cv::ocl::useOpenCL()) - RunTest(cv::superres::createSuperResolution_BTVL1_OCL()); + RunTest(cv::superres::createSuperResolution_BTVL1()); } +} } // namespace cvtest::ocl + #endif diff --git a/modules/ts/include/opencv2/ts/ocl_perf.hpp b/modules/ts/include/opencv2/ts/ocl_perf.hpp index 37d2886..8a92faa 100644 --- a/modules/ts/include/opencv2/ts/ocl_perf.hpp +++ b/modules/ts/include/opencv2/ts/ocl_perf.hpp @@ -99,10 +99,14 @@ using std::tr1::tuple; #define OCL_TEST_CYCLE() \ for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) +#define OCL_TEST_CYCLE_N(n) \ + for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) + #define OCL_TEST_CYCLE_MULTIRUN(runsNum) \ for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \ for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r) + namespace perf { // Check for current device limitation -- 2.7.4