From: Roman Donchenko Date: Tue, 27 Aug 2013 15:15:47 +0000 (+0400) Subject: Merge remote-tracking branch 'origin/2.4' into merge-2.4 X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~949^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2d6f35d6ed4bdf136fee7628a415178f08c118fe;p=profile%2Fivi%2Fopencv.git Merge remote-tracking branch 'origin/2.4' into merge-2.4 Conflicts: modules/gpuwarping/src/cuda/resize.cu modules/gpuwarping/src/resize.cpp modules/gpuwarping/test/test_resize.cpp modules/ocl/perf/main.cpp modules/ocl/perf/perf_calib3d.cpp modules/ocl/perf/perf_canny.cpp modules/ocl/perf/perf_color.cpp modules/ocl/perf/perf_haar.cpp modules/ocl/perf/perf_match_template.cpp modules/ocl/perf/perf_precomp.cpp modules/ocl/perf/perf_precomp.hpp --- 2d6f35d6ed4bdf136fee7628a415178f08c118fe diff --cc modules/gpuoptflow/src/tvl1flow.cpp index e725e0f,0000000..b9ef05e mode 100644,000000..100644 --- a/modules/gpuoptflow/src/tvl1flow.cpp +++ b/modules/gpuoptflow/src/tvl1flow.cpp @@@ -1,258 -1,0 +1,259 @@@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if !defined HAVE_CUDA || defined(CUDA_DISABLER) + +cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_no_cuda(); } +void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } +void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() {} +void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } + +#else + +using namespace cv; +using namespace cv::gpu; + +cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + iterations = 300; + scaleStep = 0.8; + useInitialFlow = false; +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy) +{ + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); + CV_Assert( nscales > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales); + I1s.resize(nscales); + u1s.resize(nscales); + u2s.resize(nscales); + + I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); + + if (!useInitialFlow) + { + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); + } + + u1s[0] = flowx; + u2s[0] = flowy; + + I1x_buf.create(I0.size(), CV_32FC1); + I1y_buf.create(I0.size(), CV_32FC1); + + I1w_buf.create(I0.size(), CV_32FC1); + I1wx_buf.create(I0.size(), CV_32FC1); + I1wy_buf.create(I0.size(), CV_32FC1); + + grad_buf.create(I0.size(), CV_32FC1); + rho_c_buf.create(I0.size(), CV_32FC1); + + p11_buf.create(I0.size(), CV_32FC1); + p12_buf.create(I0.size(), CV_32FC1); + p21_buf.create(I0.size(), CV_32FC1); + p22_buf.create(I0.size(), CV_32FC1); + + diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + gpu::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep); + gpu::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + gpu::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep); + gpu::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep); + + gpu::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]); + gpu::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]); + } + else + { + u1s[s].create(I0s[s].size(), CV_32FC1); + u2s[s].create(I0s[s].size(), CV_32FC1); + } + } + + if (!useInitialFlow) + { + u1s[nscales-1].setTo(Scalar::all(0)); + u2s[nscales-1].setTo(Scalar::all(0)); + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); + gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + gpu::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]); + gpu::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]); + } +} + +namespace tvl1flow +{ + void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy); + void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho); + void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, + PtrStepSzf grad, PtrStepSzf rho_c, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, + PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, + float l_t, float theta); + void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2) +{ + using namespace tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + centeredGradient(I1, I1x, I1y); + + GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); + + double error = std::numeric_limits::max(); + for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + { + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast(theta)); + - error = gpu::sum(diff, norm_buf)[0]; ++ if (epsilon > 0) ++ error = gpu::sum(diff, norm_buf)[0]; + + estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); + } + } +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() +{ + I0s.clear(); + I1s.clear(); + u1s.clear(); + u2s.clear(); + + I1x_buf.release(); + I1y_buf.release(); + + I1w_buf.release(); + I1wx_buf.release(); + I1wy_buf.release(); + + grad_buf.release(); + rho_c_buf.release(); + + p11_buf.release(); + p12_buf.release(); + p21_buf.release(); + p22_buf.release(); + + diff_buf.release(); + norm_buf.release(); +} + +#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER) diff --cc modules/gpuwarping/src/cuda/resize.cu index 04c1fb2,0000000..0377189 mode 100644,000000..100644 --- a/modules/gpuwarping/src/cuda/resize.cu +++ b/modules/gpuwarping/src/cuda/resize.cu @@@ -1,302 -1,0 +1,482 @@@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/border_interpolate.hpp" +#include "opencv2/core/cuda/vec_traits.hpp" +#include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/saturate_cast.hpp" +#include "opencv2/core/cuda/filters.hpp" - #include "opencv2/core/cuda/scan.hpp" + +namespace cv { namespace gpu { namespace cudev +{ - namespace imgproc ++ // kernels ++ ++ template __global__ void resize_nearest(const PtrStep src, PtrStepSz dst, const float fy, const float fx) + { - template __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz dst) ++ const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; ++ const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; ++ ++ if (dst_x < dst.cols && dst_y < dst.rows) + { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; ++ const float src_x = dst_x * fx; ++ const float src_y = dst_y * fy; + - if (x < dst.cols && y < dst.rows) - { - const float xcoo = x * fx; - const float ycoo = y * fy; ++ dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x)); ++ } ++ } + - dst(y, x) = saturate_cast(src(ycoo, xcoo)); - } ++ template __global__ void resize_linear(const PtrStepSz src, PtrStepSz dst, const float fy, const float fx) ++ { ++ typedef typename TypeVec::cn>::vec_type work_type; ++ ++ const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; ++ const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; ++ ++ if (dst_x < dst.cols && dst_y < dst.rows) ++ { ++ const float src_x = dst_x * fx; ++ const float src_y = dst_y * fy; ++ ++ work_type out = VecTraits::all(0); ++ ++ const int x1 = __float2int_rd(src_x); ++ const int y1 = __float2int_rd(src_y); ++ const int x2 = x1 + 1; ++ const int y2 = y1 + 1; ++ const int x2_read = ::min(x2, src.cols - 1); ++ const int y2_read = ::min(y2, src.rows - 1); ++ ++ T src_reg = src(y1, x1); ++ out = out + src_reg * ((x2 - src_x) * (y2 - src_y)); ++ ++ src_reg = src(y1, x2_read); ++ out = out + src_reg * ((src_x - x1) * (y2 - src_y)); ++ ++ src_reg = src(y2_read, x1); ++ out = out + src_reg * ((x2 - src_x) * (src_y - y1)); ++ ++ src_reg = src(y2_read, x2_read); ++ out = out + src_reg * ((src_x - x1) * (src_y - y1)); ++ ++ dst(dst_y, dst_x) = saturate_cast(out); + } ++ } ++ ++ template __global__ void resize(const Ptr2D src, PtrStepSz dst, const float fy, const float fx) ++ { ++ const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; ++ const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; + - template __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz dst) ++ if (dst_x < dst.cols && dst_y < dst.rows) + { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; ++ const float src_x = dst_x * fx; ++ const float src_y = dst_y * fy; + - if (x < dst.cols && y < dst.rows) - { - dst(y, x) = saturate_cast(src(y, x)); - } ++ dst(dst_y, dst_x) = src(src_y, src_x); + } ++ } + - template