1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or bpied warranties, including, but not limited to, the bpied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #if !defined CUDA_DISABLER
45 #include "opencv2/gpu/device/common.hpp"
46 #include "opencv2/gpu/device/border_interpolate.hpp"
47 #include "opencv2/gpu/device/limits.hpp"
49 using namespace cv::gpu;
50 using namespace cv::gpu::device;
52 ////////////////////////////////////////////////////////////
57 __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
59 const int x = blockIdx.x * blockDim.x + threadIdx.x;
60 const int y = blockIdx.y * blockDim.y + threadIdx.y;
62 if (x >= src.cols || y >= src.rows)
65 dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
66 dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
69 void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
71 const dim3 block(32, 8);
72 const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
74 centeredGradientKernel<<<grid, block>>>(src, dx, dy);
75 cudaSafeCall( cudaGetLastError() );
77 cudaSafeCall( cudaDeviceSynchronize() );
81 ////////////////////////////////////////////////////////////
86 static __device__ __forceinline__ float bicubicCoeff(float x_)
91 return x * x * (1.5f * x - 2.5f) + 1.0f;
95 return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
103 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
104 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
105 texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
107 __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
109 const int x = blockIdx.x * blockDim.x + threadIdx.x;
110 const int y = blockIdx.y * blockDim.y + threadIdx.y;
112 if (x >= I0.cols || y >= I0.rows)
115 const float u1Val = u1(y, x);
116 const float u2Val = u2(y, x);
118 const float wx = x + u1Val;
119 const float wy = y + u2Val;
121 const int xmin = ::ceilf(wx - 2.0f);
122 const int xmax = ::floorf(wx + 2.0f);
124 const int ymin = ::ceilf(wy - 2.0f);
125 const int ymax = ::floorf(wy + 2.0f);
132 for (int cy = ymin; cy <= ymax; ++cy)
134 for (int cx = xmin; cx <= xmax; ++cx)
136 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
138 sum += w * tex2D(tex_I1 , cx, cy);
139 sumx += w * tex2D(tex_I1x, cx, cy);
140 sumy += w * tex2D(tex_I1y, cx, cy);
146 const float coeff = 1.0f / wsum;
148 const float I1wVal = sum * coeff;
149 const float I1wxVal = sumx * coeff;
150 const float I1wyVal = sumy * coeff;
153 I1wx(y, x) = I1wxVal;
154 I1wy(y, x) = I1wyVal;
156 const float Ix2 = I1wxVal * I1wxVal;
157 const float Iy2 = I1wyVal * I1wyVal;
159 // store the |Grad(I1)|^2
160 grad(y, x) = Ix2 + Iy2;
162 // compute the constant part of the rho function
163 const float I0Val = I0(y, x);
164 rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
167 void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
169 const dim3 block(32, 8);
170 const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
172 bindTexture(&tex_I1 , I1);
173 bindTexture(&tex_I1x, I1x);
174 bindTexture(&tex_I1y, I1y);
176 warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
177 cudaSafeCall( cudaGetLastError() );
179 cudaSafeCall( cudaDeviceSynchronize() );
183 ////////////////////////////////////////////////////////////
188 __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
192 const float v1x = v1(y, x) - v1(y, x - 1);
193 const float v2y = v2(y, x) - v2(y - 1, x);
199 return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
203 return v1(0, x) - v1(0, x - 1) + v2(0, x);
205 return v1(0, 0) + v2(0, 0);
210 __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
211 const PtrStepf grad, const PtrStepf rho_c,
212 const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
213 PtrStepf u1, PtrStepf u2, PtrStepf error,
214 const float l_t, const float theta)
216 const int x = blockIdx.x * blockDim.x + threadIdx.x;
217 const int y = blockIdx.y * blockDim.y + threadIdx.y;
219 if (x >= I1wx.cols || y >= I1wx.rows)
222 const float I1wxVal = I1wx(y, x);
223 const float I1wyVal = I1wy(y, x);
224 const float gradVal = grad(y, x);
225 const float u1OldVal = u1(y, x);
226 const float u2OldVal = u2(y, x);
228 const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
230 // estimate the values of the variable (v1, v2) (thresholding operator TH)
235 if (rho < -l_t * gradVal)
240 else if (rho > l_t * gradVal)
245 else if (gradVal > numeric_limits<float>::epsilon())
247 const float fi = -rho / gradVal;
252 const float v1 = u1OldVal + d1;
253 const float v2 = u2OldVal + d2;
255 // compute the divergence of the dual variable (p1, p2)
257 const float div_p1 = divergence(p11, p12, y, x);
258 const float div_p2 = divergence(p21, p22, y, x);
260 // estimate the values of the optical flow (u1, u2)
262 const float u1NewVal = v1 + theta * div_p1;
263 const float u2NewVal = v2 + theta * div_p2;
268 const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
269 const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
270 error(y, x) = n1 + n2;
273 void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
274 PtrStepSzf grad, PtrStepSzf rho_c,
275 PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
276 PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
277 float l_t, float theta)
279 const dim3 block(32, 8);
280 const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
282 estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
283 cudaSafeCall( cudaGetLastError() );
285 cudaSafeCall( cudaDeviceSynchronize() );
289 ////////////////////////////////////////////////////////////
290 // estimateDualVariables
294 __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
296 const int x = blockIdx.x * blockDim.x + threadIdx.x;
297 const int y = blockIdx.y * blockDim.y + threadIdx.y;
299 if (x >= u1.cols || y >= u1.rows)
302 const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x);
303 const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x);
305 const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
306 const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);
308 const float g1 = ::hypotf(u1x, u1y);
309 const float g2 = ::hypotf(u2x, u2y);
311 const float ng1 = 1.0f + taut * g1;
312 const float ng2 = 1.0f + taut * g2;
314 p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
315 p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
316 p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
317 p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
320 void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
322 const dim3 block(32, 8);
323 const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
325 estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
326 cudaSafeCall( cudaGetLastError() );
328 cudaSafeCall( cudaDeviceSynchronize() );
332 #endif // !defined CUDA_DISABLER