1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Sen Liu, swjtuls1987@126.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors "as is" and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
47 #include "precomp.hpp"
48 #include "opencv2/video/tracking.hpp"
52 using namespace cv::ocl;
60 ///////////////////////////OpenCL kernel strings///////////////////////////
61 extern const char *optical_flow_farneback;
67 namespace optflow_farneback
/**
 * Integer ceiling division: the number of chunks of size `grain` needed to
 * cover `total` items.
 *
 * @param total  number of items to cover
 * @param grain  chunk size; must be non-zero (callers here pass work-group sizes)
 * @return (total + grain - 1) / grain, evaluated without intermediate overflow
 */
inline int divUp(int total, int grain)
{
    // Widen before adding: total + grain - 1 can exceed INT_MAX even when the
    // quotient itself fits in an int.
    return static_cast<int>((static_cast<long long>(total) + grain - 1) / grain);
}
// Wraps the caller's coefficient array in a 1 x (ksizeHalf + 1) CV_32FC1
// cv::Mat header, i.e. c_gKer must point at ksizeHalf + 1 readable floats.
// The const_cast only removes constness so the pointer can be handed to the
// cv::Mat constructor.
// NOTE(review): the statement that consumes t_gKer is not visible in this
// excerpt - presumably it is uploaded into the gKer buffer that the blur
// launchers pass to their kernels; confirm against the full file.
81 inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
83 cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
// Launches the "gaussianBlur" kernel of the optical_flow_farneback program.
//   src       - input matrix
//   ksizeHalf - forwarded to the kernel and used to size the scratch argument
//   dst       - output matrix; must already have the same size as src
//               (asserted below)
// gKer is a buffer defined outside the visible lines of this excerpt.
87 static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
89 string kernelName("gaussianBlur");
// Work-groups are 256 x 1. The global width is src.cols rounded up to a
// multiple of 256; the global height is one work-item row per matrix row.
90 size_t localThreads[3] = { 256, 1, 1 };
91 size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
// Byte size of (256 + 2*ksizeHalf) floats. It is passed below as a size-only
// argument with a NULL pointer - presumably a __local scratch row padded by
// ksizeHalf on each side; confirm against the kernel source.
92 int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
94 CV_Assert(dst.size() == src.size());
// Arguments are appended in the order in which they are handed to
// openCLExecuteKernel (presumably the kernel's parameter order).
95 std::vector< std::pair<size_t, const void *> > args;
96 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
97 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
98 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
99 args.push_back(std::make_pair(smem_size, (void *)NULL));
100 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
101 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
102 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
103 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
104 args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
106 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
107 globalThreads, localThreads, args, -1, -1);
// Launches the "polynomialExpansion" kernel of the optical_flow_farneback
// program.
//   src   - input matrix
//   polyN - used to size the launch grid and passed to the program through the
//           "-D polyN=<value>" option string
//   dst   - output matrix
// g, xg, xxg and ig are defined outside the visible lines of this excerpt;
// setPolynomialExpansionConsts() writes them.
110 static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
112 string kernelName("polynomialExpansion");
113 size_t localThreads[3] = { 256, 1, 1 };
// The number of groups along x is src.cols divided (rounding up) by
// 256 - 2*polyN rather than by 256 - presumably each group of 256 work-items
// produces only 256 - 2*polyN output columns; confirm against the kernel.
114 size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
// Byte size of 3 * 256 floats, passed below as a size-only (NULL pointer)
// argument.
115 int smem_size = 3 * localThreads[0] * sizeof(float);
117 std::vector< std::pair<size_t, const void *> > args;
118 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
119 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
120 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
121 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
122 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
123 args.push_back(std::make_pair(smem_size, (void *)NULL));
124 args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
125 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
126 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
127 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
128 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
// NOTE(review): the declaration of opt is not visible in this excerpt, and
// sprintf does not bound its write - confirm the buffer is large enough for
// "-D polyN=" plus any int.
131 sprintf(opt, "-D polyN=%d", polyN);
133 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
134 globalThreads, localThreads, args, -1, -1, opt);
// Launches the "updateMatrices" kernel of the optical_flow_farneback program.
//   flowx, flowy - flow components; flowx also supplies the rows/cols passed
//                  to the kernel and the size of the launch grid
//   R0, R1       - input matrices (in this file: the outputs of
//                  polynomialExpansionOcl for the two frames)
//   M            - non-const matrix passed as the first kernel argument
//                  (presumably the kernel's output)
137 static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
139 string kernelName("updateMatrices");
// 32 x 8 work-groups; each global dimension is rounded up to a multiple of
// the matching work-group dimension.
// NOTE(review): the end of this initializer is not visible in this excerpt.
140 size_t localThreads[3] = { 32, 8, 1 };
141 size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
142 divUp(flowx.rows, localThreads[1]) * localThreads[1],
146 std::vector< std::pair<size_t, const void *> > args;
147 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
148 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
149 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
150 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
151 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
152 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
153 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
154 args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
155 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
156 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
157 args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
158 args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
160 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
161 globalThreads, localThreads, args, -1, -1);
// Launches the "boxFilter5" kernel of the optical_flow_farneback program.
//   src       - input matrix; its row count is divided by 5 to obtain the
//               height passed to the kernel (callers in this file allocate it
//               with 5*height rows)
//   ksizeHalf - forwarded to the kernel and used to size the scratch argument
//   dst       - output matrix
164 static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
166 string kernelName("boxFilter5");
167 int height = src.rows / 5;
// 256 x 1 work-groups; the global width is src.cols rounded up to a multiple
// of 256 and the global height is `height`, not src.rows.
168 size_t localThreads[3] = { 256, 1, 1 };
169 size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
// Byte size of 5 * (256 + 2*ksizeHalf) floats, passed below as a size-only
// (NULL pointer) argument.
170 int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
172 std::vector< std::pair<size_t, const void *> > args;
173 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
174 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
175 args.push_back(std::make_pair(smem_size, (void *)NULL));
176 args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
177 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
178 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
179 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
180 args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
182 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
183 globalThreads, localThreads, args, -1, -1);
186 static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
188 string kernelName("updateFlow");
189 int cols = divUp(flowx.cols, 4);
190 size_t localThreads[3] = { 32, 8, 1 };
191 size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
192 divUp(flowx.rows, localThreads[1]) * localThreads[0],
196 std::vector< std::pair<size_t, const void *> > args;
197 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
198 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
199 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
200 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
201 args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
202 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
203 args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
204 args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
206 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
207 globalThreads, localThreads, args, -1, -1);
// Launches the "gaussianBlur5" kernel of the optical_flow_farneback program.
//   src       - input matrix; its row count is divided by 5 to obtain the
//               height passed to the kernel (callers in this file allocate it
//               with 5*height rows)
//   ksizeHalf - forwarded to the kernel and used to size the scratch argument
//   dst       - output matrix
// gKer is a buffer defined outside the visible lines of this excerpt.
210 static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
212 string kernelName("gaussianBlur5");
213 int height = src.rows / 5;
214 int width = src.cols;
// 256 x 1 work-groups; the global width is `width` rounded up to a multiple
// of 256 and the global height is `height`, not src.rows.
215 size_t localThreads[3] = { 256, 1, 1 };
216 size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
// Byte size of 5 * (256 + 2*ksizeHalf) floats, passed below as a size-only
// (NULL pointer) argument.
217 int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
219 std::vector< std::pair<size_t, const void *> > args;
220 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
221 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
222 args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
223 args.push_back(std::make_pair(smem_size, (void *)NULL));
224 args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
225 args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
226 args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
227 args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
228 args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
230 openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
231 globalThreads, localThreads, args, -1, -1);
235 } // namespace cv { namespace ocl { namespace optflow_farneback
// Returns a rows x cols matrix of the given type, reusing `mat` when possible.
//   rows, cols - requested size
//   type       - requested element type
//   mat        - cache buffer; may be replaced by this call
// If `mat` is non-empty, has the requested type and is at least rows x cols,
// the result is the top-left rows x cols region of `mat`. Otherwise `mat` is
// replaced by a new rows x cols matrix, which is also the return value.
237 static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
239 if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
240 return mat(Rect(0, 0, cols, rows));
241 return mat = oclMat(rows, cols, type);
// Default constructor.
// NOTE(review): only the fastPyramids default is visible in this excerpt; the
// initialisation of the remaining parameters lies on lines that are not
// shown here.
244 cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow()
248 fastPyramids = false;
// Releases the cached matrices held by this object. Visible here: both
// entries of frames_, pyrLevel_ and blurredFrame_.
// NOTE(review): the gaps in this excerpt suggest further release() calls on
// lines that are not shown; confirm against the full file.
256 void cv::ocl::FarnebackOpticalFlow::releaseMemory()
258 frames_[0].release();
259 frames_[1].release();
260 pyrLevel_[0].release();
261 pyrLevel_[1].release();
266 blurredFrame_[0].release();
267 blurredFrame_[1].release();
// Fills the Gaussian-weighted tables g, xg and xxg, builds the 6x6 matrix G
// from them and inverts it.
//   n       - half-width; every table is indexed from -n to n inclusive, so
//             g, xg and xxg must each point at an element that has at least n
//             valid floats on both sides
//   sigma   - standard deviation used in exp(-x*x / (2*sigma*sigma))
//   g       - receives the (scaled) Gaussian samples
//   xg      - receives x * g[x]
//   xxg     - receives x * x * g[x]
//   ig11, ig03, ig33, ig55 - output references; the statements that assign
//             them are not visible in this excerpt (presumably entries of
//             invG - confirm against the full file)
272 void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
273 int n, double sigma, float *g, float *xg, float *xxg,
274 double &ig11, double &ig03, double &ig33, double &ig55)
// Unscaled Gaussian samples.
277 for (int x = -n; x <= n; x++)
279 g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
// Scale g by s and derive the x- and x^2-weighted tables from the result.
// NOTE(review): s is defined on lines that are not visible here - presumably
// the reciprocal of the sum of g, which would normalise the table; confirm.
284 for (int x = -n; x <= n; x++)
286 g[x] = (float)(g[x]*s);
287 xg[x] = (float)(x*g[x]);
288 xxg[x] = (float)(x*x*g[x]);
291 Mat_<double> G(6, 6);
// Accumulate weighted sums over the (2n+1) x (2n+1) window.
294 for (int y = -n; y <= n; y++)
296 for (int x = -n; x <= n; x++)
299 G(1,1) += g[y]*g[x]*x*x;
300 G(3,3) += g[y]*g[x]*x*x*x*x;
301 G(5,5) += g[y]*g[x]*x*x*y*y;
// Other entries reuse the accumulated values instead of being summed again.
306 G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
308 G(3,4) = G(4,3) = G(5,5);
// Inversion using the Cholesky decomposition method.
317 Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
// Computes the polynomial-expansion tables for half-width n and stores them,
// together with four scalars, in the optflow_farneback namespace variables
// (g, xg, xxg, ig) that polynomialExpansionOcl passes to its kernel.
//   n     - half-width of the tables
//   sigma - Gaussian standard deviation forwarded to prepareGaussian
325 void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
// One backing store for three tables of 2n+1 floats each (6n + 3 in total).
// g, xg and xxg point at the centre element of their table so that
// prepareGaussian can index them from -n to n.
327 vector<float> buf(n*6 + 3);
328 float* g = &buf[0] + n;
329 float* xg = g + n*2 + 1;
330 float* xxg = xg + n*2 + 1;
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt - presumably it substitutes a default sigma; confirm.
332 if (sigma < FLT_EPSILON)
335 double ig11, ig03, ig33, ig55;
336 prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
// Only elements 0..n of each table (the centre and the positive side) are
// wrapped in a Mat header and uploaded.
338 cv::Mat t_g(1, n + 1, CV_32FC1, g);
339 cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
340 cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
342 optflow_farneback::g.upload(t_g);
343 optflow_farneback::xg.upload(t_xg);
344 optflow_farneback::xxg.upload(t_xxg);
// The four scalars are narrowed to float for the kernel's cl_float4 argument.
346 optflow_farneback::ig[0] = static_cast<float>(ig11);
347 optflow_farneback::ig[1] = static_cast<float>(ig03);
348 optflow_farneback::ig[2] = static_cast<float>(ig33);
349 optflow_farneback::ig[3] = static_cast<float>(ig55);
// One flow-update step using the box-filter kernel: filters M into bufM with
// half-size blockSize/2, updates flowx / flowy and recomputes M from the new
// flow and R0 / R1.
// NOTE(review): several lines between these calls are not visible in this
// excerpt. As shown, updateFlowOcl reads M rather than the freshly filtered
// bufM, and the updateMatrices flag is never referenced - presumably the
// hidden lines swap M with bufM and make the final call conditional on
// updateMatrices. Confirm against the full file.
352 void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
353 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
354 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
356 optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
362 optflow_farneback::updateFlowOcl(M, flowx, flowy);
365 optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
// One flow-update step using the Gaussian kernel: filters M into bufM with
// half-size blockSize/2, updates flowx / flowy and recomputes M from the new
// flow and R0 / R1.
// NOTE(review): several lines between these calls are not visible in this
// excerpt. As shown, updateFlowOcl reads M rather than the freshly filtered
// bufM, and the updateMatrices flag is never referenced - presumably the
// hidden lines swap M with bufM and make the final call conditional on
// updateMatrices. Confirm against the full file.
369 void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
370 const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
371 oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
373 optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
377 optflow_farneback::updateFlowOcl(M, flowx, flowy);
380 optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
// Computes the flow between frame0 and frame1 into flowx / flowy, working
// from the coarsest pyramid level down to level 0.
//   frame0, frame1 - single-channel frames of identical size (asserted)
//   flowx, flowy   - created here as CV_32F matrices of the frame size; their
//                    contents are read as the starting flow when `flags`
//                    contains cv::OPTFLOW_USE_INITIAL_FLOW
// Also asserted: polyN is 5 or 7, and fastPyramids is only used with
// pyrScale equal to 0.5 (within 1e-6).
// NOTE(review): many short lines (braces, else branches, short statements)
// are missing from this excerpt, so the branch structure between the steps
// below is not visible; the end of the function may also lie past the last
// line shown.
384 void cv::ocl::FarnebackOpticalFlow::operator ()(
385 const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
387 CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
388 CV_Assert(frame0.size() == frame1.size());
389 CV_Assert(polyN == 5 || polyN == 7);
390 CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
392 Size size = frame0.size();
393 oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
395 flowx.create(size, CV_32F);
396 flowy.create(size, CV_32F);
// Keep handles to the caller's flow so it can be resized as the initial
// estimate further down.
397 oclMat flowx0 = flowx;
398 oclMat flowy0 = flowy;
400 // Crop unnecessary levels
// numLevelsCropped counts the usable levels; the visible test compares the
// scaled frame dimensions with MIN_SIZE (scale is declared and updated on
// lines that are not shown).
402 int numLevelsCropped = 0;
403 for (; numLevelsCropped < numLevels; numLevelsCropped++)
406 if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
// Work on float copies of the input frames.
410 frame0.convertTo(frames_[0], CV_32F);
411 frame1.convertTo(frames_[1], CV_32F);
415 // Build Gaussian pyramids using pyrDown()
416 pyramid0_.resize(numLevelsCropped + 1);
417 pyramid1_.resize(numLevelsCropped + 1);
418 pyramid0_[0] = frames_[0];
419 pyramid1_[0] = frames_[1];
420 for (int i = 1; i <= numLevelsCropped; ++i)
422 pyrDown(pyramid0_[i - 1], pyramid0_[i]);
423 pyrDown(pyramid1_[i - 1], pyramid1_[i]);
427 setPolynomialExpansionConsts(polyN, polySigma);
// Coarse-to-fine: k runs from the coarsest level down to 0.
429 for (int k = numLevelsCropped; k >= 0; k--)
432 for (int i = 0; i < k; i++)
// Smoothing parameters for this level; "| 1" forces an odd kernel size and
// the max() keeps it at 3 or more.
435 double sigma = (1./scale - 1) * 0.5;
436 int smoothSize = cvRound(sigma*5) | 1;
437 smoothSize = std::max(smoothSize, 3);
439 int width = cvRound(size.width*scale);
440 int height = cvRound(size.height*scale);
// Level size taken from the prebuilt pyramid (presumably the fastPyramids
// branch - the condition is not visible here).
444 width = pyramid0_[k].cols;
445 height = pyramid0_[k].rows;
450 curFlowX.create(height, width, CV_32F);
451 curFlowY.create(height, width, CV_32F);
// Starting flow from the caller: resized to this level and multiplied by
// scale.
461 if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
463 resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
464 resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
465 multiply(scale, curFlowX, curFlowX);
466 multiply(scale, curFlowY, curFlowY);
// Starting flow from the previous (coarser) level: resized to this level and
// multiplied by 1/pyrScale.
476 resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
477 resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
478 multiply(1./pyrScale, curFlowX, curFlowX);
479 multiply(1./pyrScale, curFlowY, curFlowY);
// M, bufM and R[] have 5*height rows; they are views into the member caches
// whenever those are already large enough.
482 oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
483 oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
486 allocMatFromBuf(5*height, width, CV_32F, R_[0]),
487 allocMatFromBuf(5*height, width, CV_32F, R_[1])
// Polynomial expansion taken directly from the prebuilt pyramid level.
492 optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
493 optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
// Alternative path: blur the full-size frame, resize it to this level, then
// expand. blurredFrame uses the full frame size, pyrLevel the level size.
497 oclMat blurredFrame[2] =
499 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
500 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
504 allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
505 allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
// The pointer passed on starts at index smoothSize/2 of the kernel, so the
// callee wraps the centre-to-end part (smoothSize/2 + 1 values).
508 Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
509 optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
511 for (int i = 0; i < 2; i++)
513 optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
// NOTE(review): the other resize calls in this function pass
// (..., 0, 0, INTER_LINEAR); here INTER_LINEAR sits in the position they use
// for fx. Looks unintended - confirm against the resize signature.
514 resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
515 optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
519 optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
// With the Gaussian flag set, load the winSize window kernel before the
// iterations start.
521 if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
523 Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
524 optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
// numIters refinement steps; the last argument is false only on the final
// iteration.
526 for (int i = 0; i < numIters; i++)
528 if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
529 updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
531 updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
// This level's flow becomes the starting estimate for the next finer level.
534 prevFlowX = curFlowX;
535 prevFlowY = curFlowY;