modules/ocl/src/optical_flow_farneback.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //      Sen Liu, swjtuls1987@126.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
  28 //     and/or other materials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors "as is" and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46
  47 #include "precomp.hpp"
  48 #include "opencl_kernels.hpp"
  49 #include "opencv2/video/tracking.hpp"
  50
  51 using namespace cv;
  52 using namespace cv::ocl;
  53
  54 #define MIN_SIZE 32
  55
  56 namespace cv {
  57 namespace ocl {
  58 namespace optflow_farneback
  59 {
  60 oclMat g;
  61 oclMat xg;
  62 oclMat xxg;
  63 oclMat gKer;
  64
  65 float ig[4];
  66
  67 inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
  68 {
  69     cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
  70     gKer.upload(t_gKer);
  71 }
  72
  73 static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
  74 {
  75     string kernelName("gaussianBlur");
  76     size_t localThreads[3] = { 256, 1, 1 };
  77     size_t globalThreads[3] = { src.cols, src.rows, 1 };
  78     int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
  79
  80     CV_Assert(dst.size() == src.size());
  81     std::vector< std::pair<size_t, const void *> > args;
  82     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
  83     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
  84     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
  85     args.push_back(std::make_pair(smem_size, (void *)NULL));
  86     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
  87     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
  88     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
  89     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
  90     args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
  91
  92     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
  93                         globalThreads, localThreads, args, -1, -1);
  94 }
  95
  96 static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
  97 {
  98     string kernelName("polynomialExpansion");
  99     size_t localThreads[3] = { 256, 1, 1 };
 100     size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
 101     int smem_size = 3 * localThreads[0] * sizeof(float);
 102
 103     std::vector< std::pair<size_t, const void *> > args;
 104     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 105     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 106     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
 107     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
 108     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
 109     args.push_back(std::make_pair(smem_size, (void *)NULL));
 110     args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
 111     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
 112     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
 113     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 114     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 115
 116     char opt [128];
 117     sprintf(opt, "-D polyN=%d", polyN);
 118
 119     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 120                         globalThreads, localThreads, args, -1, -1, opt);
 121 }
 122
 123 static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
 124 {
 125     string kernelName("updateMatrices");
 126     size_t localThreads[3] = { 32, 8, 1 };
 127     size_t globalThreads[3] = { flowx.cols, flowx.rows, 1 };
 128
 129     std::vector< std::pair<size_t, const void *> > args;
 130     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
 131     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
 132     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
 133     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
 134     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
 135     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
 136     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
 137     args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
 138     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
 139     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
 140     args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
 141     args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
 142
 143     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 144                         globalThreads, localThreads, args, -1, -1);
 145 }
 146
 147 static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 148 {
 149     string kernelName("boxFilter5");
 150     int height = src.rows / 5;
 151     size_t localThreads[3] = { 256, 1, 1 };
 152     size_t globalThreads[3] = { src.cols, height, 1 };
 153     int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
 154
 155     std::vector< std::pair<size_t, const void *> > args;
 156     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 157     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 158     args.push_back(std::make_pair(smem_size, (void *)NULL));
 159     args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
 160     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
 161     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 162     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 163     args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
 164
 165     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 166                         globalThreads, localThreads, args, -1, -1);
 167 }
 168
 169 static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
 170 {
 171     string kernelName("updateFlow");
 172     int cols = divUp(flowx.cols, 4);
 173     size_t localThreads[3] = { 32, 8, 1 };
 174     size_t globalThreads[3] = { cols, flowx.rows, 1 };
 175
 176     std::vector< std::pair<size_t, const void *> > args;
 177     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
 178     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
 179     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
 180     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
 181     args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
 182     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
 183     args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
 184     args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
 185
 186     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 187                         globalThreads, localThreads, args, -1, -1);
 188 }
 189
 190 static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 191 {
 192     string kernelName("gaussianBlur5");
 193     int height = src.rows / 5;
 194     size_t localThreads[3] = { 256, 1, 1 };
 195     size_t globalThreads[3] = { src.cols, height, 1 };
 196     int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
 197
 198     std::vector< std::pair<size_t, const void *> > args;
 199     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 200     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 201     args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
 202     args.push_back(std::make_pair(smem_size, (void *)NULL));
 203     args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
 204     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
 205     args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 206     args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 207     args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
 208
 209     openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 210                         globalThreads, localThreads, args, -1, -1);
 211 }
 212 }
 213 }
 214 } // namespace cv { namespace ocl { namespace optflow_farneback
 215
 216 static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
 217 {
 218     if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
 219         return mat(Rect(0, 0, cols, rows));
 220     return mat = oclMat(rows, cols, type);
 221 }
 222
 223 cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow()
 224 {
 225     numLevels = 5;
 226     pyrScale = 0.5;
 227     fastPyramids = false;
 228     winSize = 13;
 229     numIters = 10;
 230     polyN = 5;
 231     polySigma = 1.1;
 232     flags = 0;
 233 }
 234
 235 void cv::ocl::FarnebackOpticalFlow::releaseMemory()
 236 {
 237     frames_[0].release();
 238     frames_[1].release();
 239     pyrLevel_[0].release();
 240     pyrLevel_[1].release();
 241     M_.release();
 242     bufM_.release();
 243     R_[0].release();
 244     R_[1].release();
 245     blurredFrame_[0].release();
 246     blurredFrame_[1].release();
 247     pyramid0_.clear();
 248     pyramid1_.clear();
 249 }
 250
 251 void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
 252     int n, double sigma, float *g, float *xg, float *xxg,
 253     double &ig11, double &ig03, double &ig33, double &ig55)
 254 {
 255     double s = 0.;
 256     for (int x = -n; x <= n; x++)
 257     {
 258         g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
 259         s += g[x];
 260     }
 261
 262     s = 1./s;
 263     for (int x = -n; x <= n; x++)
 264     {
 265         g[x] = (float)(g[x]*s);
 266         xg[x] = (float)(x*g[x]);
 267         xxg[x] = (float)(x*x*g[x]);
 268     }
 269
 270     Mat_<double> G(6, 6);
 271     G.setTo(0);
 272
 273     for (int y = -n; y <= n; y++)
 274     {
 275         for (int x = -n; x <= n; x++)
 276         {
 277             G(0,0) += g[y]*g[x];
 278             G(1,1) += g[y]*g[x]*x*x;
 279             G(3,3) += g[y]*g[x]*x*x*x*x;
 280             G(5,5) += g[y]*g[x]*x*x*y*y;
 281         }
 282     }
 283
 284     //G[0][0] = 1.;
 285     G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
 286     G(4,4) = G(3,3);
 287     G(3,4) = G(4,3) = G(5,5);
 288
 289     // invG:
 290     // [ x        e  e    ]
 291     // [    y             ]
 292     // [       y          ]
 293     // [ e        z       ]
 294     // [ e           z    ]
 295     // [                u ]
 296     Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
 297
 298     ig11 = invG(1,1);
 299     ig03 = invG(0,3);
 300     ig33 = invG(3,3);
 301     ig55 = invG(5,5);
 302 }
 303
 304 void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
 305 {
 306     vector<float> buf(n*6 + 3);
 307     float* g = &buf[0] + n;
 308     float* xg = g + n*2 + 1;
 309     float* xxg = xg + n*2 + 1;
 310
 311     if (sigma < FLT_EPSILON)
 312         sigma = n*0.3;
 313
 314     double ig11, ig03, ig33, ig55;
 315     prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
 316
 317     cv::Mat t_g(1, n + 1, CV_32FC1, g);
 318     cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
 319     cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
 320
 321     optflow_farneback::g.upload(t_g);
 322     optflow_farneback::xg.upload(t_xg);
 323     optflow_farneback::xxg.upload(t_xxg);
 324
 325     optflow_farneback::ig[0] = static_cast<float>(ig11);
 326     optflow_farneback::ig[1] = static_cast<float>(ig03);
 327     optflow_farneback::ig[2] = static_cast<float>(ig33);
 328     optflow_farneback::ig[3] = static_cast<float>(ig55);
 329 }
 330
 331 void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
 332     const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
 333     oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
 334 {
 335     optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
 336
 337     swap(M, bufM);
 338
 339     finish();
 340
 341     optflow_farneback::updateFlowOcl(M, flowx, flowy);
 342
 343     if (updateMatrices)
 344         optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
 345 }
 346
 347
 348 void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
 349     const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
 350     oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
 351 {
 352     optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
 353
 354     swap(M, bufM);
 355
 356     optflow_farneback::updateFlowOcl(M, flowx, flowy);
 357
 358     if (updateMatrices)
 359         optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
 360 }
 361
 362
 363 void cv::ocl::FarnebackOpticalFlow::operator ()(
 364     const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
 365 {
 366     CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
 367     CV_Assert(frame0.size() == frame1.size());
 368     CV_Assert(polyN == 5 || polyN == 7);
 369     CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
 370
 371     Size size = frame0.size();
 372     oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
 373
 374     flowx.create(size, CV_32F);
 375     flowy.create(size, CV_32F);
 376     oclMat flowx0 = flowx;
 377     oclMat flowy0 = flowy;
 378
 379     // Crop unnecessary levels
 380     double scale = 1;
 381     int numLevelsCropped = 0;
 382     for (; numLevelsCropped < numLevels; numLevelsCropped++)
 383     {
 384         scale *= pyrScale;
 385         if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
 386             break;
 387     }
 388
 389     frame0.convertTo(frames_[0], CV_32F);
 390     frame1.convertTo(frames_[1], CV_32F);
 391
 392     if (fastPyramids)
 393     {
 394         // Build Gaussian pyramids using pyrDown()
 395         pyramid0_.resize(numLevelsCropped + 1);
 396         pyramid1_.resize(numLevelsCropped + 1);
 397         pyramid0_[0] = frames_[0];
 398         pyramid1_[0] = frames_[1];
 399         for (int i = 1; i <= numLevelsCropped; ++i)
 400         {
 401             pyrDown(pyramid0_[i - 1], pyramid0_[i]);
 402             pyrDown(pyramid1_[i - 1], pyramid1_[i]);
 403         }
 404     }
 405
 406     setPolynomialExpansionConsts(polyN, polySigma);
 407
 408     for (int k = numLevelsCropped; k >= 0; k--)
 409     {
 410         scale = 1;
 411         for (int i = 0; i < k; i++)
 412             scale *= pyrScale;
 413
 414         double sigma = (1./scale - 1) * 0.5;
 415         int smoothSize = cvRound(sigma*5) | 1;
 416         smoothSize = std::max(smoothSize, 3);
 417
 418         int width = cvRound(size.width*scale);
 419         int height = cvRound(size.height*scale);
 420
 421         if (fastPyramids)
 422         {
 423             width = pyramid0_[k].cols;
 424             height = pyramid0_[k].rows;
 425         }
 426
 427         if (k > 0)
 428         {
 429             curFlowX.create(height, width, CV_32F);
 430             curFlowY.create(height, width, CV_32F);
 431         }
 432         else
 433         {
 434             curFlowX = flowx0;
 435             curFlowY = flowy0;
 436         }
 437
 438         if (!prevFlowX.data)
 439         {
 440             if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
 441             {
 442                 resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
 443                 resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
 444                 multiply(scale, curFlowX, curFlowX);
 445                 multiply(scale, curFlowY, curFlowY);
 446             }
 447             else
 448             {
 449                 curFlowX.setTo(0);
 450                 curFlowY.setTo(0);
 451             }
 452         }
 453         else
 454         {
 455             resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
 456             resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
 457             multiply(1./pyrScale, curFlowX, curFlowX);
 458             multiply(1./pyrScale, curFlowY, curFlowY);
 459         }
 460
 461         oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
 462         oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
 463         oclMat R[2] =
 464         {
 465             allocMatFromBuf(5*height, width, CV_32F, R_[0]),
 466             allocMatFromBuf(5*height, width, CV_32F, R_[1])
 467         };
 468
 469         if (fastPyramids)
 470         {
 471             optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
 472             optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
 473         }
 474         else
 475         {
 476             oclMat blurredFrame[2] =
 477             {
 478                 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
 479                 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
 480             };
 481             oclMat pyrLevel[2] =
 482             {
 483                 allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
 484                 allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
 485             };
 486
 487             Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
 488             optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
 489
 490             for (int i = 0; i < 2; i++)
 491             {
 492                 optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
 493                 resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
 494                 optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
 495             }
 496         }
 497
 498         optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
 499
 500         if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
 501         {
 502             Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
 503             optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
 504         }
 505         for (int i = 0; i < numIters; i++)
 506         {
 507             if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
 508                 updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
 509             else
 510                 updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
 511         }
 512
 513         prevFlowX = curFlowX;
 514         prevFlowY = curFlowY;
 515     }
 516
 517     flowx = curFlowX;
 518     flowy = curFlowY;
 519 }