modules/ocl/src/optical_flow_farneback.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //      Sen Liu, swjtuls1987@126.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
  28 //     and/or other oclMaterials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors "as is" and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46
  47 #include "precomp.hpp"
  48 #include "opencv2/video/tracking.hpp"
  49
  50 using namespace std;
  51 using namespace cv;
  52 using namespace cv::ocl;
  53
  54 #define MIN_SIZE 32
  55
  56 namespace cv
  57 {
  58     namespace ocl
  59     {
  60         ///////////////////////////OpenCL kernel strings///////////////////////////
  61         extern const char *optical_flow_farneback;
  62     }
  63 }
  64
  65 namespace cv { namespace ocl { namespace optflow_farneback
  66 {
  67     oclMat g;
  68     oclMat xg;
  69     oclMat xxg;
  70     oclMat gKer;
  71
  72     float ig[4];
  73
  74     inline int divUp(int total, int grain)
  75     {
  76         return (total + grain - 1) / grain;
  77     }
  78
  79     inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
  80     {
  81         cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
  82         gKer.upload(t_gKer);
  83     }
  84
  85     void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
  86     {
  87         string kernelName("gaussianBlur");
  88         size_t localThreads[3] = { 256, 1, 1 };
  89         size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
  90         int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
  91
  92         CV_Assert(dst.size() == src.size());
  93         std::vector< std::pair<size_t, const void *> > args;
  94         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
  95         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
  96         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
  97         args.push_back(std::make_pair(smem_size, (void *)NULL));
  98         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
  99         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
 100         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 101         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 102         args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
 103
 104         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 105             globalThreads, localThreads, args, -1, -1);
 106     }
 107
 108     void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
 109     {
 110         string kernelName("polynomialExpansion");
 111         size_t localThreads[3] = { 256, 1, 1 };
 112         size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
 113         int smem_size = 3 * localThreads[0] * sizeof(float);
 114
 115         std::vector< std::pair<size_t, const void *> > args;
 116         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 117         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 118         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
 119         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
 120         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
 121         args.push_back(std::make_pair(smem_size, (void *)NULL));
 122         args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
 123         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
 124         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
 125         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 126         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 127
 128         char opt [128];
 129         sprintf(opt, "-D polyN=%d", polyN);
 130
 131         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 132             globalThreads, localThreads, args, -1, -1, opt);
 133     }
 134
 135     void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
 136     {
 137         string kernelName("updateMatrices");
 138         size_t localThreads[3] = { 32, 8, 1 };
 139         size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
 140                                     divUp(flowx.rows, localThreads[1]) * localThreads[1],
 141                                     1 };
 142
 143         std::vector< std::pair<size_t, const void *> > args;
 144         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
 145         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
 146         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
 147         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
 148         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
 149         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
 150         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
 151         args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
 152         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
 153         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
 154         args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
 155         args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
 156
 157         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 158             globalThreads, localThreads, args, -1, -1);
 159     }
 160
 161     void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 162     {
 163         string kernelName("boxFilter5");
 164         int height = src.rows / 5;
 165         size_t localThreads[3] = { 256, 1, 1 };
 166         size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
 167         int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
 168
 169         std::vector< std::pair<size_t, const void *> > args;
 170         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 171         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 172         args.push_back(std::make_pair(smem_size, (void *)NULL));
 173         args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
 174         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
 175         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 176         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 177         args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
 178
 179         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 180             globalThreads, localThreads, args, -1, -1);
 181     }
 182
 183     void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
 184     {
 185         string kernelName("updateFlow");
 186         int cols = divUp(flowx.cols, 4);
 187         size_t localThreads[3] = { 32, 8, 1 };
 188         size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
 189                                     divUp(flowx.rows, localThreads[1]) * localThreads[0],
 190                                     1 };
 191
 192         std::vector< std::pair<size_t, const void *> > args;
 193         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
 194         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
 195         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
 196         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
 197         args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
 198         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
 199         args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
 200         args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
 201
 202         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 203             globalThreads, localThreads, args, -1, -1);
 204     }
 205
 206     void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
 207     {
 208         string kernelName("gaussianBlur5");
 209         int height = src.rows / 5;
 210         int width = src.cols;
 211         size_t localThreads[3] = { 256, 1, 1 };
 212         size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
 213         int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
 214
 215         std::vector< std::pair<size_t, const void *> > args;
 216         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
 217         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
 218         args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
 219         args.push_back(std::make_pair(smem_size, (void *)NULL));
 220         args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
 221         args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
 222         args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
 223         args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
 224         args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
 225
 226         openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
 227             globalThreads, localThreads, args, -1, -1);
 228     }
 229 }}} // namespace cv { namespace ocl { namespace optflow_farneback
 230
 231 static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
 232 {
 233     if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
 234         return mat(Rect(0, 0, cols, rows));
 235     return mat = oclMat(rows, cols, type);
 236 }
 237
 238 void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
 239         int n, double sigma, float *g, float *xg, float *xxg,
 240         double &ig11, double &ig03, double &ig33, double &ig55)
 241 {
 242     double s = 0.;
 243     for (int x = -n; x <= n; x++)
 244     {
 245         g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
 246         s += g[x];
 247     }
 248
 249     s = 1./s;
 250     for (int x = -n; x <= n; x++)
 251     {
 252         g[x] = (float)(g[x]*s);
 253         xg[x] = (float)(x*g[x]);
 254         xxg[x] = (float)(x*x*g[x]);
 255     }
 256
 257     Mat_<double> G(6, 6);
 258     G.setTo(0);
 259
 260     for (int y = -n; y <= n; y++)
 261     {
 262         for (int x = -n; x <= n; x++)
 263         {
 264             G(0,0) += g[y]*g[x];
 265             G(1,1) += g[y]*g[x]*x*x;
 266             G(3,3) += g[y]*g[x]*x*x*x*x;
 267             G(5,5) += g[y]*g[x]*x*x*y*y;
 268         }
 269     }
 270
 271     //G[0][0] = 1.;
 272     G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
 273     G(4,4) = G(3,3);
 274     G(3,4) = G(4,3) = G(5,5);
 275
 276     // invG:
 277     // [ x        e  e    ]
 278     // [    y             ]
 279     // [       y          ]
 280     // [ e        z       ]
 281     // [ e           z    ]
 282     // [                u ]
 283     Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
 284
 285     ig11 = invG(1,1);
 286     ig03 = invG(0,3);
 287     ig33 = invG(3,3);
 288     ig55 = invG(5,5);
 289 }
 290
 291 void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
 292 {
 293     vector<float> buf(n*6 + 3);
 294     float* g = &buf[0] + n;
 295     float* xg = g + n*2 + 1;
 296     float* xxg = xg + n*2 + 1;
 297
 298     if (sigma < FLT_EPSILON)
 299         sigma = n*0.3;
 300
 301     double ig11, ig03, ig33, ig55;
 302     prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
 303
 304     cv::Mat t_g(1, n + 1, CV_32FC1, g);
 305     cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
 306     cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
 307
 308     optflow_farneback::g.upload(t_g);
 309     optflow_farneback::xg.upload(t_xg);
 310     optflow_farneback::xxg.upload(t_xxg);
 311
 312     optflow_farneback::ig[0] = static_cast<float>(ig11);
 313     optflow_farneback::ig[1] = static_cast<float>(ig03);
 314     optflow_farneback::ig[2] = static_cast<float>(ig33);
 315     optflow_farneback::ig[3] = static_cast<float>(ig55);
 316 }
 317
 318 void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
 319         const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
 320         oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
 321 {
 322     optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
 323
 324     swap(M, bufM);
 325
 326     finish();
 327
 328     optflow_farneback::updateFlowOcl(M, flowx, flowy);
 329
 330     if (updateMatrices)
 331         optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
 332 }
 333
 334
 335 void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
 336         const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
 337         oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
 338 {
 339     optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
 340
 341     swap(M, bufM);
 342
 343     optflow_farneback::updateFlowOcl(M, flowx, flowy);
 344
 345     if (updateMatrices)
 346         optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
 347 }
 348
 349
 350 void cv::ocl::FarnebackOpticalFlow::operator ()(
 351         const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
 352 {
 353     CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
 354     CV_Assert(frame0.size() == frame1.size());
 355     CV_Assert(polyN == 5 || polyN == 7);
 356     CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
 357
 358     Size size = frame0.size();
 359     oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
 360
 361     flowx.create(size, CV_32F);
 362     flowy.create(size, CV_32F);
 363     oclMat flowx0 = flowx;
 364     oclMat flowy0 = flowy;
 365
 366     // Crop unnecessary levels
 367     double scale = 1;
 368     int numLevelsCropped = 0;
 369     for (; numLevelsCropped < numLevels; numLevelsCropped++)
 370     {
 371         scale *= pyrScale;
 372         if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
 373             break;
 374     }
 375
 376     frame0.convertTo(frames_[0], CV_32F);
 377     frame1.convertTo(frames_[1], CV_32F);
 378
 379     if (fastPyramids)
 380     {
 381         // Build Gaussian pyramids using pyrDown()
 382         pyramid0_.resize(numLevelsCropped + 1);
 383         pyramid1_.resize(numLevelsCropped + 1);
 384         pyramid0_[0] = frames_[0];
 385         pyramid1_[0] = frames_[1];
 386         for (int i = 1; i <= numLevelsCropped; ++i)
 387         {
 388             pyrDown(pyramid0_[i - 1], pyramid0_[i]);
 389             pyrDown(pyramid1_[i - 1], pyramid1_[i]);
 390         }
 391     }
 392
 393     setPolynomialExpansionConsts(polyN, polySigma);
 394
 395     for (int k = numLevelsCropped; k >= 0; k--)
 396     {
 397         scale = 1;
 398         for (int i = 0; i < k; i++)
 399             scale *= pyrScale;
 400
 401         double sigma = (1./scale - 1) * 0.5;
 402         int smoothSize = cvRound(sigma*5) | 1;
 403         smoothSize = std::max(smoothSize, 3);
 404
 405         int width = cvRound(size.width*scale);
 406         int height = cvRound(size.height*scale);
 407
 408         if (fastPyramids)
 409         {
 410             width = pyramid0_[k].cols;
 411             height = pyramid0_[k].rows;
 412         }
 413
 414         if (k > 0)
 415         {
 416             curFlowX.create(height, width, CV_32F);
 417             curFlowY.create(height, width, CV_32F);
 418         }
 419         else
 420         {
 421             curFlowX = flowx0;
 422             curFlowY = flowy0;
 423         }
 424
 425         if (!prevFlowX.data)
 426         {
 427             if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
 428             {
 429                 resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
 430                 resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
 431                 multiply(scale, curFlowX, curFlowX);
 432                 multiply(scale, curFlowY, curFlowY);
 433             }
 434             else
 435             {
 436                 curFlowX.setTo(0);
 437                 curFlowY.setTo(0);
 438             }
 439         }
 440         else
 441         {
 442             resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
 443             resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
 444             multiply(1./pyrScale, curFlowX, curFlowX);
 445             multiply(1./pyrScale, curFlowY, curFlowY);
 446         }
 447
 448         oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
 449         oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
 450         oclMat R[2] =
 451         {
 452             allocMatFromBuf(5*height, width, CV_32F, R_[0]),
 453             allocMatFromBuf(5*height, width, CV_32F, R_[1])
 454         };
 455
 456         if (fastPyramids)
 457         {
 458             optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
 459             optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
 460         }
 461         else
 462         {
 463             oclMat blurredFrame[2] =
 464             {
 465                 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
 466                 allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
 467             };
 468             oclMat pyrLevel[2] =
 469             {
 470                 allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
 471                 allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
 472             };
 473
 474             Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
 475             optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
 476
 477             for (int i = 0; i < 2; i++)
 478             {
 479                 optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
 480                 resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
 481                 optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
 482             }
 483         }
 484
 485         optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
 486
 487         if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
 488         {
 489             Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
 490             optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
 491         }
 492         for (int i = 0; i < numIters; i++)
 493         {
 494             if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
 495                 updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
 496             else
 497                 updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
 498         }
 499
 500         prevFlowX = curFlowX;
 501         prevFlowY = curFlowY;
 502     }
 503
 504     flowx = curFlowX;
 505     flowy = curFlowY;
 506 }
 507