modules/video/src/tvl1flow.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /*
  44 //
  45 // This implementation is based on Javier Sánchez Pérez <jsanchez@dis.ulpgc.es> implementation.
  46 // Original BSD license:
  47 //
  48 // Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
  49 // All rights reserved.
  50 //
  51 // Redistribution and use in source and binary forms, with or without
  52 // modification, are permitted provided that the following conditions are met:
  53 //
  54 // * Redistributions of source code must retain the above copyright notice, this
  55 //   list of conditions and the following disclaimer.
  56 //
  57 // * Redistributions in binary form must reproduce the above copyright notice,
  58 //   this list of conditions and the following disclaimer in the documentation
  59 //   and/or other materials provided with the distribution.
  60 //
  61 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  62 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  63 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  64 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  65 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  66 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  67 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  68 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  69 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  70 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  71 // POSSIBILITY OF SUCH DAMAGE.
  72 //
  73 */
  74
  75 #include "precomp.hpp"
  76 #include "opencl_kernels_video.hpp"
  77
  78 #include <limits>
  79 #include <iomanip>
  80 #include <iostream>
  81 #include "opencv2/core/opencl/ocl_defs.hpp"
  82
  83
  84
  85 using namespace cv;
  86
  87 namespace {
  88
  89 class OpticalFlowDual_TVL1 : public DenseOpticalFlow
  90 {
  91 public:
  92     OpticalFlowDual_TVL1();
  93
  94     void calc(InputArray I0, InputArray I1, InputOutputArray flow);
  95     void collectGarbage();
  96
  97     AlgorithmInfo* info() const;
  98
  99 protected:
 100     double tau;
 101     double lambda;
 102     double theta;
 103     double gamma;
 104     int nscales;
 105     int warps;
 106     double epsilon;
 107     int innerIterations;
 108     int outerIterations;
 109     bool useInitialFlow;
 110     double scaleStep;
 111     int medianFiltering;
 112
 113 private:
 114    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);
 115
 116     bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);
 117
 118     bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
 119     struct dataMat
 120     {
 121         std::vector<Mat_<float> > I0s;
 122         std::vector<Mat_<float> > I1s;
 123         std::vector<Mat_<float> > u1s;
 124         std::vector<Mat_<float> > u2s;
 125         std::vector<Mat_<float> > u3s;
 126
 127         Mat_<float> I1x_buf;
 128         Mat_<float> I1y_buf;
 129
 130         Mat_<float> flowMap1_buf;
 131         Mat_<float> flowMap2_buf;
 132
 133         Mat_<float> I1w_buf;
 134         Mat_<float> I1wx_buf;
 135         Mat_<float> I1wy_buf;
 136
 137         Mat_<float> grad_buf;
 138         Mat_<float> rho_c_buf;
 139
 140         Mat_<float> v1_buf;
 141         Mat_<float> v2_buf;
 142         Mat_<float> v3_buf;
 143
 144         Mat_<float> p11_buf;
 145         Mat_<float> p12_buf;
 146         Mat_<float> p21_buf;
 147         Mat_<float> p22_buf;
 148         Mat_<float> p31_buf;
 149         Mat_<float> p32_buf;
 150
 151         Mat_<float> div_p1_buf;
 152         Mat_<float> div_p2_buf;
 153         Mat_<float> div_p3_buf;
 154
 155         Mat_<float> u1x_buf;
 156         Mat_<float> u1y_buf;
 157         Mat_<float> u2x_buf;
 158         Mat_<float> u2y_buf;
 159         Mat_<float> u3x_buf;
 160         Mat_<float> u3y_buf;
 161     } dm;
 162     struct dataUMat
 163     {
 164         std::vector<UMat> I0s;
 165         std::vector<UMat> I1s;
 166         std::vector<UMat> u1s;
 167         std::vector<UMat> u2s;
 168
 169         UMat I1x_buf;
 170         UMat I1y_buf;
 171
 172         UMat I1w_buf;
 173         UMat I1wx_buf;
 174         UMat I1wy_buf;
 175
 176         UMat grad_buf;
 177         UMat rho_c_buf;
 178
 179         UMat p11_buf;
 180         UMat p12_buf;
 181         UMat p21_buf;
 182         UMat p22_buf;
 183
 184         UMat diff_buf;
 185         UMat norm_buf;
 186     } dum;
 187 };
 188
 189 namespace cv_ocl_tvl1flow
 190 {
 191     bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);
 192
 193     bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
 194         UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
 195         UMat &grad, UMat &rho);
 196
 197     bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
 198         UMat &rho_c, UMat &p11, UMat &p12,
 199         UMat &p21, UMat &p22, UMat &u1,
 200         UMat &u2, UMat &error, float l_t, float theta, char calc_error);
 201
 202     bool estimateDualVariables(UMat &u1, UMat &u2,
 203         UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
 204 }
 205
 206 bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
 207 {
 208     size_t globalsize[2] = { src.cols, src.rows };
 209
 210     ocl::Kernel kernel;
 211     if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
 212         return false;
 213
 214     int idxArg = 0;
 215     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
 216     idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
 217     idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
 218     idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
 219     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
 220     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
 221     idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
 222     return kernel.run(2, globalsize, NULL, false);
 223 }
 224
 225 bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
 226     UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
 227     UMat &grad, UMat &rho)
 228 {
 229     size_t globalsize[2] = { I0.cols, I0.rows };
 230
 231     ocl::Kernel kernel;
 232     if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
 233         return false;
 234
 235     int idxArg = 0;
 236     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
 237     int I0_step = (int)(I0.step / I0.elemSize());
 238     idxArg = kernel.set(idxArg, I0_step);//I0_step
 239     idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
 240     idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
 241     ocl::Image2D imageI1(I1);
 242     ocl::Image2D imageI1x(I1x);
 243     ocl::Image2D imageI1y(I1y);
 244     idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
 245     idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
 246     idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
 247     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
 248     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
 249     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
 250     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
 251     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
 252     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
 253     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
 254     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
 255     idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
 256     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
 257     int u1_offset_x = (int)((u1.offset) % (u1.step));
 258     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
 259     idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
 260     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
 261     int u2_offset_x = (int)((u2.offset) % (u2.step));
 262     u2_offset_x = (int) (u2_offset_x / u2.elemSize());
 263     idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
 264     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
 265     return kernel.run(2, globalsize, NULL, false);
 266 }
 267
 268 bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
 269     UMat &rho_c, UMat &p11, UMat &p12,
 270     UMat &p21, UMat &p22, UMat &u1,
 271     UMat &u2, UMat &error, float l_t, float theta, char calc_error)
 272 {
 273     size_t globalsize[2] = { I1wx.cols, I1wx.rows };
 274
 275     ocl::Kernel kernel;
 276     if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
 277         return false;
 278
 279     int idxArg = 0;
 280     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
 281     idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
 282     idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
 283     idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
 284     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
 285     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
 286     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
 287     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
 288     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
 289     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
 290     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
 291     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
 292     idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
 293     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
 294     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
 295     idxArg = kernel.set(idxArg, (float)l_t); //float l_t
 296     idxArg = kernel.set(idxArg, (float)theta); //float theta
 297     idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
 298     int u1_offset_x = (int)(u1.offset % u1.step);
 299     u1_offset_x = (int) (u1_offset_x  / u1.elemSize());
 300     idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
 301     idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
 302     int u2_offset_x = (int)(u2.offset % u2.step);
 303     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
 304     idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
 305     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
 306     idxArg = kernel.set(idxArg, (char)calc_error);    //char calc_error
 307
 308     return kernel.run(2, globalsize, NULL, false);
 309 }
 310
 311 bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
 312     UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
 313 {
 314     size_t globalsize[2] = { u1.cols, u1.rows };
 315
 316     ocl::Kernel kernel;
 317     if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
 318         return false;
 319
 320     int idxArg = 0;
 321     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1
 322     idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col
 323     idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row
 324     idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step
 325     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2
 326     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
 327     idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step
 328     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
 329     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
 330     idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
 331     idxArg = kernel.set(idxArg, (float)(taut));    //float taut
 332     idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step
 333     int u1_offset_x = (int)(u1.offset % u1.step);
 334     u1_offset_x = (int)(u1_offset_x / u1.elemSize());
 335     idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x
 336     idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y
 337     int u2_offset_x = (int)(u2.offset % u2.step);
 338     u2_offset_x = (int)(u2_offset_x / u2.elemSize());
 339     idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x
 340     idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
 341
 342     return kernel.run(2, globalsize, NULL, false);
 343
 344 }
 345
 346 OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
 347 {
 348     tau            = 0.25;
 349     lambda         = 0.15;
 350     theta          = 0.3;
 351     nscales        = 5;
 352     warps          = 5;
 353     epsilon        = 0.01;
 354     gamma          = 0.;
 355     innerIterations = 30;
 356     outerIterations = 10;
 357     useInitialFlow = false;
 358     medianFiltering = 5;
 359     scaleStep      = 0.8;
 360 }
 361
 362 void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
 363 {
 364     CV_OCL_RUN(_flow.isUMat() &&
 365                ocl::Image2D::isFormatSupported(CV_32F, 1, false),
 366                calc_ocl(_I0, _I1, _flow))
 367
 368     Mat I0 = _I0.getMat();
 369     Mat I1 = _I1.getMat();
 370
 371     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
 372     CV_Assert( I0.size() == I1.size() );
 373     CV_Assert( I0.type() == I1.type() );
 374     CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
 375     CV_Assert( nscales > 0 );
 376     bool use_gamma = gamma != 0;
 377     // allocate memory for the pyramid structure
 378     dm.I0s.resize(nscales);
 379     dm.I1s.resize(nscales);
 380     dm.u1s.resize(nscales);
 381     dm.u2s.resize(nscales);
 382     dm.u3s.resize(nscales);
 383
 384     I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
 385     I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);
 386
 387     dm.u1s[0].create(I0.size());
 388     dm.u2s[0].create(I0.size());
 389     if (use_gamma) dm.u3s[0].create(I0.size());
 390
 391     if (useInitialFlow)
 392     {
 393         Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
 394         split(_flow.getMat(), mv);
 395     }
 396
 397     dm.I1x_buf.create(I0.size());
 398     dm.I1y_buf.create(I0.size());
 399
 400     dm.flowMap1_buf.create(I0.size());
 401     dm.flowMap2_buf.create(I0.size());
 402
 403     dm.I1w_buf.create(I0.size());
 404     dm.I1wx_buf.create(I0.size());
 405     dm.I1wy_buf.create(I0.size());
 406
 407     dm.grad_buf.create(I0.size());
 408     dm.rho_c_buf.create(I0.size());
 409
 410     dm.v1_buf.create(I0.size());
 411     dm.v2_buf.create(I0.size());
 412     dm.v3_buf.create(I0.size());
 413
 414     dm.p11_buf.create(I0.size());
 415     dm.p12_buf.create(I0.size());
 416     dm.p21_buf.create(I0.size());
 417     dm.p22_buf.create(I0.size());
 418     dm.p31_buf.create(I0.size());
 419     dm.p32_buf.create(I0.size());
 420
 421     dm.div_p1_buf.create(I0.size());
 422     dm.div_p2_buf.create(I0.size());
 423     dm.div_p3_buf.create(I0.size());
 424
 425     dm.u1x_buf.create(I0.size());
 426     dm.u1y_buf.create(I0.size());
 427     dm.u2x_buf.create(I0.size());
 428     dm.u2y_buf.create(I0.size());
 429     dm.u3x_buf.create(I0.size());
 430     dm.u3y_buf.create(I0.size());
 431
 432     // create the scales
 433     for (int s = 1; s < nscales; ++s)
 434     {
 435         resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep);
 436         resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep);
 437
 438         if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
 439         {
 440             nscales = s;
 441             break;
 442         }
 443
 444         if (useInitialFlow)
 445         {
 446             resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep);
 447             resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep);
 448
 449             multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
 450             multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
 451         }
 452         else
 453         {
 454             dm.u1s[s].create(dm.I0s[s].size());
 455             dm.u2s[s].create(dm.I0s[s].size());
 456         }
 457         if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
 458     }
 459     if (!useInitialFlow)
 460     {
 461         dm.u1s[nscales - 1].setTo(Scalar::all(0));
 462         dm.u2s[nscales - 1].setTo(Scalar::all(0));
 463     }
 464     if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
 465     // pyramidal structure for computing the optical flow
 466     for (int s = nscales - 1; s >= 0; --s)
 467     {
 468         // compute the optical flow at the current scale
 469         procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);
 470
 471         // if this was the last scale, finish now
 472         if (s == 0)
 473             break;
 474
 475         // otherwise, upsample the optical flow
 476
 477         // zoom the optical flow for the next finer scale
 478         resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size());
 479         resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size());
 480         if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size());
 481
 482         // scale the optical flow with the appropriate zoom factor (don't scale u3!)
 483         multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
 484         multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
 485     }
 486
 487     Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
 488     merge(uxy, 2, _flow);
 489 }
 490
 491 bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
 492 {
 493     UMat I0 = _I0.getUMat();
 494     UMat I1 = _I1.getUMat();
 495
 496     CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
 497     CV_Assert(I0.size() == I1.size());
 498     CV_Assert(I0.type() == I1.type());
 499     CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
 500     CV_Assert(nscales > 0);
 501
 502     // allocate memory for the pyramid structure
 503     dum.I0s.resize(nscales);
 504     dum.I1s.resize(nscales);
 505     dum.u1s.resize(nscales);
 506     dum.u2s.resize(nscales);
 507     //I0s_step == I1s_step
 508     double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;
 509
 510     I0.convertTo(dum.I0s[0], CV_32F, alpha);
 511     I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
 512
 513     dum.u1s[0].create(I0.size(), CV_32FC1);
 514     dum.u2s[0].create(I0.size(), CV_32FC1);
 515
 516     if (useInitialFlow)
 517     {
 518         std::vector<UMat> umv;
 519         umv.push_back(dum.u1s[0]);
 520         umv.push_back(dum.u2s[0]);
 521         cv::split(_flow,umv);
 522     }
 523
 524     dum.I1x_buf.create(I0.size(), CV_32FC1);
 525     dum.I1y_buf.create(I0.size(), CV_32FC1);
 526
 527     dum.I1w_buf.create(I0.size(), CV_32FC1);
 528     dum.I1wx_buf.create(I0.size(), CV_32FC1);
 529     dum.I1wy_buf.create(I0.size(), CV_32FC1);
 530
 531     dum.grad_buf.create(I0.size(), CV_32FC1);
 532     dum.rho_c_buf.create(I0.size(), CV_32FC1);
 533
 534     dum.p11_buf.create(I0.size(), CV_32FC1);
 535     dum.p12_buf.create(I0.size(), CV_32FC1);
 536     dum.p21_buf.create(I0.size(), CV_32FC1);
 537     dum.p22_buf.create(I0.size(), CV_32FC1);
 538
 539     dum.diff_buf.create(I0.size(), CV_32FC1);
 540
 541     // create the scales
 542     for (int s = 1; s < nscales; ++s)
 543     {
 544         resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep);
 545         resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep);
 546
 547         if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
 548         {
 549             nscales = s;
 550             break;
 551         }
 552
 553         if (useInitialFlow)
 554         {
 555             resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep);
 556             resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep);
 557
 558             //scale by scale factor
 559             multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
 560             multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
 561         }
 562     }
 563
 564     // pyramidal structure for computing the optical flow
 565     for (int s = nscales - 1; s >= 0; --s)
 566     {
 567         // compute the optical flow at the current scale
 568         if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
 569             return false;
 570
 571         // if this was the last scale, finish now
 572         if (s == 0)
 573             break;
 574
 575         // zoom the optical flow for the next finer scale
 576         resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size());
 577         resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size());
 578
 579         // scale the optical flow with the appropriate zoom factor
 580         multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
 581         multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
 582     }
 583
 584     std::vector<UMat> uxy;
 585     uxy.push_back(dum.u1s[0]);
 586     uxy.push_back(dum.u2s[0]);
 587     merge(uxy, _flow);
 588     return true;
 589 }
 590
 591 ////////////////////////////////////////////////////////////
 592 // buildFlowMap
 593
 594 struct BuildFlowMapBody : ParallelLoopBody
 595 {
 596     void operator() (const Range& range) const;
 597
 598     Mat_<float> u1;
 599     Mat_<float> u2;
 600     mutable Mat_<float> map1;
 601     mutable Mat_<float> map2;
 602 };
 603
 604 void BuildFlowMapBody::operator() (const Range& range) const
 605 {
 606     for (int y = range.start; y < range.end; ++y)
 607     {
 608         const float* u1Row = u1[y];
 609         const float* u2Row = u2[y];
 610
 611         float* map1Row = map1[y];
 612         float* map2Row = map2[y];
 613
 614         for (int x = 0; x < u1.cols; ++x)
 615         {
 616             map1Row[x] = x + u1Row[x];
 617             map2Row[x] = y + u2Row[x];
 618         }
 619     }
 620 }
 621
 622 void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2)
 623 {
 624     CV_DbgAssert( u2.size() == u1.size() );
 625     CV_DbgAssert( map1.size() == u1.size() );
 626     CV_DbgAssert( map2.size() == u1.size() );
 627
 628     BuildFlowMapBody body;
 629
 630     body.u1 = u1;
 631     body.u2 = u2;
 632     body.map1 = map1;
 633     body.map2 = map2;
 634
 635     parallel_for_(Range(0, u1.rows), body);
 636 }
 637
 638 ////////////////////////////////////////////////////////////
 639 // centeredGradient
 640
 641 struct CenteredGradientBody : ParallelLoopBody
 642 {
 643     void operator() (const Range& range) const;
 644
 645     Mat_<float> src;
 646     mutable Mat_<float> dx;
 647     mutable Mat_<float> dy;
 648 };
 649
 650 void CenteredGradientBody::operator() (const Range& range) const
 651 {
 652     const int last_col = src.cols - 1;
 653
 654     for (int y = range.start; y < range.end; ++y)
 655     {
 656         const float* srcPrevRow = src[y - 1];
 657         const float* srcCurRow = src[y];
 658         const float* srcNextRow = src[y + 1];
 659
 660         float* dxRow = dx[y];
 661         float* dyRow = dy[y];
 662
 663         for (int x = 1; x < last_col; ++x)
 664         {
 665             dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]);
 666             dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]);
 667         }
 668     }
 669 }
 670
 671 void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
 672 {
 673     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
 674     CV_DbgAssert( dx.size() == src.size() );
 675     CV_DbgAssert( dy.size() == src.size() );
 676
 677     const int last_row = src.rows - 1;
 678     const int last_col = src.cols - 1;
 679
 680     // compute the gradient on the center body of the image
 681     {
 682         CenteredGradientBody body;
 683
 684         body.src = src;
 685         body.dx = dx;
 686         body.dy = dy;
 687
 688         parallel_for_(Range(1, last_row), body);
 689     }
 690
 691     // compute the gradient on the first and last rows
 692     for (int x = 1; x < last_col; ++x)
 693     {
 694         dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
 695         dy(0, x) = 0.5f * (src(1, x) - src(0, x));
 696
 697         dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
 698         dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
 699     }
 700
 701     // compute the gradient on the first and last columns
 702     for (int y = 1; y < last_row; ++y)
 703     {
 704         dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
 705         dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));
 706
 707         dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
 708         dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
 709     }
 710
 711     // compute the gradient at the four corners
 712     dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
 713     dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));
 714
 715     dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
 716     dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));
 717
 718     dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
 719     dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));
 720
 721     dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
 722     dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
 723 }
 724
 725 ////////////////////////////////////////////////////////////
 726 // forwardGradient
 727
 728 struct ForwardGradientBody : ParallelLoopBody
 729 {
 730     void operator() (const Range& range) const;
 731
 732     Mat_<float> src;
 733     mutable Mat_<float> dx;
 734     mutable Mat_<float> dy;
 735 };
 736
 737 void ForwardGradientBody::operator() (const Range& range) const
 738 {
 739     const int last_col = src.cols - 1;
 740
 741     for (int y = range.start; y < range.end; ++y)
 742     {
 743         const float* srcCurRow = src[y];
 744         const float* srcNextRow = src[y + 1];
 745
 746         float* dxRow = dx[y];
 747         float* dyRow = dy[y];
 748
 749         for (int x = 0; x < last_col; ++x)
 750         {
 751             dxRow[x] = srcCurRow[x + 1] - srcCurRow[x];
 752             dyRow[x] = srcNextRow[x] - srcCurRow[x];
 753         }
 754     }
 755 }
 756
 757 void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
 758 {
 759     CV_DbgAssert( src.rows > 2 && src.cols > 2 );
 760     CV_DbgAssert( dx.size() == src.size() );
 761     CV_DbgAssert( dy.size() == src.size() );
 762
 763     const int last_row = src.rows - 1;
 764     const int last_col = src.cols - 1;
 765
 766     // compute the gradient on the central body of the image
 767     {
 768         ForwardGradientBody body;
 769
 770         body.src = src;
 771         body.dx = dx;
 772         body.dy = dy;
 773
 774         parallel_for_(Range(0, last_row), body);
 775     }
 776
 777     // compute the gradient on the last row
 778     for (int x = 0; x < last_col; ++x)
 779     {
 780         dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
 781         dy(last_row, x) = 0.0f;
 782     }
 783
 784     // compute the gradient on the last column
 785     for (int y = 0; y < last_row; ++y)
 786     {
 787         dx(y, last_col) = 0.0f;
 788         dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
 789     }
 790
 791     dx(last_row, last_col) = 0.0f;
 792     dy(last_row, last_col) = 0.0f;
 793 }
 794
 795 ////////////////////////////////////////////////////////////
 796 // divergence
 797
 798 struct DivergenceBody : ParallelLoopBody
 799 {
 800     void operator() (const Range& range) const;
 801
 802     Mat_<float> v1;
 803     Mat_<float> v2;
 804     mutable Mat_<float> div;
 805 };
 806
 807 void DivergenceBody::operator() (const Range& range) const
 808 {
 809     for (int y = range.start; y < range.end; ++y)
 810     {
 811         const float* v1Row = v1[y];
 812         const float* v2PrevRow = v2[y - 1];
 813         const float* v2CurRow = v2[y];
 814
 815         float* divRow = div[y];
 816
 817         for(int x = 1; x < v1.cols; ++x)
 818         {
 819             const float v1x = v1Row[x] - v1Row[x - 1];
 820             const float v2y = v2CurRow[x] - v2PrevRow[x];
 821
 822             divRow[x] = v1x + v2y;
 823         }
 824     }
 825 }
 826
 827 void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
 828 {
 829     CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
 830     CV_DbgAssert( v2.size() == v1.size() );
 831     CV_DbgAssert( div.size() == v1.size() );
 832
 833     {
 834         DivergenceBody body;
 835
 836         body.v1 = v1;
 837         body.v2 = v2;
 838         body.div = div;
 839
 840         parallel_for_(Range(1, v1.rows), body);
 841     }
 842
 843     // compute the divergence on the first row
 844     for(int x = 1; x < v1.cols; ++x)
 845         div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);
 846
 847     // compute the divergence on the first column
 848     for (int y = 1; y < v1.rows; ++y)
 849         div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
 850
 851     div(0, 0) = v1(0, 0) + v2(0, 0);
 852 }
 853
 854 ////////////////////////////////////////////////////////////
 855 // calcGradRho
 856
 857 struct CalcGradRhoBody : ParallelLoopBody
 858 {
 859     void operator() (const Range& range) const;
 860
 861     Mat_<float> I0;
 862     Mat_<float> I1w;
 863     Mat_<float> I1wx;
 864     Mat_<float> I1wy;
 865     Mat_<float> u1;
 866     Mat_<float> u2;
 867     mutable Mat_<float> grad;
 868     mutable Mat_<float> rho_c;
 869 };
 870
 871 void CalcGradRhoBody::operator() (const Range& range) const
 872 {
 873     for (int y = range.start; y < range.end; ++y)
 874     {
 875         const float* I0Row = I0[y];
 876         const float* I1wRow = I1w[y];
 877         const float* I1wxRow = I1wx[y];
 878         const float* I1wyRow = I1wy[y];
 879         const float* u1Row = u1[y];
 880         const float* u2Row = u2[y];
 881
 882         float* gradRow = grad[y];
 883         float* rhoRow = rho_c[y];
 884
 885         for (int x = 0; x < I0.cols; ++x)
 886         {
 887             const float Ix2 = I1wxRow[x] * I1wxRow[x];
 888             const float Iy2 = I1wyRow[x] * I1wyRow[x];
 889
 890             // store the |Grad(I1)|^2
 891             gradRow[x] = Ix2 + Iy2;
 892
 893             // compute the constant part of the rho function
 894             rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]);
 895         }
 896     }
 897 }
 898
 899 void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
 900     Mat_<float>& grad, Mat_<float>& rho_c)
 901 {
 902     CV_DbgAssert( I1w.size() == I0.size() );
 903     CV_DbgAssert( I1wx.size() == I0.size() );
 904     CV_DbgAssert( I1wy.size() == I0.size() );
 905     CV_DbgAssert( u1.size() == I0.size() );
 906     CV_DbgAssert( u2.size() == I0.size() );
 907     CV_DbgAssert( grad.size() == I0.size() );
 908     CV_DbgAssert( rho_c.size() == I0.size() );
 909
 910     CalcGradRhoBody body;
 911
 912     body.I0 = I0;
 913     body.I1w = I1w;
 914     body.I1wx = I1wx;
 915     body.I1wy = I1wy;
 916     body.u1 = u1;
 917     body.u2 = u2;
 918     body.grad = grad;
 919     body.rho_c = rho_c;
 920
 921     parallel_for_(Range(0, I0.rows), body);
 922 }
 923
 924 ////////////////////////////////////////////////////////////
 925 // estimateV
 926
 927 struct EstimateVBody : ParallelLoopBody
 928 {
 929     void operator() (const Range& range) const;
 930
 931     Mat_<float> I1wx;
 932     Mat_<float> I1wy;
 933     Mat_<float> u1;
 934     Mat_<float> u2;
 935     Mat_<float> u3;
 936     Mat_<float> grad;
 937     Mat_<float> rho_c;
 938     mutable Mat_<float> v1;
 939     mutable Mat_<float> v2;
 940     mutable Mat_<float> v3;
 941     float l_t;
 942     float gamma;
 943 };
 944
 945 void EstimateVBody::operator() (const Range& range) const
 946 {
 947     bool use_gamma = gamma != 0;
 948     for (int y = range.start; y < range.end; ++y)
 949     {
 950         const float* I1wxRow = I1wx[y];
 951         const float* I1wyRow = I1wy[y];
 952         const float* u1Row = u1[y];
 953         const float* u2Row = u2[y];
 954         const float* u3Row = use_gamma?u3[y]:NULL;
 955         const float* gradRow = grad[y];
 956         const float* rhoRow = rho_c[y];
 957
 958         float* v1Row = v1[y];
 959         float* v2Row = v2[y];
 960         float* v3Row = use_gamma ? v3[y]:NULL;
 961
 962         for (int x = 0; x < I1wx.cols; ++x)
 963         {
 964             const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
 965                                           rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
 966             float d1 = 0.0f;
 967             float d2 = 0.0f;
 968             float d3 = 0.0f;
 969             if (rho < -l_t * gradRow[x])
 970             {
 971                 d1 = l_t * I1wxRow[x];
 972                 d2 = l_t * I1wyRow[x];
 973                 if (use_gamma) d3 = l_t * gamma;
 974             }
 975             else if (rho > l_t * gradRow[x])
 976             {
 977                 d1 = -l_t * I1wxRow[x];
 978                 d2 = -l_t * I1wyRow[x];
 979                 if (use_gamma) d3 = -l_t * gamma;
 980             }
 981             else if (gradRow[x] > std::numeric_limits<float>::epsilon())
 982             {
 983                 float fi = -rho / gradRow[x];
 984                 d1 = fi * I1wxRow[x];
 985                 d2 = fi * I1wyRow[x];
 986                 if (use_gamma) d3 = fi * gamma;
 987             }
 988
 989             v1Row[x] = u1Row[x] + d1;
 990             v2Row[x] = u2Row[x] + d2;
 991             if (use_gamma) v3Row[x] = u3Row[x] + d3;
 992         }
 993     }
 994 }
 995
 996 void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
 997    Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
 998 {
 999     CV_DbgAssert( I1wy.size() == I1wx.size() );
1000     CV_DbgAssert( u1.size() == I1wx.size() );
1001     CV_DbgAssert( u2.size() == I1wx.size() );
1002     CV_DbgAssert( grad.size() == I1wx.size() );
1003     CV_DbgAssert( rho_c.size() == I1wx.size() );
1004     CV_DbgAssert( v1.size() == I1wx.size() );
1005     CV_DbgAssert( v2.size() == I1wx.size() );
1006
1007     EstimateVBody body;
1008     bool use_gamma = gamma != 0;
1009     body.I1wx = I1wx;
1010     body.I1wy = I1wy;
1011     body.u1 = u1;
1012     body.u2 = u2;
1013     if (use_gamma) body.u3 = u3;
1014     body.grad = grad;
1015     body.rho_c = rho_c;
1016     body.v1 = v1;
1017     body.v2 = v2;
1018     if (use_gamma) body.v3 = v3;
1019     body.l_t = l_t;
1020     body.gamma = gamma;
1021     parallel_for_(Range(0, I1wx.rows), body);
1022 }
1023
1024 ////////////////////////////////////////////////////////////
1025 // estimateU
1026
1027 float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
1028             const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
1029             Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
1030             float theta, float gamma)
1031 {
1032     CV_DbgAssert( v2.size() == v1.size() );
1033     CV_DbgAssert( div_p1.size() == v1.size() );
1034     CV_DbgAssert( div_p2.size() == v1.size() );
1035     CV_DbgAssert( u1.size() == v1.size() );
1036     CV_DbgAssert( u2.size() == v1.size() );
1037
1038     float error = 0.0f;
1039     bool use_gamma = gamma != 0;
1040     for (int y = 0; y < v1.rows; ++y)
1041     {
1042         const float* v1Row = v1[y];
1043         const float* v2Row = v2[y];
1044         const float* v3Row = use_gamma?v3[y]:NULL;
1045         const float* divP1Row = div_p1[y];
1046         const float* divP2Row = div_p2[y];
1047         const float* divP3Row = use_gamma?div_p3[y]:NULL;
1048
1049         float* u1Row = u1[y];
1050         float* u2Row = u2[y];
1051         float* u3Row = use_gamma?u3[y]:NULL;
1052
1053
1054         for (int x = 0; x < v1.cols; ++x)
1055         {
1056             const float u1k = u1Row[x];
1057             const float u2k = u2Row[x];
1058             const float u3k = use_gamma?u3Row[x]:0;
1059
1060             u1Row[x] = v1Row[x] + theta * divP1Row[x];
1061             u2Row[x] = v2Row[x] + theta * divP2Row[x];
1062             if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
1063             error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
1064                                (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
1065         }
1066     }
1067
1068     return error;
1069 }
1070
1071 ////////////////////////////////////////////////////////////
1072 // estimateDualVariables
1073
1074 struct EstimateDualVariablesBody : ParallelLoopBody
1075 {
1076     void operator() (const Range& range) const;
1077
1078     Mat_<float> u1x;
1079     Mat_<float> u1y;
1080     Mat_<float> u2x;
1081     Mat_<float> u2y;
1082     Mat_<float> u3x;
1083     Mat_<float> u3y;
1084     mutable Mat_<float> p11;
1085     mutable Mat_<float> p12;
1086     mutable Mat_<float> p21;
1087     mutable Mat_<float> p22;
1088     mutable Mat_<float> p31;
1089     mutable Mat_<float> p32;
1090     float taut;
1091     bool use_gamma;
1092 };
1093
1094 void EstimateDualVariablesBody::operator() (const Range& range) const
1095 {
1096     for (int y = range.start; y < range.end; ++y)
1097     {
1098         const float* u1xRow = u1x[y];
1099         const float* u1yRow = u1y[y];
1100         const float* u2xRow = u2x[y];
1101         const float* u2yRow = u2y[y];
1102         const float* u3xRow = u3x[y];
1103         const float* u3yRow = u3y[y];
1104
1105         float* p11Row = p11[y];
1106         float* p12Row = p12[y];
1107         float* p21Row = p21[y];
1108         float* p22Row = p22[y];
1109         float* p31Row = p31[y];
1110         float* p32Row = p32[y];
1111
1112         for (int x = 0; x < u1x.cols; ++x)
1113         {
1114             const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
1115             const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));
1116             const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
1117
1118             const float ng1  = 1.0f + taut * g1;
1119             const float ng2 =  1.0f + taut * g2;
1120             const float ng3 = 1.0f + taut * g3;
1121
1122             p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
1123             p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
1124             p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
1125             p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;
1126             if (use_gamma) p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
1127             if (use_gamma) p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
1128         }
1129     }
1130 }
1131
1132 void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
1133                      const Mat_<float>& u2x, const Mat_<float>& u2y,
1134                      const Mat_<float>& u3x, const Mat_<float>& u3y,
1135                            Mat_<float>& p11, Mat_<float>& p12,
1136                      Mat_<float>& p21, Mat_<float>& p22,
1137                      Mat_<float>& p31, Mat_<float>& p32,
1138                      float taut, bool use_gamma)
1139 {
1140     CV_DbgAssert( u1y.size() == u1x.size() );
1141     CV_DbgAssert( u2x.size() == u1x.size() );
1142     CV_DbgAssert( u3x.size() == u1x.size() );
1143     CV_DbgAssert( u2y.size() == u1x.size() );
1144     CV_DbgAssert( u3y.size() == u1x.size() );
1145     CV_DbgAssert( p11.size() == u1x.size() );
1146     CV_DbgAssert( p12.size() == u1x.size() );
1147     CV_DbgAssert( p21.size() == u1x.size() );
1148     CV_DbgAssert( p22.size() == u1x.size() );
1149     CV_DbgAssert( p31.size() == u1x.size() );
1150     CV_DbgAssert( p32.size() == u1x.size() );
1151
1152     EstimateDualVariablesBody body;
1153
1154     body.u1x = u1x;
1155     body.u1y = u1y;
1156     body.u2x = u2x;
1157     body.u2y = u2y;
1158     body.u3x = u3x;
1159     body.u3y = u3y;
1160     body.p11 = p11;
1161     body.p12 = p12;
1162     body.p21 = p21;
1163     body.p22 = p22;
1164     body.p31 = p31;
1165     body.p32 = p32;
1166     body.taut = taut;
1167     body.use_gamma = use_gamma;
1168
1169     parallel_for_(Range(0, u1x.rows), body);
1170 }
1171
1172 bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
1173 {
1174     using namespace cv_ocl_tvl1flow;
1175
1176     const double scaledEpsilon = epsilon * epsilon * I0.size().area();
1177
1178     CV_DbgAssert(I1.size() == I0.size());
1179     CV_DbgAssert(I1.type() == I0.type());
1180     CV_DbgAssert(u1.empty() || u1.size() == I0.size());
1181     CV_DbgAssert(u2.size() == u1.size());
1182
1183     if (u1.empty())
1184     {
1185         u1.create(I0.size(), CV_32FC1);
1186         u1.setTo(Scalar::all(0));
1187
1188         u2.create(I0.size(), CV_32FC1);
1189         u2.setTo(Scalar::all(0));
1190     }
1191
1192     UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
1193     UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
1194
1195     if (!centeredGradient(I1, I1x, I1y))
1196         return false;
1197
1198     UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
1199     UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
1200     UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
1201
1202     UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
1203     UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
1204
1205     UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
1206     UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
1207     UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
1208     UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
1209     p11.setTo(Scalar::all(0));
1210     p12.setTo(Scalar::all(0));
1211     p21.setTo(Scalar::all(0));
1212     p22.setTo(Scalar::all(0));
1213
1214     UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));
1215
1216     const float l_t = static_cast<float>(lambda * theta);
1217     const float taut = static_cast<float>(tau / theta);
1218     int n;
1219
1220     for (int warpings = 0; warpings < warps; ++warpings)
1221     {
1222         if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
1223             return false;
1224
1225         double error = std::numeric_limits<double>::max();
1226         double prev_error = 0;
1227
1228         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
1229         {
1230             if (medianFiltering > 1) {
1231                 cv::medianBlur(u1, u1, medianFiltering);
1232                 cv::medianBlur(u2, u2, medianFiltering);
1233             }
1234             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
1235             {
1236                 // some tweaks to make sum operation less frequently
1237                 n = n_inner + n_outer*innerIterations;
1238                 char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
1239                 if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
1240                     u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
1241                     return false;
1242                 if (calc_error)
1243                 {
1244                     error = cv::sum(diff)[0];
1245                     prev_error = error;
1246                 }
1247                 else
1248                 {
1249                     error = std::numeric_limits<double>::max();
1250                     prev_error -= scaledEpsilon;
1251                 }
1252                 if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
1253                     return false;
1254             }
1255         }
1256     }
1257     return true;
1258 }
1259
1260 void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
1261 {
1262     const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());
1263
1264     CV_DbgAssert( I1.size() == I0.size() );
1265     CV_DbgAssert( I1.type() == I0.type() );
1266     CV_DbgAssert( u1.size() == I0.size() );
1267     CV_DbgAssert( u2.size() == u1.size() );
1268
1269     Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
1270     Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
1271     centeredGradient(I1, I1x, I1y);
1272
1273     Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
1274     Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));
1275
1276     Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
1277     Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
1278     Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));
1279
1280     Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
1281     Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));
1282
1283     Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
1284     Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
1285     Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));
1286
1287     Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
1288     Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
1289     Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
1290     Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
1291     Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
1292     Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
1293     p11.setTo(Scalar::all(0));
1294     p12.setTo(Scalar::all(0));
1295     p21.setTo(Scalar::all(0));
1296     p22.setTo(Scalar::all(0));
1297     bool use_gamma = gamma != 0.;
1298     if (use_gamma) p31.setTo(Scalar::all(0));
1299     if (use_gamma) p32.setTo(Scalar::all(0));
1300
1301     Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
1302     Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
1303     Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));
1304
1305     Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
1306     Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
1307     Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
1308     Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
1309     Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
1310     Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));
1311
1312     const float l_t = static_cast<float>(lambda * theta);
1313     const float taut = static_cast<float>(tau / theta);
1314
1315     for (int warpings = 0; warpings < warps; ++warpings)
1316     {
1317         // compute the warping of the target image and its derivatives
1318         buildFlowMap(u1, u2, flowMap1, flowMap2);
1319         remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
1320         remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
1321         remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
1322         //calculate I1(x+u0) and its gradient
1323         calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);
1324
1325         float error = std::numeric_limits<float>::max();
1326         for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
1327         {
1328             if (medianFiltering > 1) {
1329                 cv::medianBlur(u1, u1, medianFiltering);
1330                 cv::medianBlur(u2, u2, medianFiltering);
1331             }
1332             for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
1333             {
1334                 // estimate the values of the variable (v1, v2) (thresholding operator TH)
1335                 estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));
1336
1337                 // compute the divergence of the dual variable (p1, p2, p3)
1338                 divergence(p11, p12, div_p1);
1339                 divergence(p21, p22, div_p2);
1340                 if (use_gamma) divergence(p31, p32, div_p3);
1341
1342                 // estimate the values of the optical flow (u1, u2)
1343                 error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));
1344
1345                 // compute the gradient of the optical flow (Du1, Du2)
1346                 forwardGradient(u1, u1x, u1y);
1347                 forwardGradient(u2, u2x, u2y);
1348                 if (use_gamma) forwardGradient(u3, u3x, u3y);
1349
1350                 // estimate the values of the dual variable (p1, p2, p3)
1351                 estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
1352             }
1353         }
1354     }
1355 }
1356
1357 void OpticalFlowDual_TVL1::collectGarbage()
1358 {
1359     //dataMat structure dm
1360     dm.I0s.clear();
1361     dm.I1s.clear();
1362     dm.u1s.clear();
1363     dm.u2s.clear();
1364
1365     dm.I1x_buf.release();
1366     dm.I1y_buf.release();
1367
1368     dm.flowMap1_buf.release();
1369     dm.flowMap2_buf.release();
1370
1371     dm.I1w_buf.release();
1372     dm.I1wx_buf.release();
1373     dm.I1wy_buf.release();
1374
1375     dm.grad_buf.release();
1376     dm.rho_c_buf.release();
1377
1378     dm.v1_buf.release();
1379     dm.v2_buf.release();
1380
1381     dm.p11_buf.release();
1382     dm.p12_buf.release();
1383     dm.p21_buf.release();
1384     dm.p22_buf.release();
1385
1386     dm.div_p1_buf.release();
1387     dm.div_p2_buf.release();
1388
1389     dm.u1x_buf.release();
1390     dm.u1y_buf.release();
1391     dm.u2x_buf.release();
1392     dm.u2y_buf.release();
1393
1394     //dataUMat structure dum
1395     dum.I0s.clear();
1396     dum.I1s.clear();
1397     dum.u1s.clear();
1398     dum.u2s.clear();
1399
1400     dum.I1x_buf.release();
1401     dum.I1y_buf.release();
1402
1403     dum.I1w_buf.release();
1404     dum.I1wx_buf.release();
1405     dum.I1wy_buf.release();
1406
1407     dum.grad_buf.release();
1408     dum.rho_c_buf.release();
1409
1410     dum.p11_buf.release();
1411     dum.p12_buf.release();
1412     dum.p21_buf.release();
1413     dum.p22_buf.release();
1414
1415     dum.diff_buf.release();
1416     dum.norm_buf.release();
1417 }
1418
1419
1420 CV_INIT_ALGORITHM(OpticalFlowDual_TVL1, "DenseOpticalFlow.DualTVL1",
1421                   obj.info()->addParam(obj, "tau", obj.tau, false, 0, 0,
1422                                        "Time step of the numerical scheme");
1423                   obj.info()->addParam(obj, "lambda", obj.lambda, false, 0, 0,
1424                                        "Weight parameter for the data term, attachment parameter");
1425                   obj.info()->addParam(obj, "theta", obj.theta, false, 0, 0,
1426                                        "Weight parameter for (u - v)^2, tightness parameter");
1427                   obj.info()->addParam(obj, "nscales", obj.nscales, false, 0, 0,
1428                                        "Number of scales used to create the pyramid of images");
1429                   obj.info()->addParam(obj, "warps", obj.warps, false, 0, 0,
1430                                        "Number of warpings per scale");
1431                   obj.info()->addParam(obj, "medianFiltering", obj.medianFiltering, false, 0, 0,
1432                                        "Median filter kernel size (1 = no filter) (3 or 5)");
1433                   obj.info()->addParam(obj, "scaleStep", obj.scaleStep, false, 0, 0,
1434                                        "Step between scales (<1)");
1435                   obj.info()->addParam(obj, "epsilon", obj.epsilon, false, 0, 0,
1436                                        "Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time");
1437                   obj.info()->addParam(obj, "innerIterations", obj.innerIterations, false, 0, 0,
1438                                        "inner iterations (between outlier filtering) used in the numerical scheme");
1439                   obj.info()->addParam(obj, "outerIterations", obj.outerIterations, false, 0, 0,
1440                                        "outer iterations (number of inner loops) used in the numerical scheme");
1441                   obj.info()->addParam(obj, "gamma", obj.gamma, false, 0, 0,
1442                                        "coefficient for additional illumination variation term");
1443                   obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow))
1444
1445 } // namespace
1446
1447 Ptr<DenseOpticalFlow> cv::createOptFlow_DualTVL1()
1448 {
1449     return makePtr<OpticalFlowDual_TVL1>();
1450 }