/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <float.h>
#include <stdio.h>
#include "lkpyramid.hpp"
#include "opencl_kernels_video.hpp"
#include "opencv2/core/hal/intrin.hpp"

#include "opencv2/core/openvx/ovx_defs.hpp"

#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
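// CV_DESCALE divides x by 2^n with rounding to the nearest integer, e.g.
// CV_DESCALE(100, 4) == (100 + 8) >> 4 == 6; this is how the fixed-point
// intermediate results below are brought back to the working scale.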
static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
{
    using namespace cv;
    using cv::detail::deriv_type;
    int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
    CV_Assert(depth == CV_8U);
    dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));

    int x, y, delta = (int)alignSize((cols + 2)*cn, 16);
    AutoBuffer<deriv_type> _tempBuf(delta*2 + 64);
    deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16);

#if CV_SIMD128
    v_int16x8 c3 = v_setall_s16(3), c10 = v_setall_s16(10);
    bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON);
#endif
    for( y = 0; y < rows; y++ )
    {
        const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
        const uchar* srow1 = src.ptr<uchar>(y);
        const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
        deriv_type* drow = dst.ptr<deriv_type>(y);

        // do vertical convolution
        x = 0;
#if CV_SIMD128
        if( haveSIMD )
        {
            for( ; x <= colsn - 8; x += 8 )
            {
                v_int16x8 s0 = v_reinterpret_as_s16(v_load_expand(srow0 + x));
                v_int16x8 s1 = v_reinterpret_as_s16(v_load_expand(srow1 + x));
                v_int16x8 s2 = v_reinterpret_as_s16(v_load_expand(srow2 + x));

                v_int16x8 t1 = s2 - s0;
                v_int16x8 t0 = (s0 + s2) * c3 + s1 * c10;

                v_store(trow0 + x, t0);
                v_store(trow1 + x, t1);
            }
        }
#endif
        for( ; x < colsn; x++ )
        {
            int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10;
            int t1 = srow2[x] - srow0[x];
            trow0[x] = (deriv_type)t0;
            trow1[x] = (deriv_type)t1;
        }

        // make border
        int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn;
        for( int k = 0; k < cn; k++ )
        {
            trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
            trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
        }

        // do horizontal convolution, interleave the results and store them to dst
        x = 0;
#if CV_SIMD128
        if( haveSIMD )
        {
            for( ; x <= colsn - 8; x += 8 )
            {
                v_int16x8 s0 = v_load(trow0 + x - cn);
                v_int16x8 s1 = v_load(trow0 + x + cn);
                v_int16x8 s2 = v_load(trow1 + x - cn);
                v_int16x8 s3 = v_load(trow1 + x);
                v_int16x8 s4 = v_load(trow1 + x + cn);

                v_int16x8 t0 = s1 - s0;
                v_int16x8 t1 = ((s2 + s4) * c3) + (s3 * c10);

                v_store_interleave((drow + x*2), t0, t1);
            }
        }
#endif
        for( ; x < colsn; x++ )
        {
            deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]);
            deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
            drow[x*2] = t0; drow[x*2+1] = t1;
        }
    }
}
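// calcSharrDeriv above applies the 3x3 Scharr operator separably: the vertical
// pass smooths with the (3, 10, 3) kernel while differencing for dI/dy, and the
// horizontal pass completes both derivatives, interleaved per pixel as
// (dI/dx, dI/dy):
//
//          [ -3  0   3 ]          [ -3 -10 -3 ]
//     Gx = [-10  0  10 ]     Gy = [  0   0  0 ]
//          [ -3  0   3 ]          [  3  10  3 ]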
cv::detail::LKTrackerInvoker::LKTrackerInvoker(
    const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
    const Point2f* _prevPts, Point2f* _nextPts,
    uchar* _status, float* _err,
    Size _winSize, TermCriteria _criteria,
    int _level, int _maxLevel, int _flags, float _minEigThreshold )
{
    prevImg = &_prevImg;
    prevDeriv = &_prevDeriv;
    nextImg = &_nextImg;
    prevPts = _prevPts;
    nextPts = _nextPts;
    status = _status;
    err = _err;
    winSize = _winSize;
    criteria = _criteria;
    level = _level;
    maxLevel = _maxLevel;
    flags = _flags;
    minEigThreshold = _minEigThreshold;
}
#if defined __arm__ && !CV_NEON
typedef int64 acctype;
typedef int itemtype;
#else
typedef float acctype;
typedef float itemtype;
#endif
void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
{
    CV_INSTRUMENT_REGION()

    Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
    const Mat& I = *prevImg;
    const Mat& J = *nextImg;
    const Mat& derivI = *prevDeriv;

    int j, cn = I.channels(), cn2 = cn*2;
    cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
    int derivDepth = DataType<deriv_type>::depth;

    Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
    Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
    {
        Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
        Point2f nextPt;
        if( level == maxLevel )
        {
            if( flags & OPTFLOW_USE_INITIAL_FLOW )
                nextPt = nextPts[ptidx]*(float)(1./(1 << level));
            else
                nextPt = prevPt;
        }
        else
            nextPt = nextPts[ptidx]*2.f;
        nextPts[ptidx] = nextPt;

        Point2i iprevPt, inextPt;
        prevPt -= halfWin;
        iprevPt.x = cvFloor(prevPt.x);
        iprevPt.y = cvFloor(prevPt.y);

        if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
            iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
        {
            if( level == 0 )
            {
                if( status )
                    status[ptidx] = false;
                if( err )
                    err[ptidx] = 0;
            }
            continue;
        }
        float a = prevPt.x - iprevPt.x;
        float b = prevPt.y - iprevPt.y;
        const int W_BITS = 14, W_BITS1 = 14;
        const float FLT_SCALE = 1.f/(1 << 20);
        int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
        int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
        int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
        int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
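        // Bilinear interpolation weights in Q14 fixed point; computing iw11 as
        // the remainder guarantees iw00 + iw01 + iw10 + iw11 == (1 << W_BITS)
        // exactly, so the interpolation never changes the overall brightness.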
        int dstep = (int)(derivI.step/derivI.elemSize1());
        int stepI = (int)(I.step/I.elemSize1());
        int stepJ = (int)(J.step/J.elemSize1());
        acctype iA11 = 0, iA12 = 0, iA22 = 0;
        float A11, A12, A22;
#if CV_SSE2
        __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
        __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
        __m128i z = _mm_setzero_si128();
        __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1));
        __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1));
        __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps();
#endif
#if CV_NEON
        float CV_DECL_ALIGNED(16) nA11[] = { 0, 0, 0, 0 }, nA12[] = { 0, 0, 0, 0 }, nA22[] = { 0, 0, 0, 0 };
        const int shifter1 = -(W_BITS - 5); //negative so it shifts right
        const int shifter2 = -(W_BITS);

        const int16x4_t d26 = vdup_n_s16((int16_t)iw00);
        const int16x4_t d27 = vdup_n_s16((int16_t)iw01);
        const int16x4_t d28 = vdup_n_s16((int16_t)iw10);
        const int16x4_t d29 = vdup_n_s16((int16_t)iw11);

        const int32x4_t q11 = vdupq_n_s32((int32_t)shifter1);
        const int32x4_t q12 = vdupq_n_s32((int32_t)shifter2);
#endif
        // extract the patch from the first image, compute the covariance matrix of derivatives
        int x, y;
        for( y = 0; y < winSize.height; y++ )
        {
            const uchar* src = I.ptr() + (y + iprevPt.y)*stepI + iprevPt.x*cn;
            const deriv_type* dsrc = derivI.ptr<deriv_type>() + (y + iprevPt.y)*dstep + iprevPt.x*cn2;

            deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
            deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);

            x = 0;
#if CV_SSE2
            for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
            {
                __m128i v00, v01, v10, v11, t0, t1;

                v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z);
                v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z);
                v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z);
                v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z);

                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
                _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0));

                v00 = _mm_loadu_si128((const __m128i*)(dsrc));
                v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2));
                v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep));
                v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2));

                t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                   _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
                t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1);
                t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1);
                v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ...

                _mm_storeu_si128((__m128i*)dIptr, v00);
                t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3
                t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3

                __m128 fy = _mm_cvtepi32_ps(t0);
                __m128 fx = _mm_cvtepi32_ps(t1);

                qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy));
                qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy));
                qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx));
            }
#endif
#if CV_NEON
            for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
            {
                uint8x8_t d0 = vld1_u8(&src[x]);
                uint8x8_t d2 = vld1_u8(&src[x+cn]);
                uint16x8_t q0 = vmovl_u8(d0);
                uint16x8_t q1 = vmovl_u8(d2);

                int32x4_t q5 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26);
                int32x4_t q6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27);

                uint8x8_t d4 = vld1_u8(&src[x + stepI]);
                uint8x8_t d6 = vld1_u8(&src[x + stepI + cn]);
                uint16x8_t q2 = vmovl_u8(d4);
                uint16x8_t q3 = vmovl_u8(d6);

                int32x4_t q7 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28);
                int32x4_t q8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29);

                q5 = vaddq_s32(q5, q6);
                q7 = vaddq_s32(q7, q8);
                q5 = vaddq_s32(q5, q7);

                int16x4x2_t d0d1 = vld2_s16(dsrc);
                int16x4x2_t d2d3 = vld2_s16(&dsrc[cn2]);

                q5 = vqrshlq_s32(q5, q11);

                int32x4_t q4 = vmull_s16(d0d1.val[0], d26);
                q6 = vmull_s16(d0d1.val[1], d26);

                int16x4_t nd0 = vmovn_s32(q5);

                q7 = vmull_s16(d2d3.val[0], d27);
                q8 = vmull_s16(d2d3.val[1], d27);

                vst1_s16(&Iptr[x], nd0);

                int16x4x2_t d4d5 = vld2_s16(&dsrc[dstep]);
                int16x4x2_t d6d7 = vld2_s16(&dsrc[dstep+cn2]);

                q4 = vaddq_s32(q4, q7);
                q6 = vaddq_s32(q6, q8);

                q7 = vmull_s16(d4d5.val[0], d28);
                int32x4_t q14 = vmull_s16(d4d5.val[1], d28);
                q8 = vmull_s16(d6d7.val[0], d29);
                int32x4_t q15 = vmull_s16(d6d7.val[1], d29);

                q7 = vaddq_s32(q7, q8);
                q14 = vaddq_s32(q14, q15);

                q4 = vaddq_s32(q4, q7);
                q6 = vaddq_s32(q6, q14);

                float32x4_t nq0 = vld1q_f32(nA11);
                float32x4_t nq1 = vld1q_f32(nA12);
                float32x4_t nq2 = vld1q_f32(nA22);

                q4 = vqrshlq_s32(q4, q12);
                q6 = vqrshlq_s32(q6, q12);

                q7 = vmulq_s32(q4, q4);
                q8 = vmulq_s32(q4, q6);
                q15 = vmulq_s32(q6, q6);

                nq0 = vaddq_f32(nq0, vcvtq_f32_s32(q7));
                nq1 = vaddq_f32(nq1, vcvtq_f32_s32(q8));
                nq2 = vaddq_f32(nq2, vcvtq_f32_s32(q15));

                vst1q_f32(nA11, nq0);
                vst1q_f32(nA12, nq1);
                vst1q_f32(nA22, nq2);

                int16x4_t d8 = vmovn_s32(q4);
                int16x4_t d12 = vmovn_s32(q6);

                int16x4x2_t d8d12;
                d8d12.val[0] = d8; d8d12.val[1] = d12;
                vst2_s16(dIptr, d8d12);
            }
#endif
            for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
            {
                int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
                                      src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
                int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
                int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);

                Iptr[x] = (short)ival;
                dIptr[0] = (short)ixval;
                dIptr[1] = (short)iyval;

                iA11 += (itemtype)(ixval*ixval);
                iA12 += (itemtype)(ixval*iyval);
                iA22 += (itemtype)(iyval*iyval);
            }
        }
#if CV_SSE2
        float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4];
        _mm_store_ps(A11buf, qA11);
        _mm_store_ps(A12buf, qA12);
        _mm_store_ps(A22buf, qA22);
        iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3];
        iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3];
        iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3];
#endif

#if CV_NEON
        iA11 += nA11[0] + nA11[1] + nA11[2] + nA11[3];
        iA12 += nA12[0] + nA12[1] + nA12[2] + nA12[3];
        iA22 += nA22[0] + nA22[1] + nA22[2] + nA22[3];
#endif
        A11 = iA11*FLT_SCALE;
        A12 = iA12*FLT_SCALE;
        A22 = iA22*FLT_SCALE;

        float D = A11*A22 - A12*A12;
        float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
                        4.f*A12*A12))/(2*winSize.width*winSize.height);
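        // minEig is the smaller eigenvalue of the 2x2 spatial gradient matrix
        // G = [A11 A12; A12 A22], normalized by the number of pixels in the
        // window: eig = ((A11+A22) -+ sqrt((A11-A22)^2 + 4*A12^2)) / 2.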
        if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
            err[ptidx] = (float)minEig;

        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
            if( level == 0 && status )
                status[ptidx] = false;
            continue;
        }

        D = 1.f/D;

        nextPt -= halfWin;
        Point2f prevDelta;

        for( j = 0; j < criteria.maxCount; j++ )
        {
            inextPt.x = cvFloor(nextPt.x);
            inextPt.y = cvFloor(nextPt.y);

            if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
                inextPt.y < -winSize.height || inextPt.y >= J.rows )
            {
                if( level == 0 && status )
                    status[ptidx] = false;
                break;
            }
            a = nextPt.x - inextPt.x;
            b = nextPt.y - inextPt.y;
            iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
            iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
            iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            acctype ib1 = 0, ib2 = 0;
            float b1, b2;
#if CV_SSE2
            qw0 = _mm_set1_epi32(iw00 + (iw01 << 16));
            qw1 = _mm_set1_epi32(iw10 + (iw11 << 16));
            __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps();
#endif

#if CV_NEON
            float CV_DECL_ALIGNED(16) nB1[] = { 0,0,0,0 }, nB2[] = { 0,0,0,0 };

            const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00);
            const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01);
            const int16x4_t d28_2 = vdup_n_s16((int16_t)iw10);
            const int16x4_t d29_2 = vdup_n_s16((int16_t)iw11);
#endif
            for( y = 0; y < winSize.height; y++ )
            {
                const uchar* Jptr = J.ptr() + (y + inextPt.y)*stepJ + inextPt.x*cn;
                const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
                const deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);

                x = 0;
#if CV_SSE2
                for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
                {
                    __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1;
                    __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z);
                    __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z);
                    __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z);
                    __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z);

                    __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1));
                    __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0),
                                               _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1));
                    t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5);
                    t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5);
                    diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0);
                    diff1 = _mm_unpackhi_epi16(diff0, diff0);
                    diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ...
                    v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ...
                    v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8));
                    v10 = _mm_unpacklo_epi16(v00, v01);
                    v11 = _mm_unpackhi_epi16(v00, v01);
                    v00 = _mm_unpacklo_epi16(diff0, diff1);
                    v01 = _mm_unpackhi_epi16(diff0, diff1);
                    v00 = _mm_madd_epi16(v00, v10);
                    v11 = _mm_madd_epi16(v01, v11);
                    qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00));
                    qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v11));
                }
#endif
#if CV_NEON
                for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 )
                {
                    uint8x8_t d0 = vld1_u8(&Jptr[x]);
                    uint8x8_t d2 = vld1_u8(&Jptr[x+cn]);
                    uint8x8_t d4 = vld1_u8(&Jptr[x+stepJ]);
                    uint8x8_t d6 = vld1_u8(&Jptr[x+stepJ+cn]);

                    uint16x8_t q0 = vmovl_u8(d0);
                    uint16x8_t q1 = vmovl_u8(d2);
                    uint16x8_t q2 = vmovl_u8(d4);
                    uint16x8_t q3 = vmovl_u8(d6);

                    int32x4_t nq4 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26_2);
                    int32x4_t nq5 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q0)), d26_2);

                    int32x4_t nq6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27_2);
                    int32x4_t nq7 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q1)), d27_2);

                    int32x4_t nq8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28_2);
                    int32x4_t nq9 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q2)), d28_2);

                    int32x4_t nq10 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29_2);
                    int32x4_t nq11 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q3)), d29_2);

                    nq4 = vaddq_s32(nq4, nq6);
                    nq5 = vaddq_s32(nq5, nq7);
                    nq8 = vaddq_s32(nq8, nq10);
                    nq9 = vaddq_s32(nq9, nq11);

                    int16x8_t q6 = vld1q_s16(&Iptr[x]);

                    nq4 = vaddq_s32(nq4, nq8);
                    nq5 = vaddq_s32(nq5, nq9);

                    nq8 = vmovl_s16(vget_high_s16(q6));
                    nq6 = vmovl_s16(vget_low_s16(q6));

                    nq4 = vqrshlq_s32(nq4, q11);
                    nq5 = vqrshlq_s32(nq5, q11);

                    int16x8x2_t q0q1 = vld2q_s16(dIptr);
                    float32x4_t nB1v = vld1q_f32(nB1);
                    float32x4_t nB2v = vld1q_f32(nB2);

                    nq4 = vsubq_s32(nq4, nq6);
                    nq5 = vsubq_s32(nq5, nq8);

                    int32x4_t nq2 = vmovl_s16(vget_low_s16(q0q1.val[0]));
                    int32x4_t nq3 = vmovl_s16(vget_high_s16(q0q1.val[0]));

                    nq7 = vmovl_s16(vget_low_s16(q0q1.val[1]));
                    nq8 = vmovl_s16(vget_high_s16(q0q1.val[1]));

                    nq9 = vmulq_s32(nq4, nq2);
                    nq10 = vmulq_s32(nq5, nq3);

                    nq4 = vmulq_s32(nq4, nq7);
                    nq5 = vmulq_s32(nq5, nq8);

                    nq9 = vaddq_s32(nq9, nq10);
                    nq4 = vaddq_s32(nq4, nq5);

                    nB1v = vaddq_f32(nB1v, vcvtq_f32_s32(nq9));
                    nB2v = vaddq_f32(nB2v, vcvtq_f32_s32(nq4));

                    vst1q_f32(nB1, nB1v);
                    vst1q_f32(nB2, nB2v);
                }
#endif
                for( ; x < winSize.width*cn; x++, dIptr += 2 )
                {
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
                                          W_BITS1-5) - Iptr[x];
                    ib1 += (itemtype)(diff*dIptr[0]);
                    ib2 += (itemtype)(diff*dIptr[1]);
                }
            }

#if CV_SSE2
            float CV_DECL_ALIGNED(16) bbuf[4];
            _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1));
            ib1 += bbuf[0] + bbuf[2];
            ib2 += bbuf[1] + bbuf[3];
#endif

#if CV_NEON
            ib1 += (float)(nB1[0] + nB1[1] + nB1[2] + nB1[3]);
            ib2 += (float)(nB2[0] + nB2[1] + nB2[2] + nB2[3]);
#endif

            b1 = ib1*FLT_SCALE;
            b2 = ib2*FLT_SCALE;
            Point2f delta( (float)((A12*b2 - A22*b1) * D),
                           (float)((A12*b1 - A11*b2) * D));
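            // Closed-form solution of the 2x2 LK normal equations G*d = -b:
            // with D holding 1/det(G), delta = (1/det) * (A12*b2 - A22*b1,
            // A12*b1 - A11*b2), i.e. delta = -G^-1 * b.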
            nextPt += delta;
            nextPts[ptidx] = nextPt + halfWin;

            if( delta.ddot(delta) <= criteria.epsilon )
                break;

            if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
                std::abs(delta.y + prevDelta.y) < 0.01 )
            {
                nextPts[ptidx] -= delta*0.5f;
                break;
            }
            prevDelta = delta;
        }
        CV_Assert(status != NULL);
        if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
        {
            Point2f nextPoint = nextPts[ptidx] - halfWin;
            Point2i inextPoint;

            inextPoint.x = cvFloor(nextPoint.x);
            inextPoint.y = cvFloor(nextPoint.y);

            if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
                inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
            {
                if( status )
                    status[ptidx] = false;
                continue;
            }

            float aa = nextPoint.x - inextPoint.x;
            float bb = nextPoint.y - inextPoint.y;
            iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
            iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
            iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            float errval = 0.f;

            for( y = 0; y < winSize.height; y++ )
            {
                const uchar* Jptr = J.ptr() + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
                const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);

                for( x = 0; x < winSize.width*cn; x++ )
                {
                    int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
                                          Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
                                          W_BITS1-5) - Iptr[x];
                    errval += std::abs((float)diff);
                }
            }
            err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);
        }
    }
}
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
                                int pyrBorder, int derivBorder, bool tryReuseInputImage)
{
    CV_INSTRUMENT_REGION()

    Mat img = _img.getMat();
    CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
    int pyrstep = withDerivatives ? 2 : 1;

    pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
    int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);

    //level 0
    bool lvl0IsSet = false;
    if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
    {
        Size wholeSize;
        Point ofs;
        img.locateROI(wholeSize, ofs);
        if (ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + img.cols + winSize.width <= wholeSize.width
                && ofs.y + img.rows + winSize.height <= wholeSize.height)
        {
            pyramid.getMatRef(0) = img;
            lvl0IsSet = true;
        }
    }
    if(!lvl0IsSet)
    {
        Mat& temp = pyramid.getMatRef(0);

        if(!temp.empty())
            temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
        if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
            temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());

        if(pyrBorder == BORDER_TRANSPARENT)
            img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
        else
            copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
        temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
    }
    Size sz = img.size();
    Mat prevLevel = pyramid.getMatRef(0);
    Mat thisLevel = prevLevel;
    for(int level = 0; level <= maxLevel; ++level)
    {
        if (level != 0)
        {
            Mat& temp = pyramid.getMatRef(level * pyrstep);

            if(!temp.empty())
                temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
                temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());

            thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
            pyrDown(prevLevel, thisLevel, sz);

            if(pyrBorder != BORDER_TRANSPARENT)
                copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
            temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        if(withDerivatives)
        {
            Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);

            if(!deriv.empty())
                deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
            if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
                deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);

            Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
            calcSharrDeriv(thisLevel, derivI);

            if(derivBorder != BORDER_TRANSPARENT)
                copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
            deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
        }

        sz = Size((sz.width+1)/2, (sz.height+1)/2);
        if( sz.width <= winSize.width || sz.height <= winSize.height )
        {
            // shrink the output array to the number of levels actually built
            pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);
            return level;
        }

        prevLevel = thisLevel;
    }

    return maxLevel;
}
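// Usage sketch (illustrative, not part of the library): build the padded
// pyramids once per frame and reuse them across calcOpticalFlowPyrLK calls.
// `prevGray`, `nextGray`, `pts`, `nextPts`, `status` and `err` are assumed
// caller-provided; the frames must be CV_8UC1.
//
//   std::vector<cv::Mat> prevPyr, nextPyr;
//   int maxLvl = cv::buildOpticalFlowPyramid(prevGray, prevPyr, cv::Size(21,21), 3);
//   maxLvl = cv::buildOpticalFlowPyramid(nextGray, nextPyr, cv::Size(21,21), maxLvl);
//   cv::calcOpticalFlowPyrLK(prevPyr, nextPyr, pts, nextPts, status, err,
//                            cv::Size(21,21), maxLvl);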
namespace cv
{
class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow
{
    struct dim3
    {
        unsigned int x, y, z;
        dim3() : x(0), y(0), z(0) { }
    };
public:
    SparsePyrLKOpticalFlowImpl(Size winSize_ = Size(21,21),
                               int maxLevel_ = 3,
                               TermCriteria criteria_ = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01),
                               int flags_ = 0,
                               double minEigThreshold_ = 1e-4) :
        winSize(winSize_), maxLevel(maxLevel_), criteria(criteria_), flags(flags_), minEigThreshold(minEigThreshold_)
#ifdef HAVE_OPENCL
        , iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_USE_INITIAL_FLOW)), waveSize(0)
#endif
    {
    }
    virtual Size getWinSize() const CV_OVERRIDE { return winSize;}
    virtual void setWinSize(Size winSize_) CV_OVERRIDE { winSize = winSize_;}

    virtual int getMaxLevel() const CV_OVERRIDE { return maxLevel;}
    virtual void setMaxLevel(int maxLevel_) CV_OVERRIDE { maxLevel = maxLevel_;}

    virtual TermCriteria getTermCriteria() const CV_OVERRIDE { return criteria;}
    virtual void setTermCriteria(TermCriteria& crit_) CV_OVERRIDE { criteria = crit_;}

    virtual int getFlags() const CV_OVERRIDE { return flags; }
    virtual void setFlags(int flags_) CV_OVERRIDE { flags = flags_;}

    virtual double getMinEigThreshold() const CV_OVERRIDE { return minEigThreshold;}
    virtual void setMinEigThreshold(double minEigThreshold_) CV_OVERRIDE { minEigThreshold = minEigThreshold_;}

    virtual void calc(InputArray prevImg, InputArray nextImg,
                      InputArray prevPts, InputOutputArray nextPts,
                      OutputArray status,
                      OutputArray err = cv::noArray()) CV_OVERRIDE;
private:
#ifdef HAVE_OPENCL
    bool checkParam()
    {
        iters = std::min(std::max(iters, 0), 100);

        derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);
        if (derivLambda < 0)
            return false;
        if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2)
            return false;
        if (winSize.width < 8 || winSize.height < 8 ||
            winSize.width > 24 || winSize.height > 24)
            return false;
        calcPatchSize();
        if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
            return false;
        return true;
    }
    bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err)
    {
        if (!checkParam())
            return false;

        UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
        UMat temp2 = nextPts.reshape(1);
        multiply(1.0f / (1 << maxLevel) / 2.0f, temp1, temp2);

        status.setTo(Scalar::all(1));

        // build the image pyramids.
        std::vector<UMat> prevPyr; prevPyr.resize(maxLevel + 1);
        std::vector<UMat> nextPyr; nextPyr.resize(maxLevel + 1);

        // allocate buffers with aligned pitch to be able to use cl_khr_image2d_from_buffer extension
        // This is the required pitch alignment in pixels
        int pitchAlign = (int)ocl::Device::getDefault().imagePitchAlignment();
        if (pitchAlign > 0)
        {
            prevPyr[0] = UMat(prevImg.rows,(prevImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,prevImg.cols);
            nextPyr[0] = UMat(nextImg.rows,(nextImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,nextImg.cols);
            for (int level = 1; level <= maxLevel; ++level)
            {
                int cols, rows;
                // allocate buffers with aligned pitch to be able to use image on buffer extension
                cols = (prevPyr[level - 1].cols+1)/2;
                rows = (prevPyr[level - 1].rows+1)/2;
                prevPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),prevPyr[level-1].type()).colRange(0,cols);
                cols = (nextPyr[level - 1].cols+1)/2;
                rows = (nextPyr[level - 1].rows+1)/2;
                nextPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),nextPyr[level-1].type()).colRange(0,cols);
            }
        }

        prevImg.convertTo(prevPyr[0], CV_32F);
        nextImg.convertTo(nextPyr[0], CV_32F);

        for (int level = 1; level <= maxLevel; ++level)
        {
            pyrDown(prevPyr[level - 1], prevPyr[level]);
            pyrDown(nextPyr[level - 1], nextPyr[level]);
        }

        // dI/dx ~ Ix, dI/dy ~ Iy
        for (int level = maxLevel; level >= 0; level--)
        {
            if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts,
                              nextPts, status, err,
                              prevPts.cols, level))
                return false;
        }
        return true;
    }
#endif
private:
    Size winSize;
    int maxLevel;
    TermCriteria criteria;
    int flags;
    double minEigThreshold;
#ifdef HAVE_OPENCL
    int iters;
    double derivLambda;
    bool useInitialFlow;
    int waveSize;
    bool initWaveSize()
    {
        waveSize = 1;
        if (isDeviceCPU())
            return true;

        ocl::Kernel kernel;
        if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
            return false;
        waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
        return true;
    }
    dim3 patch;
    void calcPatchSize()
    {
        dim3 block;

        if (winSize.width > 32 && winSize.width > 2 * winSize.height)
        {
            block.x = 32;
            block.y = 8;
        }
        else
        {
            block.x = 16;
            block.y = 16;
        }

        patch.x = (winSize.width + block.x - 1) / block.x;
        patch.y = (winSize.height + block.y - 1) / block.y;

        block.z = patch.z = 1;
    }
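    // patch.x/patch.y give the number of block-sized tiles needed to cover the
    // tracking window, e.g. a 21x21 window with a 16x16 block yields a 2x2
    // patch per work-group.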
    bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err,
                      int ptcount, int level)
    {
        size_t localThreads[3]  = { 8, 8 };
        size_t globalThreads[3] = { 8 * (size_t)ptcount, 8 };
        char calcErr = (0 == level) ? 1 : 0;

        int wsx = 1, wsy = 1;
        if(winSize.width < 16)
            wsx = 0;
        if(winSize.height < 16)
            wsy = 0;
        cv::String build_options;
        if (isDeviceCPU())
            build_options = " -D CPU";
        else
        {
            // make sure the device wave size has been queried before it is
            // baked into the build options
            if (waveSize <= 0 && !initWaveSize())
                return false;
            build_options = cv::format("-D WAVE_SIZE=%d -D WSX=%d -D WSY=%d",
                                       waveSize, wsx, wsy);
        }

        ocl::Kernel kernel;
        if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
            return false;

        CV_Assert(I.depth() == CV_32F && J.depth() == CV_32F);
        ocl::Image2D imageI(I, false, ocl::Image2D::canCreateAlias(I));
        ocl::Image2D imageJ(J, false, ocl::Image2D::canCreateAlias(J));

        int idxArg = 0;
        idxArg = kernel.set(idxArg, imageI);                                // image2d_t I
        idxArg = kernel.set(idxArg, imageJ);                                // image2d_t J
        idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts));  // __global const float2* prevPts
        idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global float2* nextPts
        idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status));  // __global uchar* status
        idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err));     // __global float* err
        idxArg = kernel.set(idxArg, (int)level);                            // const int level
        idxArg = kernel.set(idxArg, (int)I.rows);                           // const int rows
        idxArg = kernel.set(idxArg, (int)I.cols);                           // const int cols
        idxArg = kernel.set(idxArg, (int)patch.x);                          // int PATCH_X
        idxArg = kernel.set(idxArg, (int)patch.y);                          // int PATCH_Y
        idxArg = kernel.set(idxArg, (int)winSize.width);                    // int c_winSize_x
        idxArg = kernel.set(idxArg, (int)winSize.height);                   // int c_winSize_y
        idxArg = kernel.set(idxArg, (int)iters);                            // int c_iters
        idxArg = kernel.set(idxArg, (char)calcErr);                         // char calcErr
        return kernel.run(2, globalThreads, localThreads, true); // sync=true because ocl::Image2D lifetime is not handled well for temp UMat
    }
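    // The NDRange above launches one 8x8 work-group per tracked point:
    // globalThreads = {8*ptcount, 8} with localThreads = {8, 8}, so the group
    // index along x selects the point being tracked.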
    inline static bool isDeviceCPU()
    {
        return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type());
    }
    bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg,
                                  InputArray _prevPts, InputOutputArray _nextPts,
                                  OutputArray _status, OutputArray _err)
    {
        if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags))
            return false;
        if (!cv::ocl::Device::getDefault().imageSupport())
            return false;
        if (_nextImg.size() != _prevImg.size())
            return false;
        int typePrev = _prevImg.type();
        int typeNext = _nextImg.type();
        if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext)))
            return false;
        if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext)))
            return false;

        if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous()))
            return false;
        if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width))
            return false;
        size_t npoints = _prevPts.total();
        if (useInitialFlow)
        {
            if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_nextPts.isContinuous()))
                return false;
            if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width))
                return false;
            if (_nextPts.total() != npoints)
                return false;
        }
        else
        {
            _nextPts.create(_prevPts.size(), _prevPts.type());
        }

        if (!checkParam())
            return false;

        UMat umatErr;
        if (_err.needed())
        {
            _err.create((int)npoints, 1, CV_32FC1);
            umatErr = _err.getUMat();
        }
        else
            umatErr.create((int)npoints, 1, CV_32FC1);

        _status.create((int)npoints, 1, CV_8UC1);
        UMat umatNextPts = _nextPts.getUMat();
        UMat umatStatus = _status.getUMat();
        return sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr);
    }
#endif
#ifdef HAVE_OPENVX
    bool openvx_pyrlk(InputArray _prevImg, InputArray _nextImg, InputArray _prevPts, InputOutputArray _nextPts,
                      OutputArray _status, OutputArray _err)
    {
        using namespace ivx;

        // Pyramids as inputs are not acceptable because there's no (direct or simple) way
        // to build a vx_pyramid on user data
        if(_prevImg.kind() != _InputArray::MAT || _nextImg.kind() != _InputArray::MAT)
            return false;

        Mat prevImgMat = _prevImg.getMat(), nextImgMat = _nextImg.getMat();

        if(prevImgMat.type() != CV_8UC1 || nextImgMat.type() != CV_8UC1)
            return false;

        if (ovx::skipSmallImages<VX_KERNEL_OPTICAL_FLOW_PYR_LK>(prevImgMat.cols, prevImgMat.rows))
            return false;

        CV_Assert(prevImgMat.size() == nextImgMat.size());
        Mat prevPtsMat = _prevPts.getMat();
        int checkPrev = prevPtsMat.checkVector(2, CV_32F, false);
        CV_Assert( checkPrev >= 0 );
        size_t npoints = checkPrev;

        if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
            _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
        Mat nextPtsMat = _nextPts.getMat();
        CV_Assert( nextPtsMat.checkVector(2, CV_32F, false) == (int)npoints );

        _status.create((int)npoints, 1, CV_8U, -1, true);
        Mat statusMat = _status.getMat();
        uchar* status = statusMat.ptr();
        for(size_t i = 0; i < npoints; i++ )
            status[i] = true;

        // OpenVX doesn't return detection errors
        if( _err.needed() )
            return false;

        try
        {
            Context context = ovx::getOpenVXContext();

            if(context.vendorID() == VX_ID_KHRONOS)
            {
                // PyrLK in OVX 1.0.1 performs vxCommitImagePatch incorrectly and crashes
                if(VX_VERSION == VX_VERSION_1_0)
                    return false;
                // The implementation ignores the border mode,
                // so check that the smallest image in the pyramid is big enough
                int width = prevImgMat.cols, height = prevImgMat.rows;
                for(int i = 0; i < maxLevel+1; i++)
                {
                    if(width < winSize.width + 1 || height < winSize.height + 1)
                        return false;
                    else
                    {
                        width /= 2; height /= 2;
                    }
                }
            }
            Image prevImg = Image::createFromHandle(context, Image::matTypeToFormat(prevImgMat.type()),
                                                    Image::createAddressing(prevImgMat), (void*)prevImgMat.data);
            Image nextImg = Image::createFromHandle(context, Image::matTypeToFormat(nextImgMat.type()),
                                                    Image::createAddressing(nextImgMat), (void*)nextImgMat.data);

            Graph graph = Graph::create(context);

            Pyramid prevPyr = Pyramid::createVirtual(graph, (vx_size)maxLevel+1, VX_SCALE_PYRAMID_HALF,
                                                     prevImg.width(), prevImg.height(), prevImg.format());
            Pyramid nextPyr = Pyramid::createVirtual(graph, (vx_size)maxLevel+1, VX_SCALE_PYRAMID_HALF,
                                                     nextImg.width(), nextImg.height(), nextImg.format());

            ivx::Node::create(graph, VX_KERNEL_GAUSSIAN_PYRAMID, prevImg, prevPyr);
            ivx::Node::create(graph, VX_KERNEL_GAUSSIAN_PYRAMID, nextImg, nextPyr);

            Array prevPts = Array::create(context, VX_TYPE_KEYPOINT, npoints);
            Array estimatedPts = Array::create(context, VX_TYPE_KEYPOINT, npoints);
            Array nextPts = Array::create(context, VX_TYPE_KEYPOINT, npoints);

            std::vector<vx_keypoint_t> vxPrevPts(npoints), vxEstPts(npoints), vxNextPts(npoints);
            for(size_t i = 0; i < npoints; i++)
            {
                vx_keypoint_t& prevPt = vxPrevPts[i]; vx_keypoint_t& estPt = vxEstPts[i];
                prevPt.x = prevPtsMat.at<Point2f>(i).x; prevPt.y = prevPtsMat.at<Point2f>(i).y;
                estPt.x = nextPtsMat.at<Point2f>(i).x; estPt.y = nextPtsMat.at<Point2f>(i).y;
                prevPt.tracking_status = estPt.tracking_status = vx_true_e;
            }
            prevPts.addItems(vxPrevPts); estimatedPts.addItems(vxEstPts);

            if( (criteria.type & TermCriteria::COUNT) == 0 )
                criteria.maxCount = 30;
            else
                criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
            if( (criteria.type & TermCriteria::EPS) == 0 )
                criteria.epsilon = 0.01;
            else
                criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
            criteria.epsilon *= criteria.epsilon;

            vx_enum termEnum = (criteria.type == TermCriteria::COUNT) ? VX_TERM_CRITERIA_ITERATIONS :
                               (criteria.type == TermCriteria::EPS) ? VX_TERM_CRITERIA_EPSILON :
                               VX_TERM_CRITERIA_BOTH;

            //minEigThreshold is fixed to 0.0001f
            ivx::Scalar termination = ivx::Scalar::create<VX_TYPE_ENUM>(context, termEnum);
            ivx::Scalar epsilon = ivx::Scalar::create<VX_TYPE_FLOAT32>(context, criteria.epsilon);
            ivx::Scalar numIterations = ivx::Scalar::create<VX_TYPE_UINT32>(context, criteria.maxCount);
            ivx::Scalar useInitial = ivx::Scalar::create<VX_TYPE_BOOL>(context, (vx_bool)(flags & OPTFLOW_USE_INITIAL_FLOW));
            //assume winSize is square
            ivx::Scalar windowSize = ivx::Scalar::create<VX_TYPE_SIZE>(context, (vx_size)winSize.width);

            ivx::Node::create(graph, VX_KERNEL_OPTICAL_FLOW_PYR_LK, prevPyr, nextPyr, prevPts, estimatedPts,
                              nextPts, termination, epsilon, numIterations, useInitial, windowSize);

            graph.verify();
            graph.process();

            nextPts.copyTo(vxNextPts);
            for(size_t i = 0; i < npoints; i++)
            {
                vx_keypoint_t kp = vxNextPts[i];
                nextPtsMat.at<Point2f>(i) = Point2f((float)kp.x, (float)kp.y);
                statusMat.at<uchar>(i) = (bool)kp.tracking_status;
            }

#ifdef VX_VERSION_1_1
            //we should take user memory back before release
            //(it's not done automatically according to the standard)
            prevImg.swapHandle(); nextImg.swapHandle();
#endif
        }
        catch (RuntimeError & e)
        {
            VX_DbgThrow(e.what());
        }
        catch (WrapperError & e)
        {
            VX_DbgThrow(e.what());
        }

        return true;
    }
#endif
};
void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
                                       InputArray _prevPts, InputOutputArray _nextPts,
                                       OutputArray _status, OutputArray _err)
{
    CV_INSTRUMENT_REGION()

    CV_OCL_RUN(ocl::isOpenCLActivated() &&
               (_prevImg.isUMat() || _nextImg.isUMat()) &&
               ocl::Image2D::isFormatSupported(CV_32F, 1, false),
               ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))

    // Disabled due to bad accuracy
    CV_OVX_RUN(false,
               openvx_pyrlk(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))
    Mat prevPtsMat = _prevPts.getMat();
    const int derivDepth = DataType<cv::detail::deriv_type>::depth;

    CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );

    int level = 0, i, npoints;
    CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );

    if( npoints == 0 )
    {
        _nextPts.release();
        _status.release();
        _err.release();
        return;
    }

    if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
        _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);

    Mat nextPtsMat = _nextPts.getMat();
    CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );

    const Point2f* prevPts = prevPtsMat.ptr<Point2f>();
    Point2f* nextPts = nextPtsMat.ptr<Point2f>();

    _status.create((int)npoints, 1, CV_8U, -1, true);
    Mat statusMat = _status.getMat(), errMat;
    CV_Assert( statusMat.isContinuous() );
    uchar* status = statusMat.ptr();
    float* err = 0;

    for( i = 0; i < npoints; i++ )
        status[i] = true;

    if( _err.needed() )
    {
        _err.create((int)npoints, 1, CV_32F, -1, true);
        errMat = _err.getMat();
        CV_Assert( errMat.isContinuous() );
        err = errMat.ptr<float>();
    }
    std::vector<Mat> prevPyr, nextPyr;
    int levels1 = -1;
    int lvlStep1 = 1;
    int levels2 = -1;
    int lvlStep2 = 1;

    if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
    {
        //use the supplied pyramid
        _prevImg.getMatVector(prevPyr);

        levels1 = int(prevPyr.size()) - 1;
        CV_Assert(levels1 >= 0);

        if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
        {
            lvlStep1 = 2;
            levels1 /= 2;
        }

        // ensure that the pyramid has the required padding
        if(levels1 > 0)
        {
            Size fullSize;
            Point ofs;
            prevPyr[lvlStep1].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
                && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
        }

        if(levels1 < maxLevel)
            maxLevel = levels1;
    }

    if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
    {
        _nextImg.getMatVector(nextPyr);

        levels2 = int(nextPyr.size()) - 1;
        CV_Assert(levels2 >= 0);

        if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
        {
            lvlStep2 = 2;
            levels2 /= 2;
        }

        // ensure that the pyramid has the required padding
        if(levels2 > 0)
        {
            Size fullSize;
            Point ofs;
            nextPyr[lvlStep2].locateROI(fullSize, ofs);
            CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
                && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
                && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
        }

        if(levels2 < maxLevel)
            maxLevel = levels2;
    }

    if (levels1 < 0)
        maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);

    if (levels2 < 0)
        maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
    if( (criteria.type & TermCriteria::COUNT) == 0 )
        criteria.maxCount = 30;
    else
        criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
    if( (criteria.type & TermCriteria::EPS) == 0 )
        criteria.epsilon = 0.01;
    else
        criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
    criteria.epsilon *= criteria.epsilon;
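    // epsilon is squared here because the per-iteration convergence test in
    // LKTrackerInvoker compares the squared step length, delta.ddot(delta),
    // against criteria.epsilon.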
    // dI/dx ~ Ix, dI/dy ~ Iy
    Mat derivIBuf;
    if(lvlStep1 == 1)
        derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));

    for( level = maxLevel; level >= 0; level-- )
    {
        Mat derivI;
        if(lvlStep1 == 1)
        {
            Size imgSize = prevPyr[level * lvlStep1].size();
            Mat _derivI( imgSize.height + winSize.height*2,
                imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
            derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
            calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
            copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
        }
        else
            derivI = prevPyr[level * lvlStep1 + 1];

        CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
        CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());

        typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
        parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
                                                          nextPyr[level * lvlStep2], prevPts, nextPts,
                                                          status, err,
                                                          winSize, criteria, level, maxLevel,
                                                          flags, (float)minEigThreshold));
    }
}

} // namespace cv
cv::Ptr<cv::SparsePyrLKOpticalFlow> cv::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, TermCriteria crit, int flags, double minEigThreshold)
{
    return makePtr<SparsePyrLKOpticalFlowImpl>(winSize, maxLevel, crit, flags, minEigThreshold);
}

void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
                               InputArray _prevPts, InputOutputArray _nextPts,
                               OutputArray _status, OutputArray _err,
                               Size winSize, int maxLevel,
                               TermCriteria criteria,
                               int flags, double minEigThreshold )
{
    Ptr<cv::SparsePyrLKOpticalFlow> optflow = cv::SparsePyrLKOpticalFlow::create(winSize, maxLevel, criteria, flags, minEigThreshold);
    optflow->calc(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err);
}
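// Typical usage (illustrative sketch; `prevGray` and `nextGray` are assumed
// CV_8UC1 frames supplied by the caller):
//
//   std::vector<cv::Point2f> prevPts, nextPts;
//   std::vector<uchar> status;
//   std::vector<float> err;
//   cv::goodFeaturesToTrack(prevGray, prevPts, 500, 0.01, 10);
//   cv::calcOpticalFlowPyrLK(prevGray, nextGray, prevPts, nextPts, status, err);
//   // points with status[i] == 0 were lost and should be discarded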
namespace cv
{

static void
getRTMatrix( const Point2f* a, const Point2f* b,
             int count, Mat& M, bool fullAffine )
{
    CV_Assert( M.isContinuous() );

    if( fullAffine )
    {
        double sa[6][6]={{0.}}, sb[6]={0.};
        Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb );
        Mat MM = M.reshape(1, 6);

        for( int i = 0; i < count; i++ )
        {
            sa[0][0] += a[i].x*a[i].x;
            sa[0][1] += a[i].y*a[i].x;
            sa[0][2] += a[i].x;

            sa[1][1] += a[i].y*a[i].y;
            sa[1][2] += a[i].y;

            sb[0] += a[i].x*b[i].x;
            sb[1] += a[i].y*b[i].x;
            sb[2] += b[i].x;

            sb[3] += a[i].x*b[i].y;
            sb[4] += a[i].y*b[i].y;
            sb[5] += b[i].y;
        }

        sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
        sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
        sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];

        sa[3][3] = sa[0][0];
        sa[4][4] = sa[1][1];
        sa[5][5] = sa[2][2] = count;

        solve( A, B, MM, DECOMP_EIG );
    }
    else
    {
        double sa[4][4]={{0.}}, sb[4]={0.}, m[4] = {0};
        Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb );
        Mat MM( 4, 1, CV_64F, m );

        for( int i = 0; i < count; i++ )
        {
            sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
            sa[0][2] += a[i].x;
            sa[0][3] += a[i].y;

            sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
            sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += b[i].y;
        }

        sa[1][1] = sa[0][0];
        sa[2][1] = sa[1][2] = -sa[0][3];
        sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
        sa[2][2] = sa[3][3] = count;
        sa[3][0] = sa[0][3];

        solve( A, B, MM, DECOMP_EIG );

        double* om = M.ptr<double>();
        om[0] = om[4] = m[0];
        om[1] = -m[1];
        om[3] = m[1];
        om[2] = m[2];
        om[5] = m[3];
    }
}

} // namespace cv
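// In the partial-affine branch above, MM solves for m = (c, s, tx, ty) of the
// similarity transform [ c -s tx; s c ty ] (scaled rotation plus translation)
// via the least-squares normal equations; the fullAffine branch solves the
// analogous 6x6 system for all six affine coefficients at once.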
cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine )
{
    CV_INSTRUMENT_REGION()

    Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();

    const int COUNT = 15;
    const int WIDTH = 160, HEIGHT = 120;
    const int RANSAC_MAX_ITERS = 500;
    const int RANSAC_SIZE0 = 3;
    const double RANSAC_GOOD_RATIO = 0.5;

    std::vector<Point2f> pA, pB;
    std::vector<int> good_idx;
    std::vector<uchar> status;

    double scale = 1.;
    int i, j, k, k1;

    RNG rng((uint64)-1);
    int good_count = 0;
    if( A.size() != B.size() )
        CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" );

    if( A.type() != B.type() )
        CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" );

    int count = A.checkVector(2);

    if( count > 0 )
    {
        A.reshape(2, count).convertTo(pA, CV_32F);
        B.reshape(2, count).convertTo(pB, CV_32F);
    }
    else if( A.depth() == CV_8U )
    {
        int cn = A.channels();
        CV_Assert( cn == 1 || cn == 3 || cn == 4 );
        Size sz0 = A.size();
        Size sz1(WIDTH, HEIGHT);

        scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height ));

        sz1.width = cvRound( sz0.width * scale );
        sz1.height = cvRound( sz0.height * scale );

        bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height;

        if( !equalSizes || cn != 1 )
        {
            Mat sA, sB;

            if( cn != 1 )
            {
                Mat gray;
                cvtColor(A, gray, COLOR_BGR2GRAY);
                resize(gray, sA, sz1, 0., 0., INTER_AREA);
                cvtColor(B, gray, COLOR_BGR2GRAY);
                resize(gray, sB, sz1, 0., 0., INTER_AREA);
            }
            else
            {
                resize(A, sA, sz1, 0., 0., INTER_AREA);
                resize(B, sB, sz1, 0., 0., INTER_AREA);
            }

            A = sA;
            B = sB;
        }

        int count_y = COUNT;
        int count_x = cvRound((double)COUNT*sz1.width/sz1.height);
        count = count_x * count_y;

        pA.resize(count);
        pB.resize(count);
        status.resize(count);

        for( i = 0, k = 0; i < count_y; i++ )
            for( j = 0; j < count_x; j++, k++ )
            {
                pA[k].x = (j+0.5f)*sz1.width/count_x;
                pA[k].y = (i+0.5f)*sz1.height/count_y;
            }

        // find the corresponding points in B
        calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3,
                             TermCriteria(TermCriteria::MAX_ITER,40,0.1));

        // repack the remaining points
        for( i = 0, k = 0; i < count; i++ )
            if( status[i] )
            {
                if( i > k )
                {
                    pA[k] = pA[i];
                    pB[k] = pB[i];
                }
                k++;
            }
        count = k;
        pA.resize(count);
        pB.resize(count);
    }
    else
        CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" );
    good_idx.resize(count);

    if( count < RANSAC_SIZE0 )
        return Mat();

    Rect brect = boundingRect(pB);
    // RANSAC stuff:
    // 1. find the consensus
    for( k = 0; k < RANSAC_MAX_ITERS; k++ )
    {
        int idx[RANSAC_SIZE0];
        Point2f a[RANSAC_SIZE0];
        Point2f b[RANSAC_SIZE0];

        // choose random 3 non-coplanar points from A & B
        for( i = 0; i < RANSAC_SIZE0; i++ )
        {
            for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ )
            {
                idx[i] = rng.uniform(0, count);

                for( j = 0; j < i; j++ )
                {
                    if( idx[j] == idx[i] )
                        break;
                    // check that the points are not very close to each other
                    if( fabs(pA[idx[i]].x - pA[idx[j]].x) +
                        fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON )
                        break;
                    if( fabs(pB[idx[i]].x - pB[idx[j]].x) +
                        fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON )
                        break;
                }

                if( j < i )
                    continue;

                if( i+1 == RANSAC_SIZE0 )
                {
                    // additional check for non-coplanar vectors
                    a[0] = pA[idx[0]];
                    a[1] = pA[idx[1]];
                    a[2] = pA[idx[2]];

                    b[0] = pB[idx[0]];
                    b[1] = pB[idx[1]];
                    b[2] = pB[idx[2]];

                    double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
                    double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
                    double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
                    double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
                    const double eps = 0.01;

                    if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
                        fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) )
                        continue;
                }
                break;
            }

            if( k1 >= RANSAC_MAX_ITERS )
                break;
        }
        if( i < RANSAC_SIZE0 )
            continue;

        // estimate the transformation using 3 points
        getRTMatrix( a, b, 3, M, fullAffine );

        const double* m = M.ptr<double>();
        for( i = 0, good_count = 0; i < count; i++ )
        {
            if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) +
                std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 )
                good_idx[good_count++] = i;
        }

        if( good_count >= count*RANSAC_GOOD_RATIO )
            break;
    }

    if( k >= RANSAC_MAX_ITERS )
        return Mat();
    if( good_count < count )
    {
        for( i = 0; i < good_count; i++ )
        {
            j = good_idx[i];
            pA[i] = pA[j];
            pB[i] = pB[j];
        }
    }

    getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine );
    M.at<double>(0, 2) /= scale;
    M.at<double>(1, 2) /= scale;

    return M;
}