oclMat vPyr_[2];
bool isDeviceArch11_;
};
+
+ class CV_EXPORTS FarnebackOpticalFlow
+ {
+ public:
+ FarnebackOpticalFlow()
+ {
+ numLevels = 5;
+ pyrScale = 0.5;
+ fastPyramids = false;
+ winSize = 13;
+ numIters = 10;
+ polyN = 5;
+ polySigma = 1.1;
+ flags = 0;
+ }
+
+ int numLevels;
+ double pyrScale;
+ bool fastPyramids;
+ int winSize;
+ int numIters;
+ int polyN;
+ double polySigma;
+ int flags;
+
+ void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
+
+ void releaseMemory()
+ {
+ frames_[0].release();
+ frames_[1].release();
+ pyrLevel_[0].release();
+ pyrLevel_[1].release();
+ M_.release();
+ bufM_.release();
+ R_[0].release();
+ R_[1].release();
+ blurredFrame_[0].release();
+ blurredFrame_[1].release();
+ pyramid0_.clear();
+ pyramid1_.clear();
+ }
+
+ private:
+ void prepareGaussian(
+ int n, double sigma, float *g, float *xg, float *xxg,
+ double &ig11, double &ig03, double &ig33, double &ig55);
+
+ void setPolynomialExpansionConsts(int n, double sigma);
+
+ void updateFlow_boxFilter(
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+ void updateFlow_gaussianBlur(
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices);
+
+ oclMat frames_[2];
+ oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
+ std::vector<oclMat> pyramid0_, pyramid1_;
+ };
+
//////////////// build warping maps ////////////////////
//! builds plane warping maps
CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);
TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3);
TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3);
-}
\ No newline at end of file
+}
+
+///////////// FarnebackOpticalFlow ////////////////////////
+PERFTEST(FarnebackOpticalFlow)
+{
+ cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame0.empty());
+
+ cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame1.empty());
+
+ cv::ocl::oclMat d_frame0(frame0), d_frame1(frame1);
+
+ int polyNs[2] = { 5, 7 };
+ double polySigmas[2] = { 1.1, 1.5 };
+ int farneFlags[2] = { 0, cv::OPTFLOW_FARNEBACK_GAUSSIAN };
+ bool UseInitFlows[2] = { false, true };
+ double pyrScale = 0.5;
+
+ string farneFlagStrs[2] = { "BoxFilter", "GaussianBlur" };
+ string useInitFlowStrs[2] = { "", "UseInitFlow" };
+
+ for ( int i = 0; i < 2; ++i)
+ {
+ int polyN = polyNs[i];
+ double polySigma = polySigmas[i];
+
+ for ( int j = 0; j < 2; ++j)
+ {
+ int flags = farneFlags[j];
+
+ for ( int k = 0; k < 2; ++k)
+ {
+ bool useInitFlow = UseInitFlows[k];
+ SUBTEST << "polyN(" << polyN << "); " << farneFlagStrs[j] << "; " << useInitFlowStrs[k];
+
+ cv::ocl::FarnebackOpticalFlow farn;
+ farn.pyrScale = pyrScale;
+ farn.polyN = polyN;
+ farn.polySigma = polySigma;
+ farn.flags = flags;
+
+ cv::ocl::oclMat d_flowx, d_flowy;
+ cv::Mat flow, flowBuf, flowxBuf, flowyBuf;
+
+ WARMUP_ON;
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+
+ if (useInitFlow)
+ {
+ cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
+ cv::merge(flowxy, 2, flow);
+ flow.copyTo(flowBuf);
+ flowxy[0].copyTo(flowxBuf);
+ flowxy[1].copyTo(flowyBuf);
+
+ farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+ }
+ WARMUP_OFF;
+
+ cv::calcOpticalFlowFarneback(
+ frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+ farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+
+ std::vector<cv::Mat> flowxy;
+ cv::split(flow, flowxy);
+
+ double diff0 = 0.0;
+ TestSystem::instance().setAccurate(ExceptedMatSimilar(flowxy[0], cv::Mat(d_flowx), 0.1, diff0));
+ TestSystem::instance().setDiff(diff0);
+ double diff1 = 0.0;
+ TestSystem::instance().setAccurate(ExceptedMatSimilar(flowxy[1], cv::Mat(d_flowy), 0.1, diff1));
+ TestSystem::instance().setDiff(diff1);
+
+ if (useInitFlow)
+ {
+ cv::Mat flowx, flowy;
+ farn.flags = (flags | cv::OPTFLOW_USE_INITIAL_FLOW);
+
+ CPU_ON;
+ cv::calcOpticalFlowFarneback(
+ frame0, frame1, flowBuf, farn.pyrScale, farn.numLevels, farn.winSize,
+ farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+ CPU_OFF;
+
+ GPU_ON;
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+ GPU_OFF;
+
+ GPU_FULL_ON;
+ d_frame0.upload(frame0);
+ d_frame1.upload(frame1);
+ d_flowx.upload(flowxBuf);
+ d_flowy.upload(flowyBuf);
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+ d_flowx.download(flowx);
+ d_flowy.download(flowy);
+ GPU_FULL_OFF;
+ }
+ else
+ {
+ cv::Mat flow, flowx, flowy;
+ cv::ocl::oclMat d_flowx, d_flowy;
+
+ farn.flags = flags;
+
+ CPU_ON;
+ cv::calcOpticalFlowFarneback(
+ frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+ farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+ CPU_OFF;
+
+ GPU_ON;
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+ GPU_OFF;
+
+ GPU_FULL_ON;
+ d_frame0.upload(frame0);
+ d_frame1.upload(frame1);
+ farn(d_frame0, d_frame1, d_flowx, d_flowy);
+ d_flowx.download(flowx);
+ d_flowy.download(flowy);
+ GPU_FULL_OFF;
+ }
+ }
+ }
+ }
+}
+
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#define tx get_local_id(0)
+#define ty get_local_id(1)
+#define bx get_group_id(0)
+#define bdx get_local_size(0)
+
+#define BORDER_SIZE 5
+#define MAX_KSIZE_HALF 100
+
+#ifndef polyN
+#define polyN 5
+#endif
+
+__kernel void polynomialExpansion(__global float * dst,
+ __global __const float * src,
+ __global __const float * c_g,
+ __global __const float * c_xg,
+ __global __const float * c_xxg,
+ __local float * smem,
+ const float4 ig,
+ const int height, const int width,
+ int dstStep, int srcStep)
+{
+ const int y = get_global_id(1);
+ const int x = bx * (bdx - 2*polyN) + tx - polyN;
+
+ dstStep /= sizeof(*dst);
+ srcStep /= sizeof(*src);
+
+ int xWarped;
+ __local float *row = smem + tx;
+
+ if (y < height && y >= 0)
+ {
+ xWarped = min(max(x, 0), width - 1);
+
+ row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];
+ row[bdx] = 0.f;
+ row[2*bdx] = 0.f;
+
+#pragma unroll
+ for (int k = 1; k <= polyN; ++k)
+ {
+ float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];
+ float t1 = src[mad24(min(y + k, height - 1), srcStep, xWarped)];
+
+ row[0] += c_g[k] * (t0 + t1);
+ row[bdx] += c_xg[k] * (t1 - t0);
+ row[2*bdx] += c_xxg[k] * (t0 + t1);
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (y < height && y >= 0 && tx >= polyN && tx + polyN < bdx && x < width)
+ {
+ float b1 = c_g[0] * row[0];
+ float b3 = c_g[0] * row[bdx];
+ float b5 = c_g[0] * row[2*bdx];
+ float b2 = 0, b4 = 0, b6 = 0;
+
+#pragma unroll
+ for (int k = 1; k <= polyN; ++k)
+ {
+ b1 += (row[k] + row[-k]) * c_g[k];
+ b4 += (row[k] + row[-k]) * c_xxg[k];
+ b2 += (row[k] - row[-k]) * c_xg[k];
+ b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];
+ b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];
+ b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];
+ }
+
+ dst[mad24(y, dstStep, xWarped)] = b3*ig.s0;
+ dst[mad24(height + y, dstStep, xWarped)] = b2*ig.s0;
+ dst[mad24(2*height + y, dstStep, xWarped)] = b1*ig.s1 + b5*ig.s2;
+ dst[mad24(3*height + y, dstStep, xWarped)] = b1*ig.s1 + b4*ig.s2;
+ dst[mad24(4*height + y, dstStep, xWarped)] = b6*ig.s3;
+ }
+}
+
+inline int idx_row_low(const int y, const int last_row)
+{
+ return abs(y) % (last_row + 1);
+}
+
+inline int idx_row_high(const int y, const int last_row)
+{
+ return abs(last_row - abs(last_row - y)) % (last_row + 1);
+}
+
+inline int idx_row(const int y, const int last_row)
+{
+ return idx_row_low(idx_row_high(y, last_row), last_row);
+}
+
+inline int idx_col_low(const int x, const int last_col)
+{
+ return abs(x) % (last_col + 1);
+}
+
+inline int idx_col_high(const int x, const int last_col)
+{
+ return abs(last_col - abs(last_col - x)) % (last_col + 1);
+}
+
+inline int idx_col(const int x, const int last_col)
+{
+ return idx_col_low(idx_col_high(x, last_col), last_col);
+}
+
+__kernel void gaussianBlur(__global float * dst,
+ __global const float * src,
+ __global const float * c_gKer,
+ __local float * smem,
+ const int height, const int width,
+ int dstStep, int srcStep,
+ const int ksizeHalf)
+{
+ const int y = get_global_id(1);
+ const int x = get_global_id(0);
+
+ dstStep /= sizeof(*dst);
+ srcStep /= sizeof(*src);
+
+ __local float *row = smem + ty * (bdx + 2*ksizeHalf);
+
+ if (y < height)
+ {
+ // Vertical pass
+ for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+ {
+ int xExt = (int)(bx * bdx) + i - ksizeHalf;
+ xExt = idx_col(xExt, width - 1);
+ row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];
+ for (int j = 1; j <= ksizeHalf; ++j)
+ row[i] += (src[mad24(idx_row_low(y - j, height - 1), srcStep, xExt)]
+ + src[mad24(idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (y < height && y >= 0 && x < width && x >= 0)
+ {
+ // Horizontal pass
+ row += tx + ksizeHalf;
+ float res = row[0] * c_gKer[0];
+ for (int i = 1; i <= ksizeHalf; ++i)
+ res += (row[-i] + row[i]) * c_gKer[i];
+
+ dst[mad24(y, dstStep, x)] = res;
+ }
+}
+
+__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };
+
+__kernel void updateMatrices(__global float * M,
+ __global const float * flowx, __global const float * flowy,
+ __global const float * R0, __global const float * R1,
+ const int height, const int width,
+ int mStep, int xStep, int yStep, int R0Step, int R1Step)
+{
+ const int y = get_global_id(1);
+ const int x = get_global_id(0);
+
+ mStep /= sizeof(*M);
+ xStep /= sizeof(*flowx);
+ yStep /= sizeof(*flowy);
+ R0Step /= sizeof(*R0);
+ R1Step /= sizeof(*R1);
+
+ if (y < height && y >= 0 && x < width && x >= 0)
+ {
+ float dx = flowx[mad24(y, xStep, x)];
+ float dy = flowy[mad24(y, yStep, x)];
+ float fx = x + dx;
+ float fy = y + dy;
+
+ int x1 = convert_int(floor(fx));
+ int y1 = convert_int(floor(fy));
+ fx -= x1; fy -= y1;
+
+ float r2, r3, r4, r5, r6;
+
+ if (x1 >= 0 && y1 >= 0 && x1 < width - 1 && y1 < height - 1)
+ {
+ float a00 = (1.f - fx) * (1.f - fy);
+ float a01 = fx * (1.f - fy);
+ float a10 = (1.f - fx) * fy;
+ float a11 = fx * fy;
+
+ r2 = a00 * R1[mad24(y1, R1Step, x1)] +
+ a01 * R1[mad24(y1, R1Step, x1 + 1)] +
+ a10 * R1[mad24(y1 + 1, R1Step, x1)] +
+ a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];
+
+ r3 = a00 * R1[mad24(height + y1, R1Step, x1)] +
+ a01 * R1[mad24(height + y1, R1Step, x1 + 1)] +
+ a10 * R1[mad24(height + y1 + 1, R1Step, x1)] +
+ a11 * R1[mad24(height + y1 + 1, R1Step, x1 + 1)];
+
+ r4 = a00 * R1[mad24(2*height + y1, R1Step, x1)] +
+ a01 * R1[mad24(2*height + y1, R1Step, x1 + 1)] +
+ a10 * R1[mad24(2*height + y1 + 1, R1Step, x1)] +
+ a11 * R1[mad24(2*height + y1 + 1, R1Step, x1 + 1)];
+
+ r5 = a00 * R1[mad24(3*height + y1, R1Step, x1)] +
+ a01 * R1[mad24(3*height + y1, R1Step, x1 + 1)] +
+ a10 * R1[mad24(3*height + y1 + 1, R1Step, x1)] +
+ a11 * R1[mad24(3*height + y1 + 1, R1Step, x1 + 1)];
+
+ r6 = a00 * R1[mad24(4*height + y1, R1Step, x1)] +
+ a01 * R1[mad24(4*height + y1, R1Step, x1 + 1)] +
+ a10 * R1[mad24(4*height + y1 + 1, R1Step, x1)] +
+ a11 * R1[mad24(4*height + y1 + 1, R1Step, x1 + 1)];
+
+ r4 = (R0[mad24(2*height + y, R0Step, x)] + r4) * 0.5f;
+ r5 = (R0[mad24(3*height + y, R0Step, x)] + r5) * 0.5f;
+ r6 = (R0[mad24(4*height + y, R0Step, x)] + r6) * 0.25f;
+ }
+ else
+ {
+ r2 = r3 = 0.f;
+ r4 = R0[mad24(2*height + y, R0Step, x)];
+ r5 = R0[mad24(3*height + y, R0Step, x)];
+ r6 = R0[mad24(4*height + y, R0Step, x)] * 0.5f;
+ }
+
+ r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;
+ r3 = (R0[mad24(height + y, R0Step, x)] - r3) * 0.5f;
+
+ r2 += r4*dy + r6*dx;
+ r3 += r6*dy + r5*dx;
+
+ float scale =
+ c_border[min(x, BORDER_SIZE)] *
+ c_border[min(y, BORDER_SIZE)] *
+ c_border[min(width - x - 1, BORDER_SIZE)] *
+ c_border[min(height - y - 1, BORDER_SIZE)];
+
+ r2 *= scale; r3 *= scale; r4 *= scale;
+ r5 *= scale; r6 *= scale;
+
+ M[mad24(y, mStep, x)] = r4*r4 + r6*r6;
+ M[mad24(height + y, mStep, x)] = (r4 + r5)*r6;
+ M[mad24(2*height + y, mStep, x)] = r5*r5 + r6*r6;
+ M[mad24(3*height + y, mStep, x)] = r4*r2 + r6*r3;
+ M[mad24(4*height + y, mStep, x)] = r6*r2 + r5*r3;
+ }
+}
+
+__kernel void boxFilter5(__global float * dst,
+ __global const float * src,
+ __local float * smem,
+ const int height, const int width,
+ int dstStep, int srcStep,
+ const int ksizeHalf)
+{
+ const int y = get_global_id(1);
+ const int x = get_global_id(0);
+
+ const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
+ const int smw = bdx + 2*ksizeHalf; // shared memory "width"
+ __local float *row = smem + 5 * ty * smw;
+
+ dstStep /= sizeof(*dst);
+ srcStep /= sizeof(*src);
+
+ if (y < height)
+ {
+ // Vertical pass
+ for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+ {
+ int xExt = (int)(bx * bdx) + i - ksizeHalf;
+ xExt = min(max(xExt, 0), width - 1);
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)];
+
+ for (int j = 1; j <= ksizeHalf; ++j)
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ row[k*smw + i] +=
+ src[mad24(k*height + max(y - j, 0), srcStep, xExt)] +
+ src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)];
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (y < height && y >= 0 && x < width && x >= 0)
+ {
+ // Horizontal pass
+
+ row += tx + ksizeHalf;
+ float res[5];
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ res[k] = row[k*smw];
+
+ for (int i = 1; i <= ksizeHalf; ++i)
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ res[k] += row[k*smw - i] + row[k*smw + i];
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv;
+ }
+}
+
+__kernel void updateFlow(__global float4 * flowx, __global float4 * flowy,
+ __global const float4 * M,
+ const int height, const int width,
+ int xStep, int yStep, int mStep)
+{
+ const int y = get_global_id(1);
+ const int x = get_global_id(0);
+
+ xStep /= sizeof(*flowx);
+ yStep /= sizeof(*flowy);
+ mStep /= sizeof(*M);
+
+ if (y < height && y >= 0 && x < width && x >= 0)
+ {
+ float4 g11 = M[mad24(y, mStep, x)];
+ float4 g12 = M[mad24(height + y, mStep, x)];
+ float4 g22 = M[mad24(2*height + y, mStep, x)];
+ float4 h1 = M[mad24(3*height + y, mStep, x)];
+ float4 h2 = M[mad24(4*height + y, mStep, x)];
+
+ float4 detInv = (float4)(1.f) / (g11*g22 - g12*g12 + (float4)(1e-3f));
+
+ flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;
+ flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;
+ }
+}
+
+__kernel void gaussianBlur5(__global float * dst,
+ __global const float * src,
+ __global const float * c_gKer,
+ __local float * smem,
+ const int height, const int width,
+ int dstStep, int srcStep,
+ const int ksizeHalf)
+{
+ const int y = get_global_id(1);
+ const int x = get_global_id(0);
+
+ const int smw = bdx + 2*ksizeHalf; // shared memory "width"
+ __local volatile float *row = smem + 5 * ty * smw;
+
+ dstStep /= sizeof(*dst);
+ srcStep /= sizeof(*src);
+
+ if (y < height)
+ {
+ // Vertical pass
+ for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)
+ {
+ int xExt = (int)(bx * bdx) + i - ksizeHalf;
+ xExt = idx_col(xExt, width - 1);
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0];
+
+ for (int j = 1; j <= ksizeHalf; ++j)
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ row[k*smw + i] +=
+ (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] +
+ src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j];
+ }
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ if (y < height && y >= 0 && x < width && x >= 0)
+ {
+ // Horizontal pass
+
+ row += tx + ksizeHalf;
+ float res[5];
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ res[k] = row[k*smw] * c_gKer[0];
+
+ for (int i = 1; i <= ksizeHalf; ++i)
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];
+
+ #pragma unroll
+ for (int k = 0; k < 5; ++k)
+ dst[mad24(k*height + y, dstStep, x)] = res[k];
+ }
+}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other oclMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+#include "opencv2/video/tracking.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+#define MIN_SIZE 32
+
+namespace cv
+{
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *optical_flow_farneback;
+ }
+}
+
+namespace cv { namespace ocl { namespace optflow_farneback
+{
+ oclMat g;
+ oclMat xg;
+ oclMat xxg;
+ oclMat gKer;
+
+ float ig[4];
+
+ inline int divUp(int total, int grain)
+ {
+ return (total + grain - 1) / grain;
+ }
+
+ inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
+ {
+ cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
+ gKer.upload(t_gKer);
+ }
+
+ void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
+ {
+ string kernelName("gaussianBlur");
+ size_t localThreads[3] = { 256, 1, 1 };
+ size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
+ int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
+
+ CV_Assert(dst.size() == src.size());
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
+ args.push_back(std::make_pair(smem_size, (void *)NULL));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1);
+ }
+
+ void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst)
+ {
+ string kernelName("polynomialExpansion");
+ size_t localThreads[3] = { 256, 1, 1 };
+ size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 };
+ int smem_size = 3 * localThreads[0] * sizeof(float);
+
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data));
+ args.push_back(std::make_pair(smem_size, (void *)NULL));
+ args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+
+ char opt [128];
+ sprintf(opt, "-D polyN=%d", polyN);
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1, opt);
+ }
+
+ void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M)
+ {
+ string kernelName("updateMatrices");
+ size_t localThreads[3] = { 32, 8, 1 };
+ size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
+ divUp(flowx.rows, localThreads[1]) * localThreads[1],
+ 1 };
+
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step));
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1);
+ }
+
+ void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
+ {
+ string kernelName("boxFilter5");
+ int height = src.rows / 5;
+ size_t localThreads[3] = { 256, 1, 1 };
+ size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
+ int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
+
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(smem_size, (void *)NULL));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1);
+ }
+
+ void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
+ {
+ string kernelName("updateFlow");
+ int cols = divUp(flowx.cols, 4);
+ size_t localThreads[3] = { 32, 8, 1 };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
+ divUp(flowx.rows, localThreads[1]) * localThreads[0],
+ 1 };
+
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step));
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1);
+ }
+
+ void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
+ {
+ string kernelName("gaussianBlur5");
+ int height = src.rows / 5;
+ int width = src.cols;
+ size_t localThreads[3] = { 256, 1, 1 };
+ size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
+ int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
+
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
+ args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
+ args.push_back(std::make_pair(smem_size, (void *)NULL));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
+
+ openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName,
+ globalThreads, localThreads, args, -1, -1);
+ }
+}}} // namespace cv { namespace ocl { namespace optflow_farneback
+
+static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat)
+{
+ if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
+ return mat(Rect(0, 0, cols, rows));
+ return mat = oclMat(rows, cols, type);
+}
+
+void cv::ocl::FarnebackOpticalFlow::prepareGaussian(
+ int n, double sigma, float *g, float *xg, float *xxg,
+ double &ig11, double &ig03, double &ig33, double &ig55)
+{
+ double s = 0.;
+ for (int x = -n; x <= n; x++)
+ {
+ g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
+ s += g[x];
+ }
+
+ s = 1./s;
+ for (int x = -n; x <= n; x++)
+ {
+ g[x] = (float)(g[x]*s);
+ xg[x] = (float)(x*g[x]);
+ xxg[x] = (float)(x*x*g[x]);
+ }
+
+ Mat_<double> G(6, 6);
+ G.setTo(0);
+
+ for (int y = -n; y <= n; y++)
+ {
+ for (int x = -n; x <= n; x++)
+ {
+ G(0,0) += g[y]*g[x];
+ G(1,1) += g[y]*g[x]*x*x;
+ G(3,3) += g[y]*g[x]*x*x*x*x;
+ G(5,5) += g[y]*g[x]*x*x*y*y;
+ }
+ }
+
+ //G[0][0] = 1.;
+ G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
+ G(4,4) = G(3,3);
+ G(3,4) = G(4,3) = G(5,5);
+
+ // invG:
+ // [ x e e ]
+ // [ y ]
+ // [ y ]
+ // [ e z ]
+ // [ e z ]
+ // [ u ]
+ Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
+
+ ig11 = invG(1,1);
+ ig03 = invG(0,3);
+ ig33 = invG(3,3);
+ ig55 = invG(5,5);
+}
+
+void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
+{
+ vector<float> buf(n*6 + 3);
+ float* g = &buf[0] + n;
+ float* xg = g + n*2 + 1;
+ float* xxg = xg + n*2 + 1;
+
+ if (sigma < FLT_EPSILON)
+ sigma = n*0.3;
+
+ double ig11, ig03, ig33, ig55;
+ prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
+
+ cv::Mat t_g(1, n + 1, CV_32FC1, g);
+ cv::Mat t_xg(1, n + 1, CV_32FC1, xg);
+ cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg);
+
+ optflow_farneback::g.upload(t_g);
+ optflow_farneback::xg.upload(t_xg);
+ optflow_farneback::xxg.upload(t_xxg);
+
+ optflow_farneback::ig[0] = static_cast<float>(ig11);
+ optflow_farneback::ig[1] = static_cast<float>(ig03);
+ optflow_farneback::ig[2] = static_cast<float>(ig33);
+ optflow_farneback::ig[3] = static_cast<float>(ig55);
+}
+
+void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter(
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
+{
+ optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM);
+
+ swap(M, bufM);
+
+ finish();
+
+ optflow_farneback::updateFlowOcl(M, flowx, flowy);
+
+ if (updateMatrices)
+ optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
+}
+
+
+void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur(
+ const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy,
+ oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices)
+{
+ optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM);
+
+ swap(M, bufM);
+
+ optflow_farneback::updateFlowOcl(M, flowx, flowy);
+
+ if (updateMatrices)
+ optflow_farneback::updateMatricesOcl(flowx, flowy, R0, R1, M);
+}
+
+
+void cv::ocl::FarnebackOpticalFlow::operator ()(
+ const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
+{
+ CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
+ CV_Assert(frame0.size() == frame1.size());
+ CV_Assert(polyN == 5 || polyN == 7);
+ CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
+
+ Size size = frame0.size();
+ oclMat prevFlowX, prevFlowY, curFlowX, curFlowY;
+
+ flowx.create(size, CV_32F);
+ flowy.create(size, CV_32F);
+ oclMat flowx0 = flowx;
+ oclMat flowy0 = flowy;
+
+ // Crop unnecessary levels
+ double scale = 1;
+ int numLevelsCropped = 0;
+ for (; numLevelsCropped < numLevels; numLevelsCropped++)
+ {
+ scale *= pyrScale;
+ if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
+ break;
+ }
+
+ frame0.convertTo(frames_[0], CV_32F);
+ frame1.convertTo(frames_[1], CV_32F);
+
+ if (fastPyramids)
+ {
+ // Build Gaussian pyramids using pyrDown()
+ pyramid0_.resize(numLevelsCropped + 1);
+ pyramid1_.resize(numLevelsCropped + 1);
+ pyramid0_[0] = frames_[0];
+ pyramid1_[0] = frames_[1];
+ for (int i = 1; i <= numLevelsCropped; ++i)
+ {
+ pyrDown(pyramid0_[i - 1], pyramid0_[i]);
+ pyrDown(pyramid1_[i - 1], pyramid1_[i]);
+ }
+ }
+
+ setPolynomialExpansionConsts(polyN, polySigma);
+
+ for (int k = numLevelsCropped; k >= 0; k--)
+ {
+ scale = 1;
+ for (int i = 0; i < k; i++)
+ scale *= pyrScale;
+
+ double sigma = (1./scale - 1) * 0.5;
+ int smoothSize = cvRound(sigma*5) | 1;
+ smoothSize = std::max(smoothSize, 3);
+
+ int width = cvRound(size.width*scale);
+ int height = cvRound(size.height*scale);
+
+ if (fastPyramids)
+ {
+ width = pyramid0_[k].cols;
+ height = pyramid0_[k].rows;
+ }
+
+ if (k > 0)
+ {
+ curFlowX.create(height, width, CV_32F);
+ curFlowY.create(height, width, CV_32F);
+ }
+ else
+ {
+ curFlowX = flowx0;
+ curFlowY = flowy0;
+ }
+
+ if (!prevFlowX.data)
+ {
+ if (flags & cv::OPTFLOW_USE_INITIAL_FLOW)
+ {
+ resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
+ resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
+ multiply(scale, curFlowX, curFlowX);
+ multiply(scale, curFlowY, curFlowY);
+ }
+ else
+ {
+ curFlowX.setTo(0);
+ curFlowY.setTo(0);
+ }
+ }
+ else
+ {
+ resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR);
+ resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR);
+ multiply(1./pyrScale, curFlowX, curFlowX);
+ multiply(1./pyrScale, curFlowY, curFlowY);
+ }
+
+ oclMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
+ oclMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
+ oclMat R[2] =
+ {
+ allocMatFromBuf(5*height, width, CV_32F, R_[0]),
+ allocMatFromBuf(5*height, width, CV_32F, R_[1])
+ };
+
+ if (fastPyramids)
+ {
+ optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]);
+ optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]);
+ }
+ else
+ {
+ oclMat blurredFrame[2] =
+ {
+ allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
+ allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
+ };
+ oclMat pyrLevel[2] =
+ {
+ allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
+ allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
+ };
+
+ Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
+ optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
+
+ for (int i = 0; i < 2; i++)
+ {
+ optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]);
+ resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR);
+ optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]);
+ }
+ }
+
+ optflow_farneback::updateMatricesOcl(curFlowX, curFlowY, R[0], R[1], M);
+
+ if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
+ {
+ Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F);
+ optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize/2), winSize/2);
+ }
+ for (int i = 0; i < numIters; i++)
+ {
+ if (flags & OPTFLOW_FARNEBACK_GAUSSIAN)
+ updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
+ else
+ updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1);
+ }
+
+ prevFlowX = curFlowX;
+ prevFlowY = curFlowY;
+ }
+
+ flowx = curFlowX;
+ flowy = curFlowY;
+}
+
INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine(
Values(false, true),
Values(false, true)));
+//////////////////////////////////////////////////////
+// FarnebackOpticalFlow
+
+namespace
+{
+ IMPLEMENT_PARAM_CLASS(PyrScale, double)
+ IMPLEMENT_PARAM_CLASS(PolyN, int)
+ CV_FLAGS(FarnebackOptFlowFlags, 0, OPTFLOW_FARNEBACK_GAUSSIAN)
+ IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
+}
+
+PARAM_TEST_CASE(Farneback, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
+{
+ double pyrScale;
+ int polyN;
+ int flags;
+ bool useInitFlow;
+
+ virtual void SetUp()
+ {
+ pyrScale = GET_PARAM(0);
+ polyN = GET_PARAM(1);
+ flags = GET_PARAM(2);
+ useInitFlow = GET_PARAM(3);
+ }
+};
+
+TEST_P(Farneback, Accuracy)
+{
+ cv::Mat frame0 = imread(workdir + "/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame0.empty());
+
+ cv::Mat frame1 = imread(workdir + "/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(frame1.empty());
+
+ double polySigma = polyN <= 5 ? 1.1 : 1.5;
+
+ cv::ocl::FarnebackOpticalFlow farn;
+ farn.pyrScale = pyrScale;
+ farn.polyN = polyN;
+ farn.polySigma = polySigma;
+ farn.flags = flags;
+
+ cv::ocl::oclMat d_flowx, d_flowy;
+ farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+
+ cv::Mat flow;
+ if (useInitFlow)
+ {
+ cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)};
+ cv::merge(flowxy, 2, flow);
+
+ farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
+ farn(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+ }
+
+ cv::calcOpticalFlowFarneback(
+ frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
+ farn.numIters, farn.polyN, farn.polySigma, farn.flags);
+
+ std::vector<cv::Mat> flowxy;
+ cv::split(flow, flowxy);
+
+ EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1);
+ EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
+ testing::Values(PyrScale(0.3), PyrScale(0.5), PyrScale(0.8)),
+ testing::Values(PolyN(5), PolyN(7)),
+ testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
+ testing::Values(UseInitFlow(false), UseInitFlow(true))));
#endif // HAVE_OPENCL