From: peng xiao Date: Thu, 27 Jun 2013 01:57:42 +0000 (+0800) Subject: Fix white-spacing X-Git-Tag: accepted/tizen/ivi/20140515.103456~1^2~671^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dcc4766129947853344a3cbb4819d2adcbb62b20;p=profile%2Fivi%2Fopencv.git Fix white-spacing --- diff --git a/modules/ocl/perf/perf_opticalflow.cpp b/modules/ocl/perf/perf_opticalflow.cpp index 2b01e19..936d7a7 100644 --- a/modules/ocl/perf/perf_opticalflow.cpp +++ b/modules/ocl/perf/perf_opticalflow.cpp @@ -136,11 +136,13 @@ PERFTEST(PyrLKOpticalFlow) size_t mismatch = 0; for (int i = 0; i < (int)nextPts.size(); ++i) { - if(status[i] != ocl_status.at(0, i)){ + if(status[i] != ocl_status.at(0, i)) + { mismatch++; continue; } - if(status[i]){ + if(status[i]) + { Point2f gpu_rst = ocl_nextPts.at(0, i); Point2f cpu_rst = nextPts[i]; if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.) @@ -193,24 +195,24 @@ PERFTEST(tvl1flow) WARMUP_ON; d_alg(d0, d1, d_flowx, d_flowy); WARMUP_OFF; -/* - double diff1 = 0.0, diff2 = 0.0; - if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1 - &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1) - TestSystem::instance().setAccurate(1); - else - TestSystem::instance().setAccurate(0); + /* + double diff1 = 0.0, diff2 = 0.0; + if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1 + &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1) + TestSystem::instance().setAccurate(1); + else + TestSystem::instance().setAccurate(0); - TestSystem::instance().setDiff(diff1); - TestSystem::instance().setDiff(diff2); -*/ + TestSystem::instance().setDiff(diff1); + TestSystem::instance().setDiff(diff2); + */ GPU_ON; d_alg(d0, d1, d_flowx, d_flowy); d_alg.collectGarbage(); GPU_OFF; - + cv::Mat flowx, flowy; @@ -352,4 +354,3 @@ PERFTEST(FarnebackOpticalFlow) } } } - diff --git a/modules/ocl/src/opencl/optical_flow_farneback.cl b/modules/ocl/src/opencl/optical_flow_farneback.cl index 2e5c6d9..7cc564e 100644 --- a/modules/ocl/src/opencl/optical_flow_farneback.cl +++ b/modules/ocl/src/opencl/optical_flow_farneback.cl @@ -71,7 +71,7 @@ __kernel void polynomialExpansion(__global float * dst, dstStep /= sizeof(*dst); srcStep /= sizeof(*src); - + int xWarped; __local float *row = smem + tx; @@ -168,7 +168,7 @@ __kernel void gaussianBlur(__global float * dst, srcStep /= sizeof(*src); __local float *row = smem + ty * (bdx + 2*ksizeHalf); - + if (y < height) { // Vertical pass @@ -184,7 +184,7 @@ __kernel void gaussianBlur(__global float * dst, } barrier(CLK_LOCAL_MEM_FENCE); - + if (y < height && y >= 0 && x < width && x >= 0) { // Horizontal pass @@ -207,7 +207,7 @@ __kernel void updateMatrices(__global float * M, { const int y = get_global_id(1); const int x = get_global_id(0); - + mStep /= sizeof(*M); xStep /= sizeof(*flowx); yStep /= sizeof(*flowy); @@ -223,7 +223,8 @@ __kernel void updateMatrices(__global float * M, int x1 = convert_int(floor(fx)); int y1 = convert_int(floor(fy)); - fx -= x1; fy -= y1; + fx -= x1; + fy -= y1; float r2, r3, r4, r5, r6; @@ -278,13 +279,16 @@ __kernel void updateMatrices(__global float * M, r3 += r6*dy + r5*dx; float scale = - c_border[min(x, BORDER_SIZE)] * - c_border[min(y, BORDER_SIZE)] * - c_border[min(width - x - 1, BORDER_SIZE)] * - c_border[min(height - y - 1, BORDER_SIZE)]; + c_border[min(x, BORDER_SIZE)] * + c_border[min(y, BORDER_SIZE)] * + c_border[min(width - x - 1, BORDER_SIZE)] * + c_border[min(height - y - 1, BORDER_SIZE)]; - r2 *= scale; r3 *= scale; r4 *= scale; - r5 *= scale; r6 *= scale; + r2 *= scale; + r3 *= scale; + r4 *= scale; + r5 *= scale; + r6 *= scale; M[mad24(y, mStep, x)] = r4*r4 + r6*r6; M[mad24(height + y, mStep, x)] = (r4 + r5)*r6; @@ -303,7 +307,7 @@ __kernel void boxFilter5(__global float * dst, { const int y = get_global_id(1); const int x = get_global_id(0); - + const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf)); const int smw = bdx + 2*ksizeHalf; // shared memory "width" __local float *row = smem + 5 * ty * smw; @@ -319,16 +323,16 @@ __kernel void boxFilter5(__global float * dst, int xExt = (int)(bx * bdx) + i - ksizeHalf; xExt = min(max(xExt, 0), width - 1); - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)]; for (int j = 1; j <= ksizeHalf; ++j) - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) row[k*smw + i] += - src[mad24(k*height + max(y - j, 0), srcStep, xExt)] + - src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)]; + src[mad24(k*height + max(y - j, 0), srcStep, xExt)] + + src[mad24(k*height + min(y + j, height - 1), srcStep, xExt)]; } } @@ -341,16 +345,16 @@ __kernel void boxFilter5(__global float * dst, row += tx + ksizeHalf; float res[5]; - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) res[k] = row[k*smw]; for (int i = 1; i <= ksizeHalf; ++i) - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) res[k] += row[k*smw - i] + row[k*smw + i]; - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) dst[mad24(k*height + y, dstStep, x)] = res[k] * boxAreaInv; } @@ -372,7 +376,7 @@ __kernel void updateFlow(__global float4 * flowx, __global float4 * flowy, { float4 g11 = M[mad24(y, mStep, x)]; float4 g12 = M[mad24(height + y, mStep, x)]; - float4 g22 = M[mad24(2*height + y, mStep, x)]; + float4 g22 = M[mad24(2*height + y, mStep, x)]; float4 h1 = M[mad24(3*height + y, mStep, x)]; float4 h2 = M[mad24(4*height + y, mStep, x)]; @@ -408,16 +412,16 @@ __kernel void gaussianBlur5(__global float * dst, int xExt = (int)(bx * bdx) + i - ksizeHalf; xExt = idx_col(xExt, width - 1); - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) row[k*smw + i] = src[mad24(k*height + y, srcStep, xExt)] * c_gKer[0]; for (int j = 1; j <= ksizeHalf; ++j) - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) row[k*smw + i] += - (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] + - src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j]; + (src[mad24(k*height + idx_row_low(y - j, height - 1), srcStep, xExt)] + + src[mad24(k*height + idx_row_high(y + j, height - 1), srcStep, xExt)]) * c_gKer[j]; } } @@ -430,16 +434,16 @@ __kernel void gaussianBlur5(__global float * dst, row += tx + ksizeHalf; float res[5]; - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) res[k] = row[k*smw] * c_gKer[0]; for (int i = 1; i <= ksizeHalf; ++i) - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i]; - #pragma unroll +#pragma unroll for (int k = 0; k < 5; ++k) dst[mad24(k*height + y, dstStep, x)] = res[k]; } diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp index 17c021d..e5a7abc 100644 --- a/modules/ocl/src/optical_flow_farneback.cpp +++ b/modules/ocl/src/optical_flow_farneback.cpp @@ -55,178 +55,184 @@ using namespace cv::ocl; namespace cv { - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *optical_flow_farneback; - } +namespace ocl +{ +///////////////////////////OpenCL kernel strings/////////////////////////// +extern const char *optical_flow_farneback; +} } -namespace cv { namespace ocl { namespace optflow_farneback +namespace cv { +namespace ocl { +namespace optflow_farneback { - oclMat g; - oclMat xg; - oclMat xxg; - oclMat gKer; +oclMat g; +oclMat xg; +oclMat xxg; +oclMat gKer; - float ig[4]; +float ig[4]; - inline int divUp(int total, int grain) - { - return (total + grain - 1) / grain; - } +inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} - inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) - { - cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast(c_gKer)); - gKer.upload(t_gKer); - } +inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) +{ + cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast(c_gKer)); + gKer.upload(t_gKer); +} - static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) - { - string kernelName("gaussianBlur"); - size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 }; - int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); - - CV_Assert(dst.size() == src.size()); - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); - } +static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) +{ + string kernelName("gaussianBlur"); + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 }; + int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); + + CV_Assert(dst.size() == src.size()); + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} - static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst) - { - string kernelName("polynomialExpansion"); - size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 }; - int smem_size = 3 * localThreads[0] * sizeof(float); - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - - char opt [128]; - sprintf(opt, "-D polyN=%d", polyN); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1, opt); - } +static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst) +{ + string kernelName("polynomialExpansion"); + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 }; + int smem_size = 3 * localThreads[0] * sizeof(float); + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + + char opt [128]; + sprintf(opt, "-D polyN=%d", polyN); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1, opt); +} - static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) - { - string kernelName("updateMatrices"); - size_t localThreads[3] = { 32, 8, 1 }; - size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0], - divUp(flowx.rows, localThreads[1]) * localThreads[1], - 1 }; - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); - } +static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) +{ + string kernelName("updateMatrices"); + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0], + divUp(flowx.rows, localThreads[1]) * localThreads[1], + 1 + }; + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R0.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&R1.data)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&R0.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&R1.step)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} - static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) - { - string kernelName("boxFilter5"); - int height = src.rows / 5; - size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 }; - int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); - } +static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) +{ + string kernelName("boxFilter5"); + int height = src.rows / 5; + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 }; + int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} - static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy) - { - string kernelName("updateFlow"); - int cols = divUp(flowx.cols, 4); - size_t localThreads[3] = { 32, 8, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(flowx.rows, localThreads[1]) * localThreads[0], - 1 }; - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); - } +static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy) +{ + string kernelName("updateFlow"); + int cols = divUp(flowx.cols, 4); + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], + divUp(flowx.rows, localThreads[1]) * localThreads[0], + 1 + }; + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowy.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowx.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&flowy.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&M.step)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} - static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) - { - string kernelName("gaussianBlur5"); - int height = src.rows / 5; - int width = src.cols; - size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 }; - int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&width)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); - } -}}} // namespace cv { namespace ocl { namespace optflow_farneback +static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) +{ + string kernelName("gaussianBlur5"); + int height = src.rows / 5; + int width = src.cols; + size_t localThreads[3] = { 256, 1, 1 }; + size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 }; + int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float); + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&width)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} +} +} +} // namespace cv { namespace ocl { namespace optflow_farneback static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) { @@ -236,8 +242,8 @@ static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) } void cv::ocl::FarnebackOpticalFlow::prepareGaussian( - int n, double sigma, float *g, float *xg, float *xxg, - double &ig11, double &ig03, double &ig33, double &ig55) + int n, double sigma, float *g, float *xg, float *xxg, + double &ig11, double &ig03, double &ig33, double &ig55) { double s = 0.; for (int x = -n; x <= n; x++) @@ -316,8 +322,8 @@ void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s } void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( - const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy, - oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) + const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat &flowy, + oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) { optflow_farneback::boxFilter5Ocl(M, blockSize/2, bufM); @@ -333,8 +339,8 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( - const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, - oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) + const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, + oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) { optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM); @@ -348,7 +354,7 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( void cv::ocl::FarnebackOpticalFlow::operator ()( - const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy) + const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy) { CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); CV_Assert(frame0.size() == frame1.size()); @@ -504,4 +510,3 @@ void cv::ocl::FarnebackOpticalFlow::operator ()( flowx = curFlowX; flowy = curFlowY; } -