From: krodyush Date: Fri, 4 Apr 2014 07:26:08 +0000 (+0400) Subject: Merge remote-tracking branch 'github/master' into pullreq/140319-PyrLKOpticalFlow X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~470^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4ca695cab0cd3046c900c1189da65bb4b9b7cec0;p=profile%2Fivi%2Fopencv.git Merge remote-tracking branch 'github/master' into pullreq/140319-PyrLKOpticalFlow Conflicts: modules/video/src/opencl/pyrlk.cl --- 4ca695cab0cd3046c900c1189da65bb4b9b7cec0 diff --cc modules/video/src/opencl/pyrlk.cl index 45571c7,822e628..1e27c8a --- a/modules/video/src/opencl/pyrlk.cl +++ b/modules/video/src/opencl/pyrlk.cl @@@ -264,81 -262,9 +264,40 @@@ inline void GetError(image2d_t J, cons *errval += fabs(diff); } - inline void SetPatch4(image2d_t I, const float x, const float y, - float4* Pch, float4* Dx, float4* Dy, - float* A11, float* A12, float* A22) - { - *Pch = read_imagef(I, sampler, (float2)(x, y)); - - float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) - - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1))); - - float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) - - (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1))); - - - *Dx = dIdx; - *Dy = dIdy; - float4 sqIdx = dIdx * dIdx; - *A11 += sqIdx.x + sqIdx.y + sqIdx.z; - sqIdx = dIdx * dIdy; - *A12 += sqIdx.x + sqIdx.y + sqIdx.z; - sqIdx = dIdy * dIdy; - *A22 += sqIdx.x + sqIdx.y + sqIdx.z; - } - - inline void GetPatch4(image2d_t J, const float x, const float y, - const float4* Pch, const float4* Dx, const float4* Dy, - float* b1, float* b2) - { - float4 J_val = read_imagef(J, sampler, (float2)(x, y)); - float4 diff = (J_val - *Pch) * 32.0f; - float4 xdiff = diff* *Dx; - *b1 += xdiff.x + xdiff.y + xdiff.z; - xdiff = diff* *Dy; - *b2 += xdiff.x + xdiff.y + xdiff.z; - } - - inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval) - { - float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch; - *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z); - } - -#define GRIDSIZE 3 + +//macro to read pixel value into local memory. +#define READI(_y,_x) IPatchLocal[yid+((_y)*LSy)][xid+((_x)*LSx)] = read_imagef(I, sampler, (float2)(Point.x + xid+(_x)*LSx + 0.5f-1, Point.y + yid+(_y)*LSy+ 0.5f-1)).x; +void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float IPatchLocal[LSy*GRIDSIZE+2][LSx*GRIDSIZE+2]) +{ + unsigned int xid=get_local_id(0); + unsigned int yid=get_local_id(1); + //read (3*LSx)*(3*LSy) window. each macro call read LSx*LSy pixels block + READI(0,0);READI(0,1);READI(0,2); + READI(1,0);READI(1,1);READI(1,2); + READI(2,0);READI(2,1);READI(2,2); + if(xid<2) + {// read last 2 columns border. each macro call reads 2*LSy pixels block + READI(0,3); + READI(1,3); + READI(2,3); + } + + if(yid<2) + {// read last 2 row. each macro call reads LSx*2 pixels block + READI(3,0);READI(3,1);READI(3,2); + } + + if(yid<2 && xid<2) + {// read right bottom 2x2 corner. one macro call reads 2*2 pixels block + READI(3,3); + } + barrier(CLK_LOCAL_MEM_FENCE); +} +#undef READI + +__attribute__((reqd_work_group_size(LSx, LSy, 1))) __kernel void lkSparse(image2d_t I, image2d_t J, - __global const float2* prevPts, __global float2* nextPts, __global uchar* status, __global float* err, + __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err, const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr) { __local float smem1[BUFFER];