*errval += fabs(diff);
}
- inline void SetPatch4(image2d_t I, const float x, const float y,
- float4* Pch, float4* Dx, float4* Dy,
- float* A11, float* A12, float* A22)
- {
- *Pch = read_imagef(I, sampler, (float2)(x, y));
-
- float4 dIdx = 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x + 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
- (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x - 1, y)) + 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)));
-
- float4 dIdy = 3.0f * read_imagef(I, sampler, (float2)(x - 1, y + 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y + 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y + 1)) -
- (3.0f * read_imagef(I, sampler, (float2)(x - 1, y - 1)) + 10.0f * read_imagef(I, sampler, (float2)(x, y - 1)) + 3.0f * read_imagef(I, sampler, (float2)(x + 1, y - 1)));
-
-
- *Dx = dIdx;
- *Dy = dIdy;
- float4 sqIdx = dIdx * dIdx;
- *A11 += sqIdx.x + sqIdx.y + sqIdx.z;
- sqIdx = dIdx * dIdy;
- *A12 += sqIdx.x + sqIdx.y + sqIdx.z;
- sqIdx = dIdy * dIdy;
- *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
- }
-
- inline void GetPatch4(image2d_t J, const float x, const float y,
- const float4* Pch, const float4* Dx, const float4* Dy,
- float* b1, float* b2)
- {
- float4 J_val = read_imagef(J, sampler, (float2)(x, y));
- float4 diff = (J_val - *Pch) * 32.0f;
- float4 xdiff = diff* *Dx;
- *b1 += xdiff.x + xdiff.y + xdiff.z;
- xdiff = diff* *Dy;
- *b2 += xdiff.x + xdiff.y + xdiff.z;
- }
-
- inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
- {
- float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
- *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
- }
-
-#define GRIDSIZE 3
+
+//Macro: each work-item stores the .x channel of one pixel of I into IPatchLocal at its local id shifted by (_x*LSx, _y*LSy); the sampled window starts one pixel up/left of Point (the +0.5f presumably targets texel centres - confirm against the sampler definition).
+#define READI(_y,_x) IPatchLocal[yid+((_y)*LSy)][xid+((_x)*LSx)] = read_imagef(I, sampler, (float2)(Point.x + xid+(_x)*LSx + 0.5f-1, Point.y + yid+(_y)*LSy+ 0.5f-1)).x;
+void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float IPatchLocal[LSy*GRIDSIZE+2][LSx*GRIDSIZE+2])
+{ // Cooperatively loads a (3*LSy+2) x (3*LSx+2) window of I around Point into IPatchLocal, then synchronizes the work-group.
+ unsigned int xid=get_local_id(0); // local id along dimension 0, used as the column offset
+ unsigned int yid=get_local_id(1); // local id along dimension 1, used as the row offset
+ //Reads the (3*LSx)*(3*LSy) window as a 3x3 grid of blocks; each macro call reads one LSx*LSy pixel block.
+ READI(0,0);READI(0,1);READI(0,2);
+ READI(1,0);READI(1,1);READI(1,2);
+ READI(2,0);READI(2,1);READI(2,2);
+ if(xid<2)
+ {// Reads the last 2 border columns (only work-items with xid 0 or 1 take part); each macro call reads a 2*LSy pixel block.
+ READI(0,3);
+ READI(1,3);
+ READI(2,3);
+ }
+
+ if(yid<2)
+ {// Reads the last 2 border rows (only work-items with yid 0 or 1 take part); each macro call reads an LSx*2 pixel block.
+ READI(3,0);READI(3,1);READI(3,2);
+ }
+
+ if(yid<2 && xid<2)
+ {// Reads the bottom-right 2x2 corner; the single macro call reads a 2*2 pixel block.
+ READI(3,3);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE); // every work-item reaches this call; local-memory writes above are visible to the whole work-group afterwards
+}
+#undef READI
+
+__attribute__((reqd_work_group_size(LSx, LSy, 1)))
__kernel void lkSparse(image2d_t I, image2d_t J,
- __global const float2* prevPts, __global float2* nextPts, __global uchar* status, __global float* err,
+ __global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
__local float smem1[BUFFER];