//
//
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
__kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
__global dstT *dst,
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
- #pragma OPENCL EXTENSION cl_khr_fp64:enable
- #define CV_PI 3.1415926535897932384626433832795
- #ifndef DBL_EPSILON
- #define DBL_EPSILON 0x1.0p-52
- #endif
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
#else
- #define CV_PI 3.1415926535897932384626433832795f
- #ifndef DBL_EPSILON
- #define DBL_EPSILON 0x1.0p-52f
- #endif
+#define CV_PI M_PI_F
#endif
-
__kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
- __global float *dst1, int dst1_step, int dst1_offset, //magnitude
- __global float *dst2, int dst2_step, int dst2_offset, //cartToPolar
+ __global float *dst1, int dst1_step, int dst1_offset, // magnitude
+ __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
int rows, int cols, int angInDegree)
{
int x = get_global_id(0);
float y2 = y * y;
float magnitude = sqrt(x2 + y2);
- float cartToPolar;
float tmp = y >= 0 ? 0 : CV_PI*2;
tmp = x < 0 ? CV_PI : tmp;
float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
- cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp :
- tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
+ float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
+ tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
- cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+ cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
*((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
*((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
}
#if defined (DOUBLE_SUPPORT)
+
__kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *dst1, int dst1_step, int dst1_offset,
double y2 = y * y;
double magnitude = sqrt(x2 + y2);
- double cartToPolar;
float tmp = y >= 0 ? 0 : CV_PI*2;
tmp = x < 0 ? CV_PI : tmp;
float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
- cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON) + tmp :
- tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON);
+ double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp :
+ tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
- cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+ cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
*((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
*((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
}
}
+
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////LOG/////////////////////////////////////////////////////
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
__kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
/**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
//M*/
/**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4
#else
//M*/
/**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define RES_TYPE double4
#define CONVERT_RES_TYPE convert_double4
#else
// the use of this software, even if advised of the possibility of such damage.
//
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
//
//
-#if defined (DOUBLE_SUPPORT)
- #ifdef cl_amd_fp64
- #pragma OPENCL EXTENSION cl_amd_fp64:enable
- #elif defined (cl_khr_fp64)
- #pragma OPENCL EXTENSION cl_khr_fp64:enable
- #endif
- #define CV_PI M_PI
- #define CV_2PI (2 * CV_PI)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
+#define CV_2PI (2 * CV_PI)
#else
- #define CV_PI M_PI_F
- #define CV_2PI (2 * CV_PI)
+#define CV_PI M_PI_F
+#define CV_2PI (2 * CV_PI)
#endif
/**************************************phase inradians**************************************/
double data1 = src1[src1_index];
double data2 = src2[src2_index];
- double tmp = atan2(src2[src2_index], src1[src1_index]);
+ double tmp = atan2(data2, data1);
tmp = 180 * tmp / CV_PI;
if (tmp < 0)
//M*/
#ifdef DOUBLE_SUPPORT
- #ifdef cl_amd_fp64
- #pragma OPENCL EXTENSION cl_amd_fp64:enable
- #elif defined (cl_khr_fp64)
- #pragma OPENCL EXTENSION cl_khr_fp64:enable
- #endif
- #define CV_PI M_PI
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
#else
- #define CV_PI M_PI_F
+#define CV_PI M_PI_F
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////////
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
-typedef double F;
-typedef double4 F4;
-#define convert_F4 convert_double4;
+#endif
+#define F double
#else
-typedef float F;
-typedef float4 F4;
-#define convert_F4 convert_float4;
+#define F float
#endif
+
/************************************** pow **************************************/
+
__kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1,
- F p)
+ int rows, int cols, int dst_step1, F p)
{
int x = get_global_id(0);
*((__global float *)((__global char *)dst + dst_index)) = tmp;
}
-
}
#if defined (DOUBLE_SUPPORT)
+
__kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1,
- F p)
+ int rows, int cols, int dst_step1, F p)
{
int x = get_global_id(0);
double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
*((__global double *)((__global char *)dst + dst_index)) = tmp;
}
-
}
+
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
{
return fmax(var + learningRate * (diff * diff - var), minVar);
}
+
#else
+
#define T_FRAME uchar4
#define T_MEAN_VAR float4
#define CONVERT_TYPE convert_uchar4_sat
#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+
inline float4 cvt(const uchar4 val)
{
float4 result;
return (val.x + val.y + val.z);
}
+// Exchanges two float4 elements of ptr in place: the one addressed with index k
+// and the one addressed with index k + 1, both at column x and row y.
+// Element offsets are computed as ((index * rows + y) * ptr_step + x).
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    const int lower = (k * rows + y) * ptr_step + x;
+    const int upper = ((k + 1) * rows + y) * ptr_step + x;
+
+    const float4 saved = ptr[lower];
+    ptr[lower] = ptr[upper];
+    ptr[upper] = saved;
+}
+
+
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
{
float4 result;
result.w = 0.0f;
return result;
}
+
#endif
typedef struct
float c_varMax;
float c_tau;
uchar c_shadowVal;
-}con_srtuct_t;
+} con_srtuct_t;
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
{
ptr[((k + 1) * rows + y) * ptr_step + x] = val;
}
-static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
-{
- float4 val = ptr[(k * rows + y) * ptr_step + x];
- ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
- ptr[((k + 1) * rows + y) * ptr_step + x] = val;
-}
-
__kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
__global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
int frame_row, int frame_col, int frame_step, int fgmask_step,
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#define DIST_TYPE 0
#endif
-//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-static int bit1Count(int v)
-{
- v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
- v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
- return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-}
-
// dirty fix for non-template support
#if (DIST_TYPE == 0) // L1Dist
# ifdef T_FLOAT
typedef float result_type;
#define DIST_RES(x) sqrt(x)
#elif (DIST_TYPE == 2) // Hamming
+//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+// Returns the number of bits set to 1 in v (a value in the range 0..32).
+// The computation runs on an unsigned copy of v: the intermediate subtraction
+// and the final multiplication can exceed the signed int range, and signed
+// overflow is undefined, whereas unsigned arithmetic wraps modulo 2^32.
+static int bit1Count(int v)
+{
+    unsigned int u = (unsigned int)v;
+    u = u - ((u >> 1) & 0x55555555u);                 // per-2-bit counts
+    u = (u & 0x33333333u) + ((u >> 2) & 0x33333333u); // per-4-bit counts
+    u = (u + (u >> 4)) & 0x0F0F0F0Fu;                 // per-byte counts
+    // Multiplying by 0x01010101 sums the four byte counts into the top byte.
+    return (int)((u * 0x01010101u) >> 24);
+}
#define DIST(x, y) bit1Count( (x) ^ (y) )
typedef int value_type;
typedef int result_type;
//
//
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
-__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
- int dstStep_in_piexl,int pixel_end)
+__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+ int cols, int rows,
+ int dstStep_in_piexl, int pixel_end)
{
int id = get_global_id(0);
int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
dst[addr.y] = outpix1;
}
else if(outx.x<cols && outy.x<rows)
- {
dst[addr.x] = outpix0;
- }
}
-__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
- int srcStep_in_pixel,int pixel_end)
+__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+ int cols, int rows,
+ int srcStep_in_pixel, int pixel_end)
{
int id = get_global_id(0)<<2;
int y = id / cols;
dst[outaddr.y] = outpixel1;
}
else if(outaddr.x <= pixel_end)
- {
dst[outaddr.x] = pixel0;
- }
}
#endif
#if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#endif
#if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
//
//M*/
-// Enter your kernel in this window
-//#pragma OPENCL EXTENSION cl_amd_printf:enable
#define CV_HAAR_FEATURE_MAX 3
typedef int sumtype;
typedef float sqsumtype;
int counter = get_global_id(0);
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
GpuHidHaarTreeNode t1 = *(orinode + counter);
-#pragma unroll
+ #pragma unroll
for (i = 0; i < 3; i++)
{
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
counter += nodenum;
-#pragma unroll
+ #pragma unroll
for (i = 0; i < 3; i++)
{
newnode[counter].p[i][0] = tr_x[i];
//
//M*/
-#if defined (__ATI__)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
/************************************** convolve **************************************/
//
//
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
+
#define LSIZE 256
#define LSIZE_1 255
#define LSIZE_2 254
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
// We shall support other types later if necessary.
-#if defined DOUBLE_SUPPORT
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define F double
#else
#define F float
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
//warpAffine kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
typedef double F;
typedef double4 F4;
//wrapPerspective kernel
//support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
typedef double F;
typedef double4 F4;
#define my_comp(x,y) ((x) < (y))
#endif
-///////////// parallel merge sort ///////////////
-// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
-static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
-{
- // The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
- uint firstIndex = left;
- uint lastIndex = right;
-
- // This loops through [firstIndex, lastIndex)
- // Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
- // this while loop will be divergent within a wavefront
- while( firstIndex < lastIndex )
- {
- K_T dataVal = data[ firstIndex ];
-
- // This branch will create divergent wavefronts
- if( my_comp( dataVal, searchVal ) )
- {
- firstIndex = firstIndex+1;
- }
- else
- {
- break;
- }
- }
-
- return firstIndex;
-}
-
// This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
// by a base pointer and left and right index for a particular candidate value. The comparison operator is
// passed as a functor parameter my_comp
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
#define TYPE double
#else
#define TYPE float
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
-
#define TYPE_IMAGE_SQSUM double
#else
#define TYPE_IMAGE_SQSUM float
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
-
+#endif
///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////optimized code using vector roi//////////////////////////
////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
////////////////////////////////////////////////////////////////////////////////////////////////
+
__kernel void merge_vector_C2_D0(__global uchar *mat_dst, int dst_step, int dst_offset,
__global uchar *mat_src0, int src0_step, int src0_offset,
__global uchar *mat_src1, int src1_step, int src1_offset,
//
//M*/
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
typedef double T;
#else
//
#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
__kernel void convert_to(
__global const srcT* restrict srcMat,
//
//
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//
//M*/
-//#pragma OPENCL EXTENSION cl_amd_printf : enable
-
#define BUFFER 64
#define BUFFER2 BUFFER>>1
#ifndef WAVE_SIZE
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
+#endif
#if DATA_DEPTH == 0
#define BASE_TYPE uchar
{
unsigned int cache = cols[0];
-#pragma unroll
for(int i = 1; i <= winsz; i++)
cache += cols[i];
//M*/
#if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
-
#endif
#ifdef T_FLOAT
//
//M*/
-
-#ifndef FLT_MAX
-#define FLT_MAX CL_FLT_MAX
-#endif
-
-#ifndef SHRT_MAX
-#define SHRT_MAX CL_SHORT_MAX
-#endif
-
-
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
+
__kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__global short *ctemp, int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp)
}
}
}
+
__kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
__global float *ctemp, int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp)
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
+
__kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
__global short *ctemp,int h, int w, int nr_plane,
int cmsg_step1, int cdisp_step1, int cndisp)
///////////////////////////////////////////////////////////////
/////////////////////// init data cost ////////////////////////
///////////////////////////////////////////////////////////////
+
inline float compute_3(__global uchar* left, __global uchar* right,
float cdata_weight, float cmax_data_term)
{
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
+
inline float compute_1(__global uchar* left, __global uchar* right,
float cdata_weight, float cmax_data_term)
{
}
}
}
+
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
}
}
}
+
////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
+
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
+
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
}
}
}
+
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
}
}
}
+
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
}
}
-static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
- __global float *r_new, __global const float *u_cur, __global const float *d_cur,
- __global const float *l_cur, __global const float *r_cur,
- __global float *data_cost_selected, __global float *disparity_selected_new,
- __global float *data_cost_new, __global const float *data_cost_cur,
- __global const float *disparity_selected_cur,
- int nr_plane, int nr_plane2,
- int cdisp_step1, int cdisp_step2)
-{
- for(int i = 0; i < nr_plane; i++)
- {
- float minimum = FLT_MAX;
- int id = 0;
-
- for(int j = 0; j < nr_plane2; j++)
- {
- float cur = data_cost_new[j * cdisp_step1];
- if(cur < minimum)
- {
- minimum = cur;
- id = j;
- }
- }
-
- data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
- disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
-
- u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
- d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
- l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
- r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
- data_cost_new[id * cdisp_step1] = FLT_MAX;
-
- }
-}
__kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
__global short *r_new_, __global short *u_cur_, __global const short *d_cur_,
__global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
cdisp_step1, cdisp_step2);
}
}
+
__kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
__global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
__global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
// the use of this software, even if advised of the possibility of such damage.
//
//
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define TYPE double
#else
#else
#define POW(X,Y) X
#endif
-#define FLT_MAX 3.402823466e+38F
#define MAX_VAL (FLT_MAX*1e-3)
__kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
dst[row * dst_step + col] = temp1;
}
}
-}
\ No newline at end of file
+}
//M*/
__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
-__global float* dx, __global float* dy, int dx_step)
+ __global float* dx, __global float* dy, int dx_step)
{
int x = get_global_id(0);
int y = get_global_id(1);
{
int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-
- //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
- //{
- // printf("y = %d\n", y);
- // printf("src_x1 = %d\n", src_x1);
- // printf("src_x2 = %d\n", src_x2);
- //}
dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
int u2_offset_x,
int u2_offset_y)
{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ int x = get_global_id(0);
+ int y = get_global_id(1);
if(x < I0_col&&y < I0_row)
{
- //const float u1Val = u1(y, x);
- const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
- //const float u2Val = u2(y, x);
- const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+ //float u1Val = u1(y, x);
+ float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ //float u2Val = u2(y, x);
+ float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
- const float wx = x + u1Val;
- const float wy = y + u2Val;
+ float wx = x + u1Val;
+ float wy = y + u2Val;
- const int xmin = ceil(wx - 2.0f);
- const int xmax = floor(wx + 2.0f);
+ int xmin = ceil(wx - 2.0f);
+ int xmax = floor(wx + 2.0f);
- const int ymin = ceil(wy - 2.0f);
- const int ymax = floor(wy + 2.0f);
+ int ymin = ceil(wy - 2.0f);
+ int ymax = floor(wy + 2.0f);
float sum = 0.0f;
float sumx = 0.0f;
{
for (int cx = xmin; cx <= xmax; ++cx)
{
- const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+ float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
//sum += w * tex2D(tex_I1 , cx, cy);
int2 cood = (int2)(cx, cy);
}
}
- const float coeff = 1.0f / wsum;
+ float coeff = 1.0f / wsum;
- const float I1wVal = sum * coeff;
- const float I1wxVal = sumx * coeff;
- const float I1wyVal = sumy * coeff;
+ float I1wVal = sum * coeff;
+ float I1wxVal = sumx * coeff;
+ float I1wyVal = sumy * coeff;
I1w[y * I1w_step + x] = I1wVal;
I1wx[y * I1w_step + x] = I1wxVal;
I1wy[y * I1w_step + x] = I1wyVal;
- const float Ix2 = I1wxVal * I1wxVal;
- const float Iy2 = I1wyVal * I1wyVal;
+ float Ix2 = I1wxVal * I1wxVal;
+ float Iy2 = I1wyVal * I1wyVal;
// store the |Grad(I1)|^2
grad[y * I1w_step + x] = Ix2 + Iy2;
// compute the constant part of the rho function
- const float I0Val = I0[y * I0_step + x];
+ float I0Val = I0[y * I0_step + x];
rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
}
-static float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow)
+static float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow)
{
int i0 = clamp(x, 0, cols - 1);
int j0 = clamp(y, 0, rows - 1);
int I1_step,
int I1x_step)
{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ int x = get_global_id(0);
+ int y = get_global_id(1);
if(x < I0_col&&y < I0_row)
{
- //const float u1Val = u1(y, x);
- const float u1Val = u1[y * u1_step + x];
- //const float u2Val = u2(y, x);
- const float u2Val = u2[y * u2_step + x];
+ //float u1Val = u1(y, x);
+ float u1Val = u1[y * u1_step + x];
+ //float u2Val = u2(y, x);
+ float u2Val = u2[y * u2_step + x];
- const float wx = x + u1Val;
- const float wy = y + u2Val;
+ float wx = x + u1Val;
+ float wy = y + u2Val;
- const int xmin = ceil(wx - 2.0f);
- const int xmax = floor(wx + 2.0f);
+ int xmin = ceil(wx - 2.0f);
+ int xmax = floor(wx + 2.0f);
- const int ymin = ceil(wy - 2.0f);
- const int ymax = floor(wy + 2.0f);
+ int ymin = ceil(wy - 2.0f);
+ int ymax = floor(wy + 2.0f);
float sum = 0.0f;
float sumx = 0.0f;
{
for (int cx = xmin; cx <= xmax; ++cx)
{
- const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+ float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
int2 cood = (int2)(cx, cy);
sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
}
}
- const float coeff = 1.0f / wsum;
+ float coeff = 1.0f / wsum;
- const float I1wVal = sum * coeff;
- const float I1wxVal = sumx * coeff;
- const float I1wyVal = sumy * coeff;
+ float I1wVal = sum * coeff;
+ float I1wxVal = sumx * coeff;
+ float I1wyVal = sumy * coeff;
I1w[y * I1w_step + x] = I1wVal;
I1wx[y * I1w_step + x] = I1wxVal;
I1wy[y * I1w_step + x] = I1wyVal;
- const float Ix2 = I1wxVal * I1wxVal;
- const float Iy2 = I1wyVal * I1wyVal;
+ float Ix2 = I1wxVal * I1wxVal;
+ float Iy2 = I1wyVal * I1wyVal;
// store the |Grad(I1)|^2
grad[y * I1w_step + x] = Ix2 + Iy2;
// compute the constant part of the rho function
- const float I0Val = I0[y * I0_step + x];
+ float I0Val = I0[y * I0_step + x];
rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
__global float* p12,
__global float* p21,
__global float* p22,
- const float taut,
+ float taut,
int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y)
{
-
- //const int x = blockIdx.x * blockDim.x + threadIdx.x;
- //const int y = blockIdx.y * blockDim.y + threadIdx.y;
- const int x = get_global_id(0);
- const int y = get_global_id(1);
+ int x = get_global_id(0);
+ int y = get_global_id(1);
if(x < u1_col && y < u1_row)
{
int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
- const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
- const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
- const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+ float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
- const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+ float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
- const float g1 = hypot(u1x, u1y);
- const float g2 = hypot(u2x, u2y);
+ float g1 = hypot(u1x, u1y);
+ float g2 = hypot(u2x, u2y);
- const float ng1 = 1.0f + taut * g1;
- const float ng2 = 1.0f + taut * g2;
+ float ng1 = 1.0f + taut * g1;
+ float ng2 = 1.0f + taut * g2;
p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
if (x > 0 && y > 0)
{
- const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
- const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
+ float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
+ float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
return v1x + v2y;
}
else
__global const float* p22, /*int p22_step,*/
__global float* u1, int u1_step,
__global float* u2,
- __global float* error, const float l_t, const float theta, int u2_step,
+ __global float* error, float l_t, float theta, int u2_step,
int u1_offset_x,
int u1_offset_y,
int u2_offset_x,
int u2_offset_y,
char calc_error)
{
-
- //const int x = blockIdx.x * blockDim.x + threadIdx.x;
- //const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
int x = get_global_id(0);
int y = get_global_id(1);
-
if(x < I1wx_col && y < I1wx_row)
{
- const float I1wxVal = I1wx[y * I1wx_step + x];
- const float I1wyVal = I1wy[y * I1wx_step + x];
- const float gradVal = grad[y * I1wx_step + x];
- const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
- const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+ float I1wxVal = I1wx[y * I1wx_step + x];
+ float I1wyVal = I1wy[y * I1wx_step + x];
+ float gradVal = grad[y * I1wx_step + x];
+ float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
- const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
+ float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
// estimate the values of the variable (v1, v2) (thresholding operator TH)
}
else if (gradVal > 1.192092896e-07f)
{
- const float fi = -rho / gradVal;
+ float fi = -rho / gradVal;
d1 = fi * I1wxVal;
d2 = fi * I1wyVal;
}
- const float v1 = u1OldVal + d1;
- const float v2 = u2OldVal + d2;
+ float v1 = u1OldVal + d1;
+ float v2 = u2OldVal + d2;
// compute the divergence of the dual variable (p1, p2)
- const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
- const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
+ float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
+ float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
// estimate the values of the optical flow (u1, u2)
- const float u1NewVal = v1 + theta * div_p1;
- const float u2NewVal = v2 + theta * div_p2;
+ float u1NewVal = v1 + theta * div_p1;
+ float u2NewVal = v2 + theta * div_p2;
u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
if(calc_error)
{
- const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
- const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+ float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+ float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error[y * I1wx_step + x] = n1 + n2;
}
}