From 96121a66c583000bc9a91fe97fe00cb4292fe251 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 11 Nov 2013 20:08:40 +0400 Subject: [PATCH] kernel warnings on AMD --- modules/ocl/src/opencl/arithm_LUT.cl | 6 +- .../ocl/src/opencl/arithm_absdiff_nonsaturate.cl | 8 +- modules/ocl/src/opencl/arithm_add.cl | 8 +- modules/ocl/src/opencl/arithm_addWeighted.cl | 8 +- modules/ocl/src/opencl/arithm_add_mask.cl | 8 +- modules/ocl/src/opencl/arithm_add_scalar.cl | 8 +- modules/ocl/src/opencl/arithm_add_scalar_mask.cl | 8 +- .../opencl/arithm_bitwise_binary_scalar_mask.cl | 8 -- modules/ocl/src/opencl/arithm_bitwise_not.cl | 8 +- modules/ocl/src/opencl/arithm_cartToPolar.cl | 39 +++-- modules/ocl/src/opencl/arithm_compare.cl | 8 +- modules/ocl/src/opencl/arithm_exp.cl | 8 +- modules/ocl/src/opencl/arithm_flip.cl | 8 +- modules/ocl/src/opencl/arithm_log.cl | 6 +- modules/ocl/src/opencl/arithm_magnitude.cl | 6 +- modules/ocl/src/opencl/arithm_minMax.cl | 2 +- modules/ocl/src/opencl/arithm_minMaxLoc.cl | 7 +- modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl | 7 +- modules/ocl/src/opencl/arithm_nonzero.cl | 2 +- modules/ocl/src/opencl/arithm_phase.cl | 22 +-- modules/ocl/src/opencl/arithm_polarToCart.cl | 14 +- modules/ocl/src/opencl/arithm_pow.cl | 26 ++-- modules/ocl/src/opencl/arithm_setidentity.cl | 8 +- modules/ocl/src/opencl/arithm_sum.cl | 8 +- modules/ocl/src/opencl/arithm_transpose.cl | 2 +- modules/ocl/src/opencl/bgfg_mog.cl | 21 +-- modules/ocl/src/opencl/blend_linear.cl | 2 +- modules/ocl/src/opencl/brute_force_match.cl | 15 +- modules/ocl/src/opencl/convertC3C4.cl | 20 +-- modules/ocl/src/opencl/filtering_boxFilter.cl | 4 + modules/ocl/src/opencl/filtering_filter2D.cl | 4 + modules/ocl/src/opencl/haarobjectdetect_scaled2.cl | 6 +- modules/ocl/src/opencl/imgproc_convolve.cl | 6 +- modules/ocl/src/opencl/imgproc_copymakeboder.cl | 2 +- modules/ocl/src/opencl/imgproc_integral.cl | 9 +- modules/ocl/src/opencl/imgproc_integral_sum.cl | 8 +- modules/ocl/src/opencl/imgproc_remap.cl | 8 +- modules/ocl/src/opencl/imgproc_resize.cl | 6 +- modules/ocl/src/opencl/imgproc_threshold.cl | 2 +- modules/ocl/src/opencl/imgproc_warpAffine.cl | 8 +- modules/ocl/src/opencl/imgproc_warpPerspective.cl | 8 +- modules/ocl/src/opencl/kernel_stablesort_by_key.cl | 29 ---- modules/ocl/src/opencl/knearest.cl | 7 +- modules/ocl/src/opencl/match_template.cl | 10 +- modules/ocl/src/opencl/merge_mat.cl | 8 +- modules/ocl/src/opencl/moments.cl | 8 +- modules/ocl/src/opencl/operator_convertTo.cl | 4 + modules/ocl/src/opencl/operator_copyToM.cl | 8 +- modules/ocl/src/opencl/operator_setTo.cl | 8 +- modules/ocl/src/opencl/operator_setToM.cl | 8 +- modules/ocl/src/opencl/pyrlk.cl | 2 - modules/ocl/src/opencl/split_mat.cl | 7 +- modules/ocl/src/opencl/stereobm.cl | 1 - modules/ocl/src/opencl/stereobp.cl | 8 +- modules/ocl/src/opencl/stereocsbp.cl | 58 ++------ modules/ocl/src/opencl/svm.cl | 12 +- modules/ocl/src/opencl/tvl1flow.cl | 157 ++++++++++----------- 57 files changed, 336 insertions(+), 371 deletions(-) diff --git a/modules/ocl/src/opencl/arithm_LUT.cl b/modules/ocl/src/opencl/arithm_LUT.cl index 658e1f4..30407bb 100644 --- a/modules/ocl/src/opencl/arithm_LUT.cl +++ b/modules/ocl/src/opencl/arithm_LUT.cl @@ -34,9 +34,13 @@ // // -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif __kernel void LUT_C1( __global const srcT * src, __global const dstT *lut, __global dstT *dst, diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl index fcf3874..c09560a 100644 --- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -44,11 +44,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index a73b65d..04262b8 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -44,11 +44,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_addWeighted.cl b/modules/ocl/src/opencl/arithm_addWeighted.cl index 8272806..872ee85 100644 --- a/modules/ocl/src/opencl/arithm_addWeighted.cl +++ b/modules/ocl/src/opencl/arithm_addWeighted.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl index ea96d8a..b115d9b 100644 --- a/modules/ocl/src/opencl/arithm_add_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_mask.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl index b82eff5..05ea48d 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index 0762b19..a8b9657 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl index 03f46cc..756f201 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl @@ -43,14 +43,6 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#endif -#endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////bitwise_binary//////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl index 5bc1839..b6f76d6 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_not.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl index e37818c..f634f2d 100644 --- a/modules/ocl/src/opencl/arithm_cartToPolar.cl +++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl @@ -43,24 +43,21 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) - #pragma OPENCL EXTENSION cl_khr_fp64:enable - #define CV_PI 3.1415926535897932384626433832795 - #ifndef DBL_EPSILON - #define DBL_EPSILON 0x1.0p-52 - #endif +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#define CV_PI M_PI #else - #define CV_PI 3.1415926535897932384626433832795f - #ifndef DBL_EPSILON - #define DBL_EPSILON 0x1.0p-52f - #endif +#define CV_PI M_PI_F #endif - __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset, __global float *src2, int src2_step, int src2_offset, - __global float *dst1, int dst1_step, int dst1_offset, //magnitude - __global float *dst2, int dst2_step, int dst2_offset, //cartToPolar + __global float *dst1, int dst1_step, int dst1_offset, // magnitude + __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar int rows, int cols, int angInDegree) { int x = get_global_id(0); @@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr float y2 = y * y; float magnitude = sqrt(x2 + y2); - float cartToPolar; float tmp = y >= 0 ? 0 : CV_PI*2; tmp = x < 0 ? CV_PI : tmp; float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f; - cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp : - tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON); + float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp : + tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON); - cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI); + cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI); *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude; *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar; @@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr } #if defined (DOUBLE_SUPPORT) + __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset, __global double *src2, int src2_step, int src2_offset, __global double *dst1, int dst1_step, int dst1_offset, @@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s double y2 = y * y; double magnitude = sqrt(x2 + y2); - double cartToPolar; float tmp = y >= 0 ? 0 : CV_PI*2; tmp = x < 0 ? CV_PI : tmp; float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5; - cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON) + tmp : - tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON); + double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON) + tmp : + tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON); - cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI); + cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI); *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude; *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar; } } + #endif diff --git a/modules/ocl/src/opencl/arithm_compare.cl b/modules/ocl/src/opencl/arithm_compare.cl index 005d3c7..73e6299 100644 --- a/modules/ocl/src/opencl/arithm_compare.cl +++ b/modules/ocl/src/opencl/arithm_compare.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl index 835bc95..f0a1893 100644 --- a/modules/ocl/src/opencl/arithm_exp.cl +++ b/modules/ocl/src/opencl/arithm_exp.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_flip.cl b/modules/ocl/src/opencl/arithm_flip.cl index 416240b..b9bacd3 100644 --- a/modules/ocl/src/opencl/arithm_flip.cl +++ b/modules/ocl/src/opencl/arithm_flip.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl index fe1b304..ba5f32d 100644 --- a/modules/ocl/src/opencl/arithm_log.cl +++ b/modules/ocl/src/opencl/arithm_log.cl @@ -43,9 +43,13 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////LOG///////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl index 7c8cc18..6fd2ac3 100644 --- a/modules/ocl/src/opencl/arithm_magnitude.cl +++ b/modules/ocl/src/opencl/arithm_magnitude.cl @@ -43,9 +43,13 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif __kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset, __global float *src2, int src2_step, int src2_offset, diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl index 33a39d8..01db7d0 100644 --- a/modules/ocl/src/opencl/arithm_minMax.cl +++ b/modules/ocl/src/opencl/arithm_minMax.cl @@ -45,7 +45,7 @@ /**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT #ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable #elif defined (cl_khr_fp64) diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl index 076fb06..b80ce2b 100644 --- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl +++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl @@ -44,8 +44,13 @@ //M*/ /**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif #define RES_TYPE double4 #define CONVERT_RES_TYPE convert_double4 #else diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl index 4d73be9..fbde684 100644 --- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl +++ b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl @@ -44,8 +44,13 @@ //M*/ /**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif #define RES_TYPE double4 #define CONVERT_RES_TYPE convert_double4 #else diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl index fc98257..3180c26 100644 --- a/modules/ocl/src/opencl/arithm_nonzero.cl +++ b/modules/ocl/src/opencl/arithm_nonzero.cl @@ -42,7 +42,7 @@ // the use of this software, even if advised of the possibility of such damage. // -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT #ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable #elif defined (cl_khr_fp64) diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl index 978fd3b..40346b2 100644 --- a/modules/ocl/src/opencl/arithm_phase.cl +++ b/modules/ocl/src/opencl/arithm_phase.cl @@ -44,17 +44,17 @@ // // -#if defined (DOUBLE_SUPPORT) - #ifdef cl_amd_fp64 - #pragma OPENCL EXTENSION cl_amd_fp64:enable - #elif defined (cl_khr_fp64) - #pragma OPENCL EXTENSION cl_khr_fp64:enable - #endif - #define CV_PI M_PI - #define CV_2PI (2 * CV_PI) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#define CV_PI M_PI +#define CV_2PI (2 * CV_PI) #else - #define CV_PI M_PI_F - #define CV_2PI (2 * CV_PI) +#define CV_PI M_PI_F +#define CV_2PI (2 * CV_PI) #endif /**************************************phase inradians**************************************/ @@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, double data1 = src1[src1_index]; double data2 = src2[src2_index]; - double tmp = atan2(src2[src2_index], src1[src1_index]); + double tmp = atan2(data2, data1); tmp = 180 * tmp / CV_PI; if (tmp < 0) diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl index 9e2e860..f3ec311 100644 --- a/modules/ocl/src/opencl/arithm_polarToCart.cl +++ b/modules/ocl/src/opencl/arithm_polarToCart.cl @@ -44,14 +44,14 @@ //M*/ #ifdef DOUBLE_SUPPORT - #ifdef cl_amd_fp64 - #pragma OPENCL EXTENSION cl_amd_fp64:enable - #elif defined (cl_khr_fp64) - #pragma OPENCL EXTENSION cl_khr_fp64:enable - #endif - #define CV_PI M_PI +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#define CV_PI M_PI #else - #define CV_PI M_PI_F +#define CV_PI M_PI_F #endif ///////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl index 1704f6b..36a22b6 100644 --- a/modules/ocl/src/opencl/arithm_pow.cl +++ b/modules/ocl/src/opencl/arithm_pow.cl @@ -43,21 +43,22 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable -typedef double F; -typedef double4 F4; -#define convert_F4 convert_double4; +#endif +#define F double #else -typedef float F; -typedef float4 F4; -#define convert_F4 convert_float4; +#define F float #endif + /************************************** pow **************************************/ + __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset, __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, - F p) + int rows, int cols, int dst_step1, F p) { int x = get_global_id(0); @@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse *((__global float *)((__global char *)dst + dst_index)) = tmp; } - } #if defined (DOUBLE_SUPPORT) + __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset, __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, - F p) + int rows, int cols, int dst_step1, F p) { int x = get_global_id(0); @@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data)))); *((__global double *)((__global char *)dst + dst_index)) = tmp; } - } + #endif diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl index fb684c3..0ead5b0 100644 --- a/modules/ocl/src/opencl/arithm_setidentity.cl +++ b/modules/ocl/src/opencl/arithm_setidentity.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl index 7ada5be..514cf2a 100644 --- a/modules/ocl/src/opencl/arithm_sum.cl +++ b/modules/ocl/src/opencl/arithm_sum.cl @@ -43,11 +43,11 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #endif diff --git a/modules/ocl/src/opencl/arithm_transpose.cl b/modules/ocl/src/opencl/arithm_transpose.cl index bd06a52..8cde654 100644 --- a/modules/ocl/src/opencl/arithm_transpose.cl +++ b/modules/ocl/src/opencl/arithm_transpose.cl @@ -43,7 +43,7 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT #ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable #elif defined (cl_khr_fp64) diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl index a13a30e..06e18c2 100644 --- a/modules/ocl/src/opencl/bgfg_mog.cl +++ b/modules/ocl/src/opencl/bgfg_mog.cl @@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar) { return fmax(var + learningRate * (diff * diff - var), minVar); } + #else + #define T_FRAME uchar4 #define T_MEAN_VAR float4 #define CONVERT_TYPE convert_uchar4_sat #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f) + inline float4 cvt(const uchar4 val) { float4 result; @@ -93,6 +96,14 @@ inline float sum(const float4 val) return (val.x + val.y + val.z); } +static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) +{ + float4 val = ptr[(k * rows + y) * ptr_step + x]; + ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; + ptr[((k + 1) * rows + y) * ptr_step + x] = val; +} + + static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar) { float4 result; @@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl result.w = 0.0f; return result; } + #endif typedef struct @@ -114,7 +126,7 @@ typedef struct float c_varMax; float c_tau; uchar c_shadowVal; -}con_srtuct_t; +} con_srtuct_t; static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step) { @@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste ptr[((k + 1) * rows + y) * ptr_step + x] = val; } -static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) -{ - float4 val = ptr[(k * rows + y) * ptr_step + x]; - ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; - ptr[((k + 1) * rows + y) * ptr_step + x] = val; -} - __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, diff --git a/modules/ocl/src/opencl/blend_linear.cl b/modules/ocl/src/opencl/blend_linear.cl index 06a51f2..bc7aa46 100644 --- a/modules/ocl/src/opencl/blend_linear.cl +++ b/modules/ocl/src/opencl/blend_linear.cl @@ -43,7 +43,7 @@ // //M*/ -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT #ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable #elif defined (cl_khr_fp64) diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl index ce0d86e..a005284 100644 --- a/modules/ocl/src/opencl/brute_force_match.cl +++ b/modules/ocl/src/opencl/brute_force_match.cl @@ -63,14 +63,6 @@ #define DIST_TYPE 0 #endif -//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel -static int bit1Count(int v) -{ - v = v - ((v >> 1) & 0x55555555); // reuse input as temporary - v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp - return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count -} - // dirty fix for non-template support #if (DIST_TYPE == 0) // L1Dist # ifdef T_FLOAT @@ -89,6 +81,13 @@ typedef float value_type; typedef float result_type; #define DIST_RES(x) sqrt(x) #elif (DIST_TYPE == 2) // Hamming +//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +static int bit1Count(int v) +{ + v = v - ((v >> 1) & 0x55555555); // reuse input as temporary + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count +} #define DIST(x, y) bit1Count( (x) ^ (y) ) typedef int value_type; typedef int result_type; diff --git a/modules/ocl/src/opencl/convertC3C4.cl b/modules/ocl/src/opencl/convertC3C4.cl index b3e699d..4c519fd 100644 --- a/modules/ocl/src/opencl/convertC3C4.cl +++ b/modules/ocl/src/opencl/convertC3C4.cl @@ -33,12 +33,17 @@ // // -#if defined (DOUBLE_SUPPORT) +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif -__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows, - int dstStep_in_piexl,int pixel_end) +__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, + int cols, int rows, + int dstStep_in_piexl, int pixel_end) { int id = get_global_id(0); int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2)); @@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY dst[addr.y] = outpix1; } else if(outx.x>1 #ifndef WAVE_SIZE diff --git a/modules/ocl/src/opencl/split_mat.cl b/modules/ocl/src/opencl/split_mat.cl index b9aa048..b52b3c2 100644 --- a/modules/ocl/src/opencl/split_mat.cl +++ b/modules/ocl/src/opencl/split_mat.cl @@ -38,9 +38,14 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ -#if defined (DOUBLE_SUPPORT) + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif #if DATA_DEPTH == 0 #define BASE_TYPE uchar diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl index 207bf00..0edccdb 100644 --- a/modules/ocl/src/opencl/stereobm.cl +++ b/modules/ocl/src/opencl/stereobm.cl @@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz) { unsigned int cache = cols[0]; -#pragma unroll for(int i = 1; i <= winsz; i++) cache += cols[i]; diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl index ec02f82..4b5864f 100644 --- a/modules/ocl/src/opencl/stereobp.cl +++ b/modules/ocl/src/opencl/stereobp.cl @@ -45,13 +45,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif - #endif #ifdef T_FLOAT diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl index 13a201c..72c1707 100644 --- a/modules/ocl/src/opencl/stereocsbp.cl +++ b/modules/ocl/src/opencl/stereocsbp.cl @@ -44,19 +44,10 @@ // //M*/ - -#ifndef FLT_MAX -#define FLT_MAX CL_FLT_MAX -#endif - -#ifndef SHRT_MAX -#define SHRT_MAX CL_SHORT_MAX -#endif - - /////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////get_first_k_initial_global////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr, __global short *ctemp, int h, int w, int nr_plane, int cmsg_step1, int cdisp_step1, int cndisp) @@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, } } } + __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, __global float *selected_disp_pyr, __global float *ctemp, int h, int w, int nr_plane, int cmsg_step1, int cdisp_step1, int cndisp) @@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global float *data_cost_selected_, //////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////get_first_k_initial_local//////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void get_first_k_initial_local_0(__global short *data_cost_selected_, __global short *selected_disp_pyr, __global short *ctemp,int h, int w, int nr_plane, int cmsg_step1, int cdisp_step1, int cndisp) @@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _ /////////////////////////////////////////////////////////////// /////////////////////// init data cost //////////////////////// /////////////////////////////////////////////////////////////// + inline float compute_3(__global uchar* left, __global uchar* right, float cdata_weight, float cmax_data_term) { @@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right, return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term); } + inline float compute_1(__global uchar* left, __global uchar* right, float cdata_weight, float cmax_data_term) { @@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g } } } + __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright, int h, int w, int level, int channels, int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, @@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g } } } + //////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////init_data_cost_reduce////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright, __local float *smem, int level, int rows, int cols, int h, int winsz, int channels, int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth, @@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle /////////////////////////////////////////////////////////////// ////////////////////// compute data cost ////////////////////// /////////////////////////////////////////////////////////////// + __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_, __global uchar *cleft, __global uchar *cright, int h, int w, int level, int nr_plane, int channels, @@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo } } } + __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_, __global uchar *cleft, __global uchar *cright, int h, int w, int level, int nr_plane, int channels, @@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo } } } + //////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////compute_data_cost_reduce////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////// + __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_, __global uchar *cleft, __global uchar *cright,__local float *smem, int level, int rows, int cols, int h, int nr_plane, @@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short } } -static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new, - __global float *r_new, __global const float *u_cur, __global const float *d_cur, - __global const float *l_cur, __global const float *r_cur, - __global float *data_cost_selected, __global float *disparity_selected_new, - __global float *data_cost_new, __global const float *data_cost_cur, - __global const float *disparity_selected_cur, - int nr_plane, int nr_plane2, - int cdisp_step1, int cdisp_step2) -{ - for(int i = 0; i < nr_plane; i++) - { - float minimum = FLT_MAX; - int id = 0; - - for(int j = 0; j < nr_plane2; j++) - { - float cur = data_cost_new[j * cdisp_step1]; - if(cur < minimum) - { - minimum = cur; - id = j; - } - } - - data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1]; - disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2]; - - u_new[i * cdisp_step1] = u_cur[id * cdisp_step2]; - d_new[i * cdisp_step1] = d_cur[id * cdisp_step2]; - l_new[i * cdisp_step1] = l_cur[id * cdisp_step2]; - r_new[i * cdisp_step1] = r_cur[id * cdisp_step2]; - data_cost_new[id * cdisp_step1] = FLT_MAX; - - } -} __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_, __global short *r_new_, __global short *u_cur_, __global const short *d_cur_, __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp, @@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g cdisp_step1, cdisp_step2); } } + __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_, __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_, __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp, diff --git a/modules/ocl/src/opencl/svm.cl b/modules/ocl/src/opencl/svm.cl index 36ae38e..32b8194 100644 --- a/modules/ocl/src/opencl/svm.cl +++ b/modules/ocl/src/opencl/svm.cl @@ -33,11 +33,12 @@ // the use of this software, even if advised of the possibility of such damage. // // -#if defined (DOUBLE_SUPPORT) -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#elif defined (cl_amd_fp64) + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 #pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable #endif #define TYPE double #else @@ -53,7 +54,6 @@ #else #define POW(X,Y) X #endif -#define FLT_MAX 3.402823466e+38F #define MAX_VAL (FLT_MAX*1e-3) __kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols, @@ -206,4 +206,4 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i dst[row * dst_step + col] = temp1; } } -} \ No newline at end of file +} diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl index 2787f00..6111a4a 100644 --- a/modules/ocl/src/opencl/tvl1flow.cl +++ b/modules/ocl/src/opencl/tvl1flow.cl @@ -44,7 +44,7 @@ //M*/ __kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step, -__global float* dx, __global float* dy, int dx_step) + __global float* dx, __global float* dy, int dx_step) { int x = get_global_id(0); int y = get_global_id(1); @@ -53,13 +53,6 @@ __global float* dx, __global float* dy, int dx_step) { int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); int src_x2 = (x - 1) > 0 ? (x -1) : 0; - - //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2]) - //{ - // printf("y = %d\n", y); - // printf("src_x1 = %d\n", src_x1); - // printf("src_x2 = %d\n", src_x2); - //} dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]); int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); @@ -97,24 +90,24 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c int u2_offset_x, int u2_offset_y) { - const int x = get_global_id(0); - const int y = get_global_id(1); + int x = get_global_id(0); + int y = get_global_id(1); if(x < I0_col&&y < I0_row) { - //const float u1Val = u1(y, x); - const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - //const float u2Val = u2(y, x); - const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + //float u1Val = u1(y, x); + float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + //float u2Val = u2(y, x); + float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - const float wx = x + u1Val; - const float wy = y + u2Val; + float wx = x + u1Val; + float wy = y + u2Val; - const int xmin = ceil(wx - 2.0f); - const int xmax = floor(wx + 2.0f); + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); - const int ymin = ceil(wy - 2.0f); - const int ymax = floor(wy + 2.0f); + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); float sum = 0.0f; float sumx = 0.0f; @@ -126,7 +119,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c { for (int cx = xmin; cx <= xmax; ++cx) { - const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); //sum += w * tex2D(tex_I1 , cx, cy); int2 cood = (int2)(cx, cy); @@ -140,30 +133,30 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c } } - const float coeff = 1.0f / wsum; + float coeff = 1.0f / wsum; - const float I1wVal = sum * coeff; - const float I1wxVal = sumx * coeff; - const float I1wyVal = sumy * coeff; + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; I1w[y * I1w_step + x] = I1wVal; I1wx[y * I1w_step + x] = I1wxVal; I1wy[y * I1w_step + x] = I1wyVal; - const float Ix2 = I1wxVal * I1wxVal; - const float Iy2 = I1wyVal * I1wyVal; + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; // store the |Grad(I1)|^2 grad[y * I1w_step + x] = Ix2 + Iy2; // compute the constant part of the rho function - const float I0Val = I0[y * I0_step + x]; + float I0Val = I0[y * I0_step + x]; rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; } } -static float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow) +static float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow) { int i0 = clamp(x, 0, cols - 1); int j0 = clamp(y, 0, rows - 1); @@ -185,24 +178,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I1_step, int I1x_step) { - const int x = get_global_id(0); - const int y = get_global_id(1); + int x = get_global_id(0); + int y = get_global_id(1); if(x < I0_col&&y < I0_row) { - //const float u1Val = u1(y, x); - const float u1Val = u1[y * u1_step + x]; - //const float u2Val = u2(y, x); - const float u2Val = u2[y * u2_step + x]; + //float u1Val = u1(y, x); + float u1Val = u1[y * u1_step + x]; + //float u2Val = u2(y, x); + float u2Val = u2[y * u2_step + x]; - const float wx = x + u1Val; - const float wy = y + u2Val; + float wx = x + u1Val; + float wy = y + u2Val; - const int xmin = ceil(wx - 2.0f); - const int xmax = floor(wx + 2.0f); + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); - const int ymin = ceil(wy - 2.0f); - const int ymax = floor(wy + 2.0f); + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); float sum = 0.0f; float sumx = 0.0f; @@ -213,7 +206,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, { for (int cx = xmin; cx <= xmax; ++cx) { - const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); int2 cood = (int2)(cx, cy); sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); @@ -223,24 +216,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, } } - const float coeff = 1.0f / wsum; + float coeff = 1.0f / wsum; - const float I1wVal = sum * coeff; - const float I1wxVal = sumx * coeff; - const float I1wyVal = sumy * coeff; + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; I1w[y * I1w_step + x] = I1wVal; I1wx[y * I1w_step + x] = I1wxVal; I1wy[y * I1w_step + x] = I1wyVal; - const float Ix2 = I1wxVal * I1wxVal; - const float Iy2 = I1wyVal * I1wyVal; + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; // store the |Grad(I1)|^2 grad[y * I1w_step + x] = Ix2 + Iy2; // compute the constant part of the rho function - const float I0Val = I0[y * I0_step + x]; + float I0Val = I0[y * I0_step + x]; rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; } @@ -253,38 +246,35 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, __global float* p12, __global float* p21, __global float* p22, - const float taut, + float taut, int u2_step, int u1_offset_x, int u1_offset_y, int u2_offset_x, int u2_offset_y) { - - //const int x = blockIdx.x * blockDim.x + threadIdx.x; - //const int y = blockIdx.y * blockDim.y + threadIdx.y; - const int x = get_global_id(0); - const int y = get_global_id(1); + int x = get_global_id(0); + int y = get_global_id(1); if(x < u1_col && y < u1_row) { int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); - const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); - const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); - const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); - const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - const float g1 = hypot(u1x, u1y); - const float g2 = hypot(u2x, u2y); + float g1 = hypot(u1x, u1y); + float g2 = hypot(u2x, u2y); - const float ng1 = 1.0f + taut * g1; - const float ng2 = 1.0f + taut * g2; + float ng1 = 1.0f + taut * g1; + float ng2 = 1.0f + taut * g2; p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; @@ -299,8 +289,8 @@ static float divergence(__global const float* v1, __global const float* v2, int if (x > 0 && y > 0) { - const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; - const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; + float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; + float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; return v1x + v2y; } else @@ -328,30 +318,25 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx __global const float* p22, /*int p22_step,*/ __global float* u1, int u1_step, __global float* u2, - __global float* error, const float l_t, const float theta, int u2_step, + __global float* error, float l_t, float theta, int u2_step, int u1_offset_x, int u1_offset_y, int u2_offset_x, int u2_offset_y, char calc_error) { - - //const int x = blockIdx.x * blockDim.x + threadIdx.x; - //const int y = blockIdx.y * blockDim.y + threadIdx.y; - int x = get_global_id(0); int y = get_global_id(1); - if(x < I1wx_col && y < I1wx_row) { - const float I1wxVal = I1wx[y * I1wx_step + x]; - const float I1wyVal = I1wy[y * I1wx_step + x]; - const float gradVal = grad[y * I1wx_step + x]; - const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + float I1wxVal = I1wx[y * I1wx_step + x]; + float I1wyVal = I1wy[y * I1wx_step + x]; + float gradVal = grad[y * I1wx_step + x]; + float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); // estimate the values of the variable (v1, v2) (thresholding operator TH) @@ -370,31 +355,31 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx } else if (gradVal > 1.192092896e-07f) { - const float fi = -rho / gradVal; + float fi = -rho / gradVal; d1 = fi * I1wxVal; d2 = fi * I1wyVal; } - const float v1 = u1OldVal + d1; - const float v2 = u2OldVal + d2; + float v1 = u1OldVal + d1; + float v2 = u2OldVal + d2; // compute the divergence of the dual variable (p1, p2) - const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); - const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); + float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); + float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); // estimate the values of the optical flow (u1, u2) - const float u1NewVal = v1 + theta * div_p1; - const float u2NewVal = v2 + theta * div_p2; + float u1NewVal = v1 + theta * div_p1; + float u2NewVal = v2 + theta * div_p2; u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; if(calc_error) { - const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); error[y * I1wx_step + x] = n1 + n2; } } -- 2.7.4