From 4f6f6e8cacfec0cfac430a63a41a4ed62ee70492 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Thu, 26 Dec 2013 21:20:32 +0400
Subject: [PATCH] Replace static function qualifier with inline to enable kernel
 compilation with OpenCL 1.1 embedded profile.

---
 modules/ocl/src/opencl/bgfg_mog.cl      |  8 ++++----
 modules/ocl/src/opencl/kmeans_kernel.cl |  2 +-
 modules/ocl/src/opencl/meanShift.cl     |  2 +-
 modules/ocl/src/opencl/objdetect_hog.cl |  2 +-
 modules/ocl/src/opencl/pyrlk.cl         | 20 ++++++++++----------
 modules/ocl/src/opencl/stereobp.cl      |  4 ++--
 modules/ocl/src/opencl/tvl1flow.cl      |  6 +++---
 7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index 06e18c2..6a95316 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -63,7 +63,7 @@ inline float sum(float val)
     return val;
 }
 
-static float clamp1(float var, float learningRate, float diff, float minVar)
+inline float clamp1(float var, float learningRate, float diff, float minVar)
 {
     return fmax(var + learningRate * (diff * diff - var), minVar);
 }
@@ -96,7 +96,7 @@ inline float sum(const float4 val)
     return (val.x + val.y + val.z);
 }
 
-static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float4 val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@@ -104,7 +104,7 @@ static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_s
 }
 
 
-static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+inline float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
 {
     float4 result;
     result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@@ -128,7 +128,7 @@ typedef struct
     uchar c_shadowVal;
 } con_srtuct_t;
 
-static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
diff --git a/modules/ocl/src/opencl/kmeans_kernel.cl b/modules/ocl/src/opencl/kmeans_kernel.cl
index 244d52c..bb0e9c9 100644
--- a/modules/ocl/src/opencl/kmeans_kernel.cl
+++ b/modules/ocl/src/opencl/kmeans_kernel.cl
@@ -44,7 +44,7 @@
 //
 //M*/
 
-static float distance_(__global const float * center, __global const float * src, int feature_length)
+inline float distance_(__global const float * center, __global const float * src, int feature_length)
 {
     float res = 0;
     float4 v0, v1, v2;
diff --git a/modules/ocl/src/opencl/meanShift.cl b/modules/ocl/src/opencl/meanShift.cl
index ea5060e..3fff473 100644
--- a/modules/ocl/src/opencl/meanShift.cl
+++ b/modules/ocl/src/opencl/meanShift.cl
@@ -46,7 +46,7 @@
 //
 //M*/
 
-static short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
+inline short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
                __global uchar4* in, int in_step, int dst_off, int src_off,
                int cols, int rows, int sp, int sr, int maxIter, float eps)
 {
diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl
index 60d7346..e931e82 100644
--- a/modules/ocl/src/opencl/objdetect_hog.cl
+++ b/modules/ocl/src/opencl/objdetect_hog.cl
@@ -208,7 +208,7 @@ __kernel void normalize_hists_36_kernel(__global float* block_hists,
 //-------------------------------------------------------------
 //  Normalization of histograms via L2Hys_norm
 //
-static float reduce_smem(volatile __local float* smem, int size)
+inline float reduce_smem(volatile __local float* smem, int size)
 {
     unsigned int tid = get_local_id(0);
     float sum = smem[tid];
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl
index 303d268..f34aee9 100644
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ b/modules/ocl/src/opencl/pyrlk.cl
@@ -52,7 +52,7 @@
 #endif
 #ifdef CPU
 
-static void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
+inline void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -71,7 +71,7 @@ static void reduce3(float val1, float val2, float val3,  __local float* smem1,
     }
 }
 
-static void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
+inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -88,7 +88,7 @@ static void reduce2(float val1, float val2, volatile __local float* smem1, volat
     }
 }
 
-static void reduce1(float val1, volatile __local float* smem1, int tid)
+inline void reduce1(float val1, volatile __local float* smem1, int tid)
 {
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -103,7 +103,7 @@ static void reduce1(float val1, volatile __local float* smem1, int tid)
     }
 }
 #else
-static void reduce3(float val1, float val2, float val3,
+inline void reduce3(float val1, float val2, float val3,
              __local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
 {
     smem1[tid] = val1;
@@ -150,7 +150,7 @@ static void reduce3(float val1, float val2, float val3,
     barrier(CLK_LOCAL_MEM_FENCE);
 }
 
-static void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
+inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -189,7 +189,7 @@ static void reduce2(float val1, float val2, __local volatile float* smem1, __loc
     barrier(CLK_LOCAL_MEM_FENCE);
 }
 
-static void reduce1(float val1, __local volatile float* smem1, int tid)
+inline void reduce1(float val1, __local volatile float* smem1, int tid)
 {
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -225,7 +225,7 @@ static void reduce1(float val1, __local volatile float* smem1, int tid)
 // Image read mode
 __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 
-static void SetPatch(image2d_t I, float x, float y,
+inline void SetPatch(image2d_t I, float x, float y,
               float* Pch, float* Dx, float* Dy,
               float* A11, float* A12, float* A22)
 {
@@ -262,7 +262,7 @@ inline void GetError(image2d_t J, const float x, const float y, const float* Pch
     *errval += fabs(diff);
 }
 
-static void SetPatch4(image2d_t I, const float x, const float y,
+inline void SetPatch4(image2d_t I, const float x, const float y,
                float4* Pch, float4* Dx, float4* Dy,
                float* A11, float* A12, float* A22)
 {
@@ -285,7 +285,7 @@ static void SetPatch4(image2d_t I, const float x, const float y,
     *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
 }
 
-static void GetPatch4(image2d_t J, const float x, const float y,
+inline void GetPatch4(image2d_t J, const float x, const float y,
                const float4* Pch, const float4* Dx, const float4* Dy,
                float* b1, float* b2)
 {
@@ -297,7 +297,7 @@ static void GetPatch4(image2d_t J, const float x, const float y,
     *b2 += xdiff.x + xdiff.y + xdiff.z;
 }
 
-static void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
+inline void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
 {
     float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
     *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl
index 4b5864f..5a1bf08 100644
--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -97,7 +97,7 @@ inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
     return abs((int)(l.x) - *rs);
 }
 
-static float pix_diff_4(const uchar4 l, __global const uchar *rs)
+inline float pix_diff_4(const uchar4 l, __global const uchar *rs)
 {
     uchar4 r;
     r = *((__global uchar4 *)rs);
@@ -233,7 +233,7 @@ __kernel void level_up_message(__global T *src, int src_rows, int src_step,
 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-static void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
+inline void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
               const __global T *dt,
               int u_step, int msg_disp_step, int data_disp_step,
               float4 cmax_disc_term, float4 cdisc_single_jump)
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
index 6111a4a..b488e89 100644
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -62,7 +62,7 @@ __kernel void centeredGradientKernel(__global const float* src, int src_col, int
 
 }
 
-static float bicubicCoeff(float x_)
+inline float bicubicCoeff(float x_)
 {
 
     float x = fabs(x_);
@@ -156,7 +156,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
 
 }
 
-static float readImage(__global float *image,  int x,  int y,  int rows,  int cols, int elemCntPerRow)
+inline float readImage(__global float *image,  int x,  int y,  int rows,  int cols, int elemCntPerRow)
 {
     int i0 = clamp(x, 0, cols - 1);
     int j0 = clamp(y, 0, rows - 1);
@@ -284,7 +284,7 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
 
 }
 
-static float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
 {
 
     if (x > 0 && y > 0)
-- 
2.7.4