--- /dev/null
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/vec_traits.hpp"
+#include "opencv2/core/cuda/vec_math.hpp"
+#include "opencv2/core/cuda/limits.hpp"
+
+namespace cv { namespace cuda { namespace device
+{
+ namespace mog2
+ {
+ ///////////////////////////////////////////////////////////////
+ // Utility
+
+ // Widens a single-channel 8-bit sample to float.
+ __device__ __forceinline__ float cvt(uchar val)
+ {
+     return static_cast<float>(val);
+ }
+ // Widens a 3-channel 8-bit pixel to float3, one channel at a time.
+ __device__ __forceinline__ float3 cvt(const uchar3& val)
+ {
+     float3 widened;
+     widened.x = val.x;
+     widened.y = val.y;
+     widened.z = val.z;
+     return widened;
+ }
+ // Widens a 4-channel 8-bit pixel to float4, one channel at a time.
+ __device__ __forceinline__ float4 cvt(const uchar4& val)
+ {
+     float4 widened;
+     widened.x = val.x;
+     widened.y = val.y;
+     widened.z = val.z;
+     widened.w = val.w;
+     return widened;
+ }
+
+ // Square of a scalar value.
+ __device__ __forceinline__ float sqr(float val)
+ {
+     const float squared = val * val;
+     return squared;
+ }
+ // Sum of the squared x, y and z components.
+ __device__ __forceinline__ float sqr(const float3& val)
+ {
+     float acc = val.x * val.x;
+     acc += val.y * val.y;
+     acc += val.z * val.z;
+     return acc;
+ }
+ // Sum of the squared x, y and z components; the w component is not included.
+ __device__ __forceinline__ float sqr(const float4& val)
+ {
+     float acc = val.x * val.x;
+     acc += val.y * val.y;
+     acc += val.z * val.z;
+     return acc;
+ }
+
+ // Component sum of a scalar: the value itself.
+ __device__ __forceinline__ float sum(float val)
+ {
+     return val;
+ }
+ // Adds up the x, y and z components.
+ __device__ __forceinline__ float sum(const float3& val)
+ {
+     float acc = val.x;
+     acc += val.y;
+     acc += val.z;
+     return acc;
+ }
+ // Adds up the x, y and z components; the w component is not included.
+ __device__ __forceinline__ float sum(const float4& val)
+ {
+     float acc = val.x;
+     acc += val.y;
+     acc += val.z;
+     return acc;
+ }
+
+ // Exchanges the entries of mode k and mode k + 1 for pixel (x, y).
+ // Mode m of a pixel is addressed at row m * rows + y, column x of ptr.
+ template <class Ptr2D>
+ __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
+ {
+     const int upperRow = k * rows + y;
+     const int lowerRow = (k + 1) * rows + y;
+
+     typename Ptr2D::elem_type saved = ptr(upperRow, x);
+     ptr(upperRow, x) = ptr(lowerRow, x);
+     ptr(lowerRow, x) = saved;
+ }
+
+ ///////////////////////////////////////////////////////////////
+ // MOG2
+
+ __constant__ int c_nmixtures; // maximum number of modes kept per pixel; when reached, a new mode overwrites the last slot
+ __constant__ float c_Tb; // background / shadow test: squared distance must be below c_Tb * variance
+ __constant__ float c_TB; // cumulative-weight limit: modes are considered only while the accumulated weight stays below it
+ __constant__ float c_Tg; // fit test: a pixel belongs to a mode when squared distance < c_Tg * variance
+ __constant__ float c_varInit; // variance assigned to a newly created mode
+ __constant__ float c_varMin; // lower clamp applied to an updated variance
+ __constant__ float c_varMax; // upper clamp applied to an updated variance
+ __constant__ float c_tau; // shadow test: lower bound on the ratio numerator / denominator (upper bound is 1)
+ __constant__ unsigned char c_shadowVal; // value written to fgmask for pixels classified as shadow
+
+ // Uploads the MOG2 parameters to the __constant__ symbols read by the kernels in this file.
+ //
+ // varMin and varMax may be passed in either order: the smaller of the two is stored in
+ // c_varMin and the larger in c_varMax. All other arguments are copied unchanged.
+ // Every copy is wrapped in cudaSafeCall, so a failing upload is reported through it.
+ void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+ {
+     // Derive both bounds from the caller's original values. Overwriting varMin first and
+     // then taking the maximum of the already-reduced varMin and varMax would leave both
+     // bounds equal to the smaller value whenever the arguments arrive swapped.
+     const float varLow = ::fminf(varMin, varMax);
+     const float varHigh = ::fmaxf(varMin, varMax);
+
+     cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varLow, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varHigh, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
+     cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
+ }
+
+ template <bool detectShadows, typename SrcT, typename WorkT> // MOG2 update for one frame: one thread per pixel (x, y); detectShadows enables the shadow test at the end
+ __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed, // frame: input pixels; fgmask: output label per pixel; modesUsed: per-pixel mode count, read and rewritten
+ PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean, // model planes: mode k of pixel (x, y) is addressed at row k * frame.rows + y, column x
+ const float alphaT, const float alpha1, const float prune) // every weight becomes alpha1 * w + prune; alphaT is then added to the matched mode (mog2_caller passes alpha1 = 1 - alphaT)
+ {
+ const int x = blockIdx.x * blockDim.x + threadIdx.x;
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (x >= frame.cols || y >= frame.rows) // threads outside the frame have nothing to do
+ return;
+
+ WorkT pix = cvt(frame(y, x)); // current pixel widened to float components
+
+ //calculate distances to the modes (+ sort)
+ //here we need to go in descending order!!!
+
+ bool background = false; // true - the pixel classified as background
+
+ //internal:
+
+ bool fitsPDF = false; //if it remains false a new GMM mode will be added
+
+ int nmodes = modesUsed(y, x); // modes stored for this pixel; decremented below whenever a mode is pruned
+ int nNewModes = nmodes; //mode count on entry; copied back into nmodes after the renormalization
+
+ float totalWeight = 0.0f; // running sum of the updated weights of the modes visited so far
+
+ //go through all modes
+
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ //need only weight if fit is found
+ float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
- gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
++ int swap_count = 0; // number of slots the matched mode is moved up by the sort below
+ //fit not found yet
+ if (!fitsPDF)
+ {
+ //check if it belongs to some of the remaining modes
+ float var = gmm_variance(mode * frame.rows + y, x);
+
+ WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+ //calculate difference and distance
+ WorkT diff = mean - pix;
+ float dist2 = sqr(diff);
+
+ //background? - Tb - usually larger than Tg
+ if (totalWeight < c_TB && dist2 < c_Tb * var) // only while the weight accumulated over the preceding modes is below c_TB
+ background = true;
+
+ //check fit
+ if (dist2 < c_Tg * var)
+ {
+ //belongs to the mode
+ fitsPDF = true;
+
+ //update distribution
+
+ //update weight
+ weight += alphaT;
+ float k = alphaT / weight; // factor applied to the mean and variance updates below
+
+ //update mean
+ gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
+
+ //update variance
+ float varnew = var + k * (dist2 - var);
+
+ //limit the variance
+ varnew = ::fmaxf(varnew, c_varMin);
+ varnew = ::fminf(varnew, c_varMax);
+
+ gmm_variance(mode * frame.rows + y, x) = varnew;
+
+ //sort
+ //all other weights are at the same place and
+ //only the matched (mode) is higher -> just find the new place for it
+
+ for (int i = mode; i > 0; --i)
+ {
+ //check one up
+ if (weight < gmm_weight((i - 1) * frame.rows + y, x))
+ break;
+
++ swap_count++;
+ //swap one up
+ swap(gmm_weight, x, y, i - 1, frame.rows);
+ swap(gmm_variance, x, y, i - 1, frame.rows);
+ swap(gmm_mean, x, y, i - 1, frame.rows);
+ }
+
+ //belongs to the mode - fitsPDF becomes true
+ }
+ } // !fitsPDF
+
+ //check prune
+ if (weight < -prune)
+ {
+ weight = 0.0f;
+ nmodes--; // also shortens this loop and the renormalization loop below
+ }
+
++ gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value, at the slot the mode occupies after the sort
+ totalWeight += weight;
+ }
+
+ //renormalize weights
+
+ totalWeight = 1.f / totalWeight; // variable reused as the normalization factor
+ for (int mode = 0; mode < nmodes; ++mode)
+ gmm_weight(mode * frame.rows + y, x) *= totalWeight;
+
+ nmodes = nNewModes; // restores the count read on entry, so the prune decrements above do not persist
+
+ //make new mode if needed and exit
+
+ if (!fitsPDF)
+ {
+ // replace the weakest or add a new one
+ int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; // last slot when all c_nmixtures slots are in use, otherwise the next free slot
+
+ if (nmodes == 1) // very first mode of this pixel gets the full weight
+ gmm_weight(mode * frame.rows + y, x) = 1.f;
+ else
+ {
+ gmm_weight(mode * frame.rows + y, x) = alphaT;
+
+ // renormalize all other weights
+
+ for (int i = 0; i < nmodes - 1; ++i)
+ gmm_weight(i * frame.rows + y, x) *= alpha1;
+ }
+
+ // init
+
+ gmm_mean(mode * frame.rows + y, x) = pix;
+ gmm_variance(mode * frame.rows + y, x) = c_varInit;
+
+ //sort
+ //find the new place for it
+
+ for (int i = nmodes - 1; i > 0; --i)
+ {
+ // check one up
+ if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
+ break;
+
+ //swap one up
+ swap(gmm_weight, x, y, i - 1, frame.rows);
+ swap(gmm_variance, x, y, i - 1, frame.rows);
+ swap(gmm_mean, x, y, i - 1, frame.rows);
+ }
+ }
+
+ //set the number of modes
+ modesUsed(y, x) = nmodes;
+
+ bool isShadow = false;
+ if (detectShadows && !background) // shadow test runs only for pixels not already classified as background
+ {
+ float tWeight = 0.0f; // weight accumulated over the modes examined so far
+
+ // check all the components marked as background:
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+ WorkT pix_mean = pix * mean;
+
+ float numerator = sum(pix_mean);
+ float denominator = sqr(mean);
+
+ // no division by zero allowed
+ if (denominator == 0) // ends the whole shadow scan, not just this mode
+ break;
+
+ // if tau < a < 1 then also check the color distortion
+ if (numerator <= denominator && numerator >= c_tau * denominator)
+ {
+ float a = numerator / denominator;
+
+ WorkT dD = a * mean - pix;
+
+ if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
+ {
+ isShadow = true;
+ break;
+ }
+ };
+
+ tWeight += gmm_weight(mode * frame.rows + y, x);
+ if (tWeight > c_TB) // stop once the examined modes exceed the cumulative-weight limit
+ break;
+ }
+ }
+
+ fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; // 0 = background, c_shadowVal = shadow, 255 = foreground
+ }
+
+ // Launches the mog2 kernel for one frame using 32x8 thread blocks that tile the frame.
+ //
+ // frame and mean arrive as byte-typed views and are reinterpreted as SrcT / WorkT views
+ // before the launch. detectShadows selects the kernel instantiation. The L1-preferred
+ // cache configuration is requested for the chosen instantiation, launch errors are
+ // checked through cudaSafeCall, and when stream is 0 the call also waits for the device.
+ template <typename SrcT, typename WorkT>
+ void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+                  float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+ {
+     const dim3 threads(32, 8);
+     const dim3 blocks(divUp(frame.cols, threads.x), divUp(frame.rows, threads.y));
+
+     // Decay factor applied to every existing weight inside the kernel.
+     const float alpha1 = 1.0f - alphaT;
+
+     const PtrStepSz<SrcT> typedFrame = (PtrStepSz<SrcT>) frame;
+     const PtrStepSz<WorkT> typedMean = (PtrStepSz<WorkT>) mean;
+
+     if (!detectShadows)
+     {
+         cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+         mog2<false, SrcT, WorkT><<<blocks, threads, 0, stream>>>(typedFrame, fgmask, modesUsed,
+                                                                  weight, variance, typedMean,
+                                                                  alphaT, alpha1, prune);
+     }
+     else
+     {
+         cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+         mog2<true, SrcT, WorkT><<<blocks, threads, 0, stream>>>(typedFrame, fgmask, modesUsed,
+                                                                 weight, variance, typedMean,
+                                                                 alphaT, alpha1, prune);
+     }
+
+     // Surfaces launch-configuration errors from the launch above.
+     cudaSafeCall( cudaGetLastError() );
+
+     // On the default stream the call is made blocking.
+     if (stream == 0)
+         cudaSafeCall( cudaDeviceSynchronize() );
+ }
+
+ // Host entry point: forwards to the mog2_caller instantiation selected by the channel count.
+ // The table has entries for cn == 1, 3 and 4 only; the slots for 0 and 2 are null and cn is
+ // not validated here before the indirect call.
+ void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+               float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+ {
+     typedef void (*caller_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+
+     // Indexed directly by the channel count.
+     static const caller_t callers[] =
+     {
+         0,
+         mog2_caller<uchar, float>,
+         0,
+         mog2_caller<uchar3, float3>,
+         mog2_caller<uchar4, float4>
+     };
+
+     const caller_t launch = callers[cn];
+     launch(frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
+ }
+
+ // Builds the background image: one thread per pixel blends the means of that pixel's
+ // modes, weighted by their mixture weights, stopping as soon as the accumulated weight
+ // exceeds c_TB. The blend is divided by the accumulated weight and saturated to OutT.
+ // Mode m of pixel (col, row) is addressed at row m * modesUsed.rows + row of the planes.
+ template <typename WorkT, typename OutT>
+ __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
+ {
+     const int col = blockIdx.x * blockDim.x + threadIdx.x;
+     const int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+     // Threads that fall outside the image have no pixel to produce.
+     if (col < modesUsed.cols && row < modesUsed.rows)
+     {
+         const int modeCount = modesUsed(row, col);
+
+         WorkT blended = VecTraits<WorkT>::all(0.0f);
+         float weightSum = 0.0f;
+
+         int m = 0;
+         while (m < modeCount)
+         {
+             const int planeRow = m * modesUsed.rows + row;
+
+             const float w = gmm_weight(planeRow, col);
+             const WorkT mu = gmm_mean(planeRow, col);
+
+             blended = blended + w * mu;
+             weightSum += w;
+
+             // Enough weight collected: the remaining modes are not blended in.
+             if (weightSum > c_TB)
+                 break;
+
+             ++m;
+         }
+
+         blended = blended * (1.f / weightSum);
+
+         dst(row, col) = saturate_cast<OutT>(blended);
+     }
+ }
+
+ // Launches getBackgroundImage2 with 32x8 thread blocks tiling the modesUsed image.
+ // mean and dst arrive as byte-typed views and are reinterpreted as WorkT / OutT views.
+ // Launch errors are checked through cudaSafeCall; when stream is 0 the call also waits
+ // for the device to finish.
+ template <typename WorkT, typename OutT>
+ void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
+ {
+     const dim3 threads(32, 8);
+     const dim3 blocks(divUp(modesUsed.cols, threads.x), divUp(modesUsed.rows, threads.y));
+
+     const PtrStepSz<WorkT> typedMean = (PtrStepSz<WorkT>) mean;
+     const PtrStepSz<OutT> typedDst = (PtrStepSz<OutT>) dst;
+
+     cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+
+     getBackgroundImage2<WorkT, OutT><<<blocks, threads, 0, stream>>>(modesUsed, weight, typedMean, typedDst);
+     cudaSafeCall( cudaGetLastError() );
+
+     // On the default stream the call is made blocking.
+     if (stream == 0)
+         cudaSafeCall( cudaDeviceSynchronize() );
+ }
+
+ // Host entry point: forwards to the getBackgroundImage2_caller instantiation selected by
+ // the channel count. Entries exist for cn == 1, 3 and 4 only; the slots for 0 and 2 are
+ // null and cn is not validated here before the indirect call.
+ void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
+ {
+     typedef void (*caller_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
+
+     // Indexed directly by the channel count.
+     static const caller_t callers[] =
+     {
+         0,
+         getBackgroundImage2_caller<float, uchar>,
+         0,
+         getBackgroundImage2_caller<float3, uchar3>,
+         getBackgroundImage2_caller<float4, uchar4>
+     };
+
+     const caller_t launch = callers[cn];
+     launch(modesUsed, weight, mean, dst, stream);
+ }
+ }
+}}}
+
+
+#endif /* CUDA_DISABLER */
--- /dev/null
-
+#if CN==1
+
+#define T_MEAN float
+#define F_ZERO (0.0f)
+#define cnMode 1
+
+#define frameToMean(a, b) (b) = *(a);
+#define meanToFrame(a, b) *b = convert_uchar_sat(a);
+
+// Single-channel build (CN==1): square of the value.
+inline float sqr(float val)
+{
+    const float squared = val * val;
+    return squared;
+}
+
+// Single-channel build (CN==1): the component sum of a scalar is the value itself.
+inline float sum(float val)
+{
+    return val;
+}
+
+#else
+
+#define T_MEAN float4
+#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+#define cnMode 4
+
+#define meanToFrame(a, b)\
+ b[0] = convert_uchar_sat(a.x); \
+ b[1] = convert_uchar_sat(a.y); \
+ b[2] = convert_uchar_sat(a.z);
+
+#define frameToMean(a, b)\
+ b.x = a[0]; \
+ b.y = a[1]; \
+ b.z = a[2]; \
+ b.w = 0.0f;
+
+// Multi-channel build: sum of the squared x, y and z components; w is not included.
+inline float sqr(const float4 val)
+{
+    float acc = val.x * val.x;
+    acc += val.y * val.y;
+    acc += val.z * val.z;
+    return acc;
+}
+
+// Multi-channel build: adds up the x, y and z components; w is not included.
+inline float sum(const float4 val)
+{
+    float acc = val.x;
+    acc += val.y;
+    acc += val.z;
+    return acc;
+}
+
+// Exchanges the float4 entries of mode k and mode k + 1 for pixel (x, y).
+// Mode m of a pixel sits at element index (m * rows + y) * ptr_step + x.
+inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    const int upper = (k * rows + y) * ptr_step + x;
+    const int lower = ((k + 1) * rows + y) * ptr_step + x;
+
+    const float4 saved = ptr[upper];
+    ptr[upper] = ptr[lower];
+    ptr[lower] = saved;
+}
+
+#endif
+
+// Exchanges the float entries of mode k and mode k + 1 for pixel (x, y).
+// Mode m of a pixel sits at element index (m * rows + y) * ptr_step + x.
+inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    const int upper = (k * rows + y) * ptr_step + x;
+    const int lower = ((k + 1) * rows + y) * ptr_step + x;
+
+    const float saved = ptr[upper];
+    ptr[upper] = ptr[lower];
+    ptr[lower] = saved;
+}
+
+__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col, //uchar || uchar3; one work-item per pixel (x, y)
+ __global uchar* modesUsed, int modesUsed_step, int modesUsed_offset, //int; modesUsed_offset is not applied in the body
+ __global uchar* weight, int weight_step, int weight_offset, //float; weight_offset is not applied in the body
+ __global uchar* mean, int mean_step, int mean_offset, //T_MEAN=float || float4; mean_offset is not applied in the body
+ __global uchar* variance, int var_step, int var_offset, //float; var_offset is not applied in the body
+ __global uchar* fgmask, int fgmask_step, int fgmask_offset, //int
+ float alphaT, float alpha1, float prune, // every weight becomes alpha1 * w + prune; alphaT is then added to the matched mode
+ int detectShadows_flag, // non-zero enables the shadow test at the end
+ float c_Tb, float c_TB, float c_Tg, float c_varMin, //constants
+ float c_varMax, float c_varInit, float c_tau, uchar c_shadowVal)
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+ weight_step/= sizeof(float); // byte steps converted to element steps for the typed pointers below
+ var_step /= sizeof(float);
+ mean_step /= (sizeof(float)*cnMode);
+
+ if( x < frame_col && y < frame_row)
+ {
+ __global const uchar* _frame = (frame + mad24( y, frame_step, x*CN + frame_offset)); // CN is a compile-time macro: channels per input pixel
+ T_MEAN pix;
+ frameToMean(_frame, pix);
+
+ bool background = false; // true - the pixel classified as background
+
+ bool fitsPDF = false; //if it remains false a new GMM mode will be added
+
+ __global int* _modesUsed = (__global int*)(modesUsed + mad24( y, modesUsed_step, x*(int)(sizeof(int))));
+ int nmodes = _modesUsed[0]; // modes stored for this pixel; decremented below whenever a mode is pruned
+ int nNewModes = nmodes; //mode count on entry; copied back into nmodes after the renormalization
+
+ float totalWeight = 0.0f; // running sum of the updated weights of the modes visited so far
+
+ __global float* _weight = (__global float*)(weight); // mode k of pixel (x, y) is at element (k * frame_row + y) * step + x in each plane
+ __global float* _variance = (__global float*)(variance);
+ __global T_MEAN* _mean = (__global T_MEAN*)(mean);
+
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+
+ float c_weight = alpha1 * _weight[(mode * frame_row + y) * weight_step + x] + prune;
- _weight[(mode * frame_row + y) * weight_step + x] = c_weight; //update weight by the calculated value
++ int swap_count = 0; // number of slots the matched mode is moved up by the sort below
+ if (!fitsPDF)
+ {
+ float c_var = _variance[(mode * frame_row + y) * var_step + x];
+
+ T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x];
+
+ T_MEAN diff = c_mean - pix;
+ float dist2 = sqr(diff);
+
+ if (totalWeight < c_TB && dist2 < c_Tb * c_var) // background only while the weight accumulated over the preceding modes is below c_TB
+ background = true;
+
+ if (dist2 < c_Tg * c_var) // pixel fits this mode: update it
+ {
+ fitsPDF = true;
+ c_weight += alphaT;
+ float k = alphaT / c_weight; // factor applied to the mean and variance updates below
+
+ _mean[(mode * frame_row + y) * mean_step + x] = c_mean - k * diff;
+
+ float varnew = c_var + k * (dist2 - c_var);
+ varnew = fmax(varnew, c_varMin); // clamp the variance to [c_varMin, c_varMax]
+ varnew = fmin(varnew, c_varMax);
+
+ _variance[(mode * frame_row + y) * var_step + x] = varnew;
+ for (int i = mode; i > 0; --i) // move the matched mode up while its new weight is not smaller than the mode above
+ {
+ if (c_weight < _weight[((i - 1) * frame_row + y) * weight_step + x])
+ break;
++ swap_count++;
+ swap(_weight, x, y, i - 1, frame_row, weight_step);
+ swap(_variance, x, y, i - 1, frame_row, var_step);
+ #if (CN==1)
+ swap(_mean, x, y, i - 1, frame_row, mean_step);
+ #else
+ swap4(_mean, x, y, i - 1, frame_row, mean_step);
+ #endif
+ }
+ }
+ } // !fitsPDF
+
+ if (c_weight < -prune) // prune: the weight is zeroed and the loop bound shrinks
+ {
+ c_weight = 0.0f;
+ nmodes--;
+ }
+
++ _weight[((mode - swap_count) * frame_row + y) * weight_step + x] = c_weight; //update weight by the calculated value, at the slot the mode occupies after the sort
+ totalWeight += c_weight;
+ }
+
+ totalWeight = 1.f / totalWeight; // variable reused as the normalization factor
+ for (int mode = 0; mode < nmodes; ++mode)
+ _weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
+
+ nmodes = nNewModes; // restores the count read on entry, so the prune decrements above do not persist
+
+ if (!fitsPDF) // no mode matched: create one from the current pixel
+ {
+ int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++; // NMIXTURES is a compile-time macro; last slot when all are in use, otherwise the next free slot
+
+ if (nmodes == 1) // very first mode of this pixel gets the full weight
+ _weight[(mode * frame_row + y) * weight_step + x] = 1.f;
+ else
+ {
+ _weight[(mode * frame_row + y) * weight_step + x] = alphaT;
+
+ for (int i = 0; i < nmodes - 1; ++i) // scale all other weights by alpha1
+ _weight[(i * frame_row + y) * weight_step + x] *= alpha1;
+ }
+
+ _mean[(mode * frame_row + y) * mean_step + x] = pix;
+ _variance[(mode * frame_row + y) * var_step + x] = c_varInit;
+
+ for (int i = nmodes - 1; i > 0; --i) // move the new mode up while alphaT is not smaller than the weight above
+ {
+ if (alphaT < _weight[((i - 1) * frame_row + y) * weight_step + x])
+ break;
+
+ swap(_weight, x, y, i - 1, frame_row, weight_step);
+ swap(_variance, x, y, i - 1, frame_row, var_step);
+ #if (CN==1)
+ swap(_mean, x, y, i - 1, frame_row, mean_step);
+ #else
+ swap4(_mean, x, y, i - 1, frame_row, mean_step);
+ #endif
+ }
+ }
+
+ _modesUsed[0] = nmodes; // store the updated mode count for this pixel
+ bool isShadow = false;
+ if (detectShadows_flag && !background) // shadow test runs only for pixels not already classified as background
+ {
+ float tWeight = 0.0f; // weight accumulated over the modes examined so far
+
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x];
+
+ T_MEAN pix_mean = pix * c_mean;
+
+ float numerator = sum(pix_mean);
+ float denominator = sqr(c_mean);
+
+ if (denominator == 0) // avoids the division below; ends the whole shadow scan, not just this mode
+ break;
+
+ if (numerator <= denominator && numerator >= c_tau * denominator) // ratio a lies in [c_tau, 1]
+ {
+ float a = numerator / denominator;
+
+ T_MEAN dD = a * c_mean - pix;
+
+ if (sqr(dD) < c_Tb * _variance[(mode * frame_row + y) * var_step + x] * a * a)
+ {
+ isShadow = true;
+ break;
+ }
+ }
+
+ tWeight += _weight[(mode * frame_row + y) * weight_step + x];
+ if (tWeight > c_TB) // stop once the examined modes exceed the cumulative-weight limit
+ break;
+ }
+ }
+ __global int* _fgmask = (__global int*)(fgmask + mad24(y, fgmask_step, x*(int)(sizeof(int)) + fgmask_offset));
+ *_fgmask = background ? 0 : isShadow ? c_shadowVal : 255; // 0 = background, c_shadowVal = shadow, 255 = foreground
+ }
+}
+
+// Builds the background image: one work-item per pixel blends the means of that pixel's
+// modes, weighted by their mixture weights, stopping as soon as the accumulated weight
+// exceeds c_TB. The blend is divided by the accumulated weight and written to dst through
+// meanToFrame. Mode m of pixel (col, row) lives at row m * modesUsed_row + row of the
+// weight and mean buffers. modesUsed_offset, weight_offset and mean_offset are not applied.
+__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed, int modesUsed_step, int modesUsed_offset, int modesUsed_row, int modesUsed_col,
+                                         __global const uchar* weight, int weight_step, int weight_offset,
+                                         __global const uchar* mean, int mean_step, int mean_offset,
+                                         __global uchar* dst, int dst_step, int dst_offset,
+                                         float c_TB)
+{
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Work-items outside the image have no pixel to produce.
+    if (col >= modesUsed_col || row >= modesUsed_row)
+        return;
+
+    const int intBytes = (int)(sizeof(int));
+    const int floatBytes = (int)(sizeof(float));
+
+    __global const int* modeCountPtr = (__global const int*)(modesUsed + mad24(row, modesUsed_step, col * intBytes));
+    const int modeCount = modeCountPtr[0];
+
+    T_MEAN blended = (T_MEAN)F_ZERO;
+    float weightSum = 0.0f;
+
+    for (int m = 0; m < modeCount; ++m)
+    {
+        const int planeRow = m * modesUsed_row + row;
+
+        const float w = *(__global const float*)(weight + mad24(planeRow, weight_step, col * floatBytes));
+        const T_MEAN mu = *(__global const T_MEAN*)(mean + mad24(planeRow, mean_step, col * floatBytes * cnMode));
+
+        blended = blended + w * mu;
+        weightSum += w;
+
+        // Enough weight collected: the remaining modes are not blended in.
+        if (weightSum > c_TB)
+            break;
+    }
+
+    blended = blended * (1.f / weightSum);
+
+    __global uchar* outPixel = dst + row * dst_step + col * CN + dst_offset;
+    meanToFrame(blended, outPixel);
+}