1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #if !defined CUDA_DISABLER
45 #include "opencv2/gpu/device/common.hpp"
46 #include "opencv2/gpu/device/vec_traits.hpp"
47 #include "opencv2/gpu/device/vec_math.hpp"
48 #include "opencv2/gpu/device/limits.hpp"
50 namespace cv { namespace gpu { namespace device
54 ///////////////////////////////////////////////////////////////
// Promote an 8-bit single-channel pixel to the float working type.
// NOTE(review): the one-line body was missing from this capture; restored as
// the implicit uchar -> float conversion required by the kernel callers.
__device__ __forceinline__ float cvt(uchar val)
{
    return val;
}
// Promote an 8-bit 3-channel pixel to the float3 working type.
__device__ __forceinline__ float3 cvt(const uchar3& val)
{
    return make_float3(val.x, val.y, val.z);
}
// Promote an 8-bit 4-channel pixel to the float4 working type
// (w carries the unused 4th channel and is propagated unchanged).
__device__ __forceinline__ float4 cvt(const uchar4& val)
{
    return make_float4(val.x, val.y, val.z, val.w);
}
// Squared value for the single-channel working type (squared distance,
// matching the float3/float4 overloads below).
// NOTE(review): the one-line body was missing from this capture; restored as
// val * val, forced by the squared-norm semantics of the vector overloads.
__device__ __forceinline__ float sqr(float val)
{
    return val * val;
}
// Squared L2 norm of a 3-channel vector.
__device__ __forceinline__ float sqr(const float3& val)
{
    return val.x * val.x + val.y * val.y + val.z * val.z;
}
// Squared L2 norm of a 4-channel vector; the w channel is deliberately
// excluded (4th image channel carries no color information here).
__device__ __forceinline__ float sqr(const float4& val)
{
    return val.x * val.x + val.y * val.y + val.z * val.z;
}
// Channel sum for the single-channel working type (identity).
// NOTE(review): the one-line body was missing from this capture; restored as
// the identity, forced by the channel-sum semantics of the vector overloads.
__device__ __forceinline__ float sum(float val)
{
    return val;
}
// Sum of the three channels.
__device__ __forceinline__ float sum(const float3& val)
{
    return val.x + val.y + val.z;
}
// Sum of the three color channels; w is deliberately excluded,
// consistent with sqr(const float4&).
__device__ __forceinline__ float sum(const float4& val)
{
    return val.x + val.y + val.z;
}
// One variance-update step clamped from below:
// var' = max(var + learningRate * (diff^2 - var), minVar).
__device__ __forceinline__ float clamp(float var, float learningRate, float diff, float minVar)
{
    return ::fmaxf(var + learningRate * (diff * diff - var), minVar);
}
// Per-channel variance-update step for the 3-channel working type,
// each channel clamped from below by minVar.
__device__ __forceinline__ float3 clamp(const float3& var, float learningRate, const float3& diff, float minVar)
{
    return make_float3(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
                       ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
                       ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar));
}
// Per-channel variance-update step for the 4-channel working type.
// The w channel carries no image data (sqr()/sum() ignore it too).
// NOTE(review): the 4th make_float4 argument was missing from this capture;
// restored as 0.0f to match the w-ignoring overloads — confirm against the
// upstream source.
__device__ __forceinline__ float4 clamp(const float4& var, float learningRate, const float4& diff, float minVar)
{
    return make_float4(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
                       ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
                       ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar),
                       0.0f);
}
// Swap the records of mixture components k and k+1 for pixel (x, y).
// Component planes are stacked vertically: component k of a rows-tall image
// occupies rows [k * rows, (k + 1) * rows) of the Ptr2D surface.
template <class Ptr2D>
__device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
{
    typename Ptr2D::elem_type val = ptr(k * rows + y, x);
    ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
    ptr((k + 1) * rows + y, x) = val;
}
122 ///////////////////////////////////////////////////////////////
123 // MOG without learning
// MOG classification WITHOUT model update (learningRate <= 0 path).
// One thread per pixel. GMM state is stored as nmixtures stacked planes:
// element (k * frame.rows + y, x) belongs to component k of pixel (x, y).
// Output: fgmask(y, x) = 0xff for foreground, 0x00 for background.
// NOTE(review): this capture of the source is missing lines (opening/closing
// braces, the "int kHit" declaration, "return;"/"break;" statements and the
// "float wsum"/"kForeground = k + 1;" bookkeeping). Comments annotate only
// the visible statements; gaps must be restored from the upstream file.
template <typename SrcT, typename WorkT>
__global__ void mog_withoutLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
                                    const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, const PtrStep<WorkT> gmm_var,
                                    const int nmixtures, const float varThreshold, const float backgroundRatio)
    // Global pixel coordinates for this thread.
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image must not touch memory.
    if (x >= frame.cols || y >= frame.rows)

    // Pixel promoted to the float(-vector) working type.
    WorkT pix = cvt(frame(y, x));

    int kForeground = -1; // first component index that is NOT background (set below)

    // Find the first component this pixel matches.
    for (int k = 0; k < nmixtures; ++k)
        // A near-zero weight marks the end of the active component list.
        if (gmm_weight(k * frame.rows + y, x) < numeric_limits<float>::epsilon())

        WorkT mu = gmm_mean(k * frame.rows + y, x);
        WorkT var = gmm_var(k * frame.rows + y, x);

        WorkT diff = pix - mu;

        // Squared distance below varThreshold * total variance => pixel fits k.
        if (sqr(diff) < varThreshold * sum(var))

    // Accumulate component weights; the components before the point where the
    // cumulative weight exceeds backgroundRatio model the background.
    for (int k = 0; k < nmixtures; ++k)
        wsum += gmm_weight(k * frame.rows + y, x);

        if (wsum > backgroundRatio)

    // -(true) == 0xff as uchar: foreground if no component matched or the
    // matched component lies past the background set.
    fgmask(y, x) = (uchar) (-(kHit < 0 || kHit >= kForeground));
// Host-side launcher for mog_withoutLearning: one thread per pixel,
// L1-preferring cache config, launch on 'stream'.
// NOTE(review): "dim3 block(...);" and the "if (stream == 0)" guard that
// normally precedes the synchronize are missing from this capture.
template <typename SrcT, typename WorkT>
void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var,
                                int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
    dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

    // Kernel uses no shared memory: prefer a larger L1 cache.
    cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );

    mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
                                                                 weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
                                                                 nmixtures, varThreshold, backgroundRatio);
    // Catch launch-configuration errors immediately.
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
195 ///////////////////////////////////////////////////////////////
// MOG classification WITH model update (learningRate > 0 path).
// Per pixel: match against the mixture (kept sorted by sortKey = w / sigma),
// update the matched component's weight/mean/variance, bubble it toward the
// front, replace the weakest component when nothing matches, renormalize all
// weights, and write the foreground mask.
// NOTE(review): this capture of the source is missing many structural lines
// (braces, declarations "int kHit", "int k", "int k1", "float wsum",
// "kHit = k;", "break;" statements, the no-match branch header and
// "w *= wscale;"/"kForeground = k + 1;"). Comments annotate only the visible
// statements.
template <typename SrcT, typename WorkT>
__global__ void mog_withLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
                                 PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep<WorkT> gmm_mean, PtrStep<WorkT> gmm_var,
                                 const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
    // Initialization constants for a freshly created component:
    // weight w0, sort key sk0, variance var0 (built from an assumed sigma of 30 * 0.5).
    const float w0 = 0.05f;
    const float sk0 = w0 / (30.0f * 0.5f * 2.0f);
    const float var0 = 30.0f * 0.5f * 30.0f * 0.5f * 4.0f;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Out-of-image threads must not touch the model.
    if (x >= frame.cols || y >= frame.rows)

    WorkT pix = cvt(frame(y, x));

    int kForeground = -1; // index of the first non-background component (set below)

    // Scan components until one matches the pixel.
    for (; k < nmixtures; ++k)
        float w = gmm_weight(k * frame.rows + y, x);

        // Near-zero weight marks the end of the active component list.
        if (w < numeric_limits<float>::epsilon())

        WorkT mu = gmm_mean(k * frame.rows + y, x);
        WorkT var = gmm_var(k * frame.rows + y, x);

        WorkT diff = pix - mu;

        // Component k matches: update it in place, then re-sort.
        if (sqr(diff) < varThreshold * sum(var))
            // Weight moves toward 1 at rate learningRate.
            float dw = learningRate * (1.0f - w);

            // Variance update, clamped from below by minVar.
            var = clamp(var, learningRate, diff, minVar);

            // sortKey ~ w / sigma: high weight and low variance sort first.
            float sortKey_prev = w / ::sqrtf(sum(var));
            gmm_sortKey(k * frame.rows + y, x) = sortKey_prev;

            float weight_prev = w + dw;
            gmm_weight(k * frame.rows + y, x) = weight_prev;

            // Mean moves toward the pixel value.
            WorkT mean_prev = mu + learningRate * diff;
            gmm_mean(k * frame.rows + y, x) = mean_prev;

            WorkT var_prev = var;
            gmm_var(k * frame.rows + y, x) = var_prev;

            // Bubble the updated component toward index 0 while its sortKey
            // exceeds its predecessor's (k1 walks downward from k - 1).
            float sortKey_next = gmm_sortKey(k1 * frame.rows + y, x);
            float weight_next = gmm_weight(k1 * frame.rows + y, x);
            WorkT mean_next = gmm_mean(k1 * frame.rows + y, x);
            WorkT var_next = gmm_var(k1 * frame.rows + y, x);

            for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
                // Swap the records at k1 and k1 + 1 across all four planes.
                gmm_sortKey(k1 * frame.rows + y, x) = sortKey_prev;
                gmm_sortKey((k1 + 1) * frame.rows + y, x) = sortKey_next;

                gmm_weight(k1 * frame.rows + y, x) = weight_prev;
                gmm_weight((k1 + 1) * frame.rows + y, x) = weight_next;

                gmm_mean(k1 * frame.rows + y, x) = mean_prev;
                gmm_mean((k1 + 1) * frame.rows + y, x) = mean_next;

                gmm_var(k1 * frame.rows + y, x) = var_prev;
                gmm_var((k1 + 1) * frame.rows + y, x) = var_next;

                // Advance: the record one slot lower becomes the next
                // candidate (0 / zero vector once the front is reached).
                sortKey_prev = sortKey_next;
                sortKey_next = k1 > 0 ? gmm_sortKey((k1 - 1) * frame.rows + y, x) : 0.0f;

                weight_prev = weight_next;
                weight_next = k1 > 0 ? gmm_weight((k1 - 1) * frame.rows + y, x) : 0.0f;

                mean_prev = mean_next;
                mean_next = k1 > 0 ? gmm_mean((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);

                var_next = k1 > 0 ? gmm_var((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);

    // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
    kHit = k = ::min(k, nmixtures - 1);
    wsum += w0 - gmm_weight(k * frame.rows + y, x);

    gmm_weight(k * frame.rows + y, x) = w0;
    gmm_mean(k * frame.rows + y, x) = pix;
    gmm_var(k * frame.rows + y, x) = VecTraits<WorkT>::all(var0);
    gmm_sortKey(k * frame.rows + y, x) = sk0;

    // Accumulate the weights of the remaining components (for renormalization).
    for( ; k < nmixtures; k++)
        wsum += gmm_weight(k * frame.rows + y, x);

    // Renormalize weights and sort keys so the weights sum to 1.
    float wscale = 1.0f / wsum;
    for (k = 0; k < nmixtures; ++k)
        float w = gmm_weight(k * frame.rows + y, x);
        gmm_weight(k * frame.rows + y, x) = w;
        gmm_sortKey(k * frame.rows + y, x) *= wscale;

        // The first component whose cumulative weight exceeds backgroundRatio
        // separates background components from foreground ones.
        if (wsum > backgroundRatio && kForeground < 0)

    // Foreground iff the matched component lies past the background set.
    fgmask(y, x) = (uchar)(-(kHit >= kForeground));
// Host-side launcher for mog_withLearning (learning-enabled path).
// NOTE(review): the tail of the parameter list ("cudaStream_t stream)"),
// "dim3 block(...);" and the "if (stream == 0)" guard before the synchronize
// are missing from this capture of the source.
template <typename SrcT, typename WorkT>
void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
                             int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
    dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

    // Kernel uses no shared memory: prefer a larger L1 cache.
    cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );

    mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
                                                              weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
                                                              nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
    // Catch launch-configuration errors immediately.
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
348 ///////////////////////////////////////////////////////////////
// Public MOG entry point: dispatches on channel count 'cn' (valid values
// 1, 3 and 4 — slots 0 and 2 of the tables are null) and on whether learning
// is enabled (learningRate > 0). noiseSigma^2 supplies the minimum-variance
// clamp used by the learning path.
// NOTE(review): the function's braces and the "else" between the two calls
// are missing from this capture.
void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
    typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
    typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);

    // Dispatch tables indexed by channel count.
    static const withoutLearning_t withoutLearning[] =
        0, mog_withoutLearning_caller<uchar, float>, 0, mog_withoutLearning_caller<uchar3, float3>, mog_withoutLearning_caller<uchar4, float4>
    static const withLearning_t withLearning[] =
        0, mog_withLearning_caller<uchar, float>, 0, mog_withLearning_caller<uchar3, float3>, mog_withLearning_caller<uchar4, float4>

    // Lower bound for per-component variance in the learning path.
    const float minVar = noiseSigma * noiseSigma;

    if (learningRate > 0.0f)
        withLearning[cn](frame, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar, stream);
        withoutLearning[cn](frame, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio, stream);
// Build the MOG background image: per pixel, the weighted average of the
// component means, accumulated until the cumulative weight exceeds
// backgroundRatio.
// NOTE(review): this capture is missing the "return;" after the bounds check,
// loop braces and the "break;" once backgroundRatio is exceeded.
template <typename WorkT, typename OutT>
__global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStepSz<OutT> dst, const int nmixtures, const float backgroundRatio)
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= dst.cols || y >= dst.rows)

    WorkT meanVal = VecTraits<WorkT>::all(0.0f);
    float totalWeight = 0.0f;

    for (int mode = 0; mode < nmixtures; ++mode)
        float weight = gmm_weight(mode * dst.rows + y, x);

        // Weighted accumulation of the component mean.
        WorkT mean = gmm_mean(mode * dst.rows + y, x);
        meanVal = meanVal + weight * mean;

        totalWeight += weight;

        // Stop once enough weight is considered background.
        if(totalWeight > backgroundRatio)

    // Normalize by the accumulated weight and saturate to the output type.
    meanVal = meanVal * (1.f / totalWeight);
    dst(y, x) = saturate_cast<OutT>(meanVal);
// Host-side launcher for getBackgroundImage.
// NOTE(review): "dim3 block(...);" and the "if (stream == 0)" guard before
// the synchronize are missing from this capture.
template <typename WorkT, typename OutT>
void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    // Kernel uses no shared memory: prefer a larger L1 cache.
    cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );

    getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
// Public entry point: dispatch getBackgroundImage on channel count 'cn'
// (valid values 1, 3 and 4 — slots 0 and 2 are null).
// NOTE(review): the function/array braces are missing from this capture.
void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
    typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);

    static const func_t funcs[] =
        0, getBackgroundImage_caller<float, uchar>, 0, getBackgroundImage_caller<float3, uchar3>, getBackgroundImage_caller<float4, uchar4>

    funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream);
430 ///////////////////////////////////////////////////////////////
// MOG2 tuning parameters in constant memory (uploaded once by loadConstants,
// broadcast to all threads).
__constant__ int c_nmixtures;           // max number of mixture components per pixel
__constant__ float c_Tb;                // squared-distance threshold for the background test (dist2 < c_Tb * var)
__constant__ float c_TB;                // cumulative weight fraction regarded as background
__constant__ float c_Tg;                // squared-distance threshold for matching an existing component
__constant__ float c_varInit;           // variance assigned to a newly created component
__constant__ float c_varMin;            // lower clamp for component variance
__constant__ float c_varMax;            // upper clamp for component variance
__constant__ float c_tau;               // shadow threshold on the brightness ratio a = <pix,mean>/|mean|^2
__constant__ unsigned char c_shadowVal; // fgmask label written for shadow pixels
// Upload the MOG2 tuning parameters to the __constant__ variables above.
// Host-only; call before launching mog2() whenever the configuration changes.
//   nmixtures                  - max Gaussian components per pixel
//   Tb, Tg                     - squared-distance thresholds (background / match)
//   TB                         - weight fraction treated as background
//   varInit, varMin, varMax    - initial variance and its clamping range
//   tau                        - shadow-detection brightness threshold
//   shadowVal                  - fgmask label for shadow pixels
void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
{
    // Sanitize the variance range so that varMin <= varMax.
    // NOTE(review): because varMin is overwritten before varMax is computed,
    // inverted inputs (varMin > varMax) collapse BOTH values to the smaller
    // one instead of swapping them — behavior preserved from the original.
    varMin = ::fminf(varMin, varMax);
    varMax = ::fmaxf(varMin, varMax);

    cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
}
// MOG2 (adaptive Gaussian mixture) per-pixel update + classification kernel.
// The compile-time flag detectShadows selects shadow labelling. GMM planes
// are stacked per component: element (mode * frame.rows + y, x).
// alphaT is the learning rate, alpha1 = 1 - alphaT, prune is the weight-decay
// term added each frame.
// NOTE(review): this capture of the source is missing structural lines
// (braces, "return;", "background = true;"/"fitsPDF = true;" assignments,
// "nmodes = nNewModes;"-style bookkeeping, "break;"/"continue;" statements
// and the branch headers around new-mode creation and shadow accumulation).
// Comments annotate only the visible statements.
template <bool detectShadows, typename SrcT, typename WorkT>
__global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
                     PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
                     const float alphaT, const float alpha1, const float prune)
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= frame.cols || y >= frame.rows)

    WorkT pix = cvt(frame(y, x));

    //calculate distances to the modes (+ sort)
    //here we need to go in descending order!!!

    bool background = false; // true - the pixel classified as background

    bool fitsPDF = false; //if it remains zero a new GMM mode will be added

    int nmodes = modesUsed(y, x);
    int nNewModes = nmodes; //current number of modes in GMM

    float totalWeight = 0.0f;

    //go through all modes
    for (int mode = 0; mode < nmodes; ++mode)
        //need only weight if fit is found
        float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;

        //check if it belongs to some of the remaining modes
        float var = gmm_variance(mode * frame.rows + y, x);

        WorkT mean = gmm_mean(mode * frame.rows + y, x);

        //calculate difference and distance
        WorkT diff = mean - pix;
        float dist2 = sqr(diff);

        //background? - Tb - usually larger than Tg
        if (totalWeight < c_TB && dist2 < c_Tb * var)

        // Does the pixel fit this component? (Tg is the match threshold.)
        if (dist2 < c_Tg * var)
            //belongs to the mode

            //update distribution

            // Per-component learning rate: larger for low-weight components.
            float k = alphaT / weight;

            // Mean moves toward the pixel value.
            gmm_mean(mode * frame.rows + y, x) = mean - k * diff;

            float varnew = var + k * (dist2 - var);

            // Keep the variance inside the configured [c_varMin, c_varMax] range.
            varnew = ::fmaxf(varnew, c_varMin);
            varnew = ::fminf(varnew, c_varMax);

            gmm_variance(mode * frame.rows + y, x) = varnew;

            //all other weights are at the same place and
            //only the matched (iModes) is higher -> just find the new place for it
            for (int i = mode; i > 0; --i)
                // Stop once the predecessor outweighs the updated component.
                if (weight < gmm_weight((i - 1) * frame.rows + y, x))

                // Swap the full GMM record (weight, variance, mean) upward.
                swap(gmm_weight, x, y, i - 1, frame.rows);
                swap(gmm_variance, x, y, i - 1, frame.rows);
                swap(gmm_mean, x, y, i - 1, frame.rows);

            //belongs to the mode - bFitsPDF becomes 1

        gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
        totalWeight += weight;

    //renormalize weights
    totalWeight = 1.f / totalWeight;
    for (int mode = 0; mode < nmodes; ++mode)
        gmm_weight(mode * frame.rows + y, x) *= totalWeight;

    //make new mode if needed and exit

    // replace the weakest or add a new one
    int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;

    // A single-component model takes full weight; otherwise the new component
    // starts at alphaT and the existing ones are decayed by alpha1 below.
    gmm_weight(mode * frame.rows + y, x) = 1.f;

    gmm_weight(mode * frame.rows + y, x) = alphaT;

    // renormalize all other weights
    for (int i = 0; i < nmodes - 1; ++i)
        gmm_weight(i * frame.rows + y, x) *= alpha1;

    // Initialize the new component at the current pixel.
    gmm_mean(mode * frame.rows + y, x) = pix;
    gmm_variance(mode * frame.rows + y, x) = c_varInit;

    //find the new place for it
    for (int i = nmodes - 1; i > 0; --i)
        // The new component (weight alphaT) bubbles up past lighter ones.
        if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))

        swap(gmm_weight, x, y, i - 1, frame.rows);
        swap(gmm_variance, x, y, i - 1, frame.rows);
        swap(gmm_mean, x, y, i - 1, frame.rows);

    //set the number of modes
    modesUsed(y, x) = nmodes;

    // Shadow test (compiled in only when detectShadows): a foreground pixel
    // whose color is a uniformly darkened version (c_tau <= a <= 1) of a
    // background component's mean is labelled as shadow.
    bool isShadow = false;
    if (detectShadows && !background)
        float tWeight = 0.0f;

        // check all the components marked as background:
        for (int mode = 0; mode < nmodes; ++mode)
            WorkT mean = gmm_mean(mode * frame.rows + y, x);

            // Brightness ratio a = <pix, mean> / |mean|^2 (projection onto the mean).
            WorkT pix_mean = pix * mean;

            float numerator = sum(pix_mean);
            float denominator = sqr(mean);

            // no division by zero allowed
            if (denominator == 0)

            // if tau < a < 1 then also check the color distortion
            if (numerator <= denominator && numerator >= c_tau * denominator)
                float a = numerator / denominator;

                WorkT dD = a * mean - pix;

                // Color distortion within Tb * variance (scaled by a^2) => shadow.
                if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)

            // Accumulate background weight (loop ends once it passes c_TB).
            tWeight += gmm_weight(mode * frame.rows + y, x);

    // 0 = background, c_shadowVal = shadow, 255 = foreground.
    fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
// Host-side launcher for mog2: selects the detectShadows template
// instantiation at runtime and launches one thread per pixel.
// NOTE(review): "dim3 block(...);", the "if (detectShadows) ... else ..."
// branch headers around the two launches and the "if (stream == 0)" guard
// are missing from this capture of the source.
template <typename SrcT, typename WorkT>
void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
                 float alphaT, float prune, bool detectShadows, cudaStream_t stream)
    dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

    // Precompute the complementary learning factor once on the host.
    const float alpha1 = 1.0f - alphaT;

    // Shadow-detecting instantiation.
    cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );

    mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                        weight, variance, (PtrStepSz<WorkT>) mean,
                                                        alphaT, alpha1, prune);

    // Non-shadow instantiation.
    cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );

    mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                         weight, variance, (PtrStepSz<WorkT>) mean,
                                                         alphaT, alpha1, prune);

    // Catch launch-configuration errors immediately.
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
// Public MOG2 entry point: dispatches on channel count 'cn' (valid values
// 1, 3 and 4 — slots 0 and 2 of the table are null).
// NOTE(review): the function/array braces are missing from this capture.
void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
              float alphaT, float prune, bool detectShadows, cudaStream_t stream)
    typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);

    static const func_t funcs[] =
        0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>

    funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
// Build the MOG2 background image: per pixel, the weighted average of the
// means of the modes actually in use, accumulated until the cumulative weight
// exceeds c_TB.
// NOTE(review): this capture is missing the "return;" after the bounds check,
// loop braces and the "break;" once c_TB is exceeded.
template <typename WorkT, typename OutT>
__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= modesUsed.cols || y >= modesUsed.rows)

    // Only the modes recorded for this pixel participate.
    int nmodes = modesUsed(y, x);

    WorkT meanVal = VecTraits<WorkT>::all(0.0f);
    float totalWeight = 0.0f;

    for (int mode = 0; mode < nmodes; ++mode)
        float weight = gmm_weight(mode * modesUsed.rows + y, x);

        // Weighted accumulation of the component mean.
        WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
        meanVal = meanVal + weight * mean;

        totalWeight += weight;

        // Stop once enough weight is considered background.
        if(totalWeight > c_TB)

    // Normalize by the accumulated weight and saturate to the output type.
    meanVal = meanVal * (1.f / totalWeight);
    dst(y, x) = saturate_cast<OutT>(meanVal);
// Host-side launcher for getBackgroundImage2.
// NOTE(review): "dim3 block(...);" and the "if (stream == 0)" guard before
// the synchronize are missing from this capture.
template <typename WorkT, typename OutT>
void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
    dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));

    // Kernel uses no shared memory: prefer a larger L1 cache.
    cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );

    getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
// Public entry point: dispatch getBackgroundImage2 on channel count 'cn'
// (valid values 1, 3 and 4 — slots 0 and 2 are null).
// NOTE(review): the function/array braces are missing from this capture.
void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
    typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[] =
        0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>

    funcs[cn](modesUsed, weight, mean, dst, stream);
764 #endif /* CUDA_DISABLER */