modules/gpu/src/cuda/bgfg_mog.cu

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or bpied warranties, including, but not limited to, the bpied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #if !defined CUDA_DISABLER
  44
  45 #include "opencv2/gpu/device/common.hpp"
  46 #include "opencv2/gpu/device/vec_traits.hpp"
  47 #include "opencv2/gpu/device/vec_math.hpp"
  48 #include "opencv2/gpu/device/limits.hpp"
  49
  50 namespace cv { namespace gpu { namespace device
  51 {
  52     namespace mog
  53     {
  54         ///////////////////////////////////////////////////////////////
  55         // Utility
  56
  57         __device__ __forceinline__ float cvt(uchar val)
  58         {
  59             return val;
  60         }
  61         __device__ __forceinline__ float3 cvt(const uchar3& val)
  62         {
  63             return make_float3(val.x, val.y, val.z);
  64         }
  65         __device__ __forceinline__ float4 cvt(const uchar4& val)
  66         {
  67             return make_float4(val.x, val.y, val.z, val.w);
  68         }
  69
  70         __device__ __forceinline__ float sqr(float val)
  71         {
  72             return val * val;
  73         }
  74         __device__ __forceinline__ float sqr(const float3& val)
  75         {
  76             return val.x * val.x + val.y * val.y + val.z * val.z;
  77         }
  78         __device__ __forceinline__ float sqr(const float4& val)
  79         {
  80             return val.x * val.x + val.y * val.y + val.z * val.z;
  81         }
  82
  83         __device__ __forceinline__ float sum(float val)
  84         {
  85             return val;
  86         }
  87         __device__ __forceinline__ float sum(const float3& val)
  88         {
  89             return val.x + val.y + val.z;
  90         }
  91         __device__ __forceinline__ float sum(const float4& val)
  92         {
  93             return val.x + val.y + val.z;
  94         }
  95
  96         __device__ __forceinline__ float clamp(float var, float learningRate, float diff, float minVar)
  97         {
  98              return ::fmaxf(var + learningRate * (diff * diff - var), minVar);
  99         }
 100         __device__ __forceinline__ float3 clamp(const float3& var, float learningRate, const float3& diff, float minVar)
 101         {
 102              return make_float3(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
 103                                 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
 104                                 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar));
 105         }
 106         __device__ __forceinline__ float4 clamp(const float4& var, float learningRate, const float4& diff, float minVar)
 107         {
 108              return make_float4(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
 109                                 ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
 110                                 ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar),
 111                                 0.0f);
 112         }
 113
 114         template <class Ptr2D>
 115         __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
 116         {
 117             typename Ptr2D::elem_type val = ptr(k * rows + y, x);
 118             ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
 119             ptr((k + 1) * rows + y, x) = val;
 120         }
 121
 122         ///////////////////////////////////////////////////////////////
 123         // MOG without learning
 124
 125         template <typename SrcT, typename WorkT>
 126         __global__ void mog_withoutLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
 127                                             const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, const PtrStep<WorkT> gmm_var,
 128                                             const int nmixtures, const float varThreshold, const float backgroundRatio)
 129         {
 130             const int x = blockIdx.x * blockDim.x + threadIdx.x;
 131             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 132
 133             if (x >= frame.cols || y >= frame.rows)
 134                 return;
 135
 136             WorkT pix = cvt(frame(y, x));
 137
 138             int kHit = -1;
 139             int kForeground = -1;
 140
 141             for (int k = 0; k < nmixtures; ++k)
 142             {
 143                 if (gmm_weight(k * frame.rows + y, x) < numeric_limits<float>::epsilon())
 144                     break;
 145
 146                 WorkT mu = gmm_mean(k * frame.rows + y, x);
 147                 WorkT var = gmm_var(k * frame.rows + y, x);
 148
 149                 WorkT diff = pix - mu;
 150
 151                 if (sqr(diff) < varThreshold * sum(var))
 152                 {
 153                     kHit = k;
 154                     break;
 155                 }
 156             }
 157
 158             if (kHit >= 0)
 159             {
 160                 float wsum = 0.0f;
 161                 for (int k = 0; k < nmixtures; ++k)
 162                 {
 163                     wsum += gmm_weight(k * frame.rows + y, x);
 164
 165                     if (wsum > backgroundRatio)
 166                     {
 167                         kForeground = k + 1;
 168                         break;
 169                     }
 170                 }
 171             }
 172
 173             fgmask(y, x) = (uchar) (-(kHit < 0 || kHit >= kForeground));
 174         }
 175
 176         template <typename SrcT, typename WorkT>
 177         void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var,
 178                                         int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
 179         {
 180             dim3 block(32, 8);
 181             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
 182
 183             cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
 184
 185             mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
 186                                                                          weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
 187                                                                          nmixtures, varThreshold, backgroundRatio);
 188
 189             cudaSafeCall( cudaGetLastError() );
 190
 191             if (stream == 0)
 192                 cudaSafeCall( cudaDeviceSynchronize() );
 193         }
 194
 195         ///////////////////////////////////////////////////////////////
 196         // MOG with learning
 197
 198         template <typename SrcT, typename WorkT>
 199         __global__ void mog_withLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
 200                                          PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep<WorkT> gmm_mean, PtrStep<WorkT> gmm_var,
 201                                          const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
 202         {
 203             const float w0 = 0.05f;
 204             const float sk0 = w0 / (30.0f * 0.5f * 2.0f);
 205             const float var0 = 30.0f * 0.5f * 30.0f * 0.5f * 4.0f;
 206
 207             const int x = blockIdx.x * blockDim.x + threadIdx.x;
 208             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 209
 210             if (x >= frame.cols || y >= frame.rows)
 211                 return;
 212
 213             WorkT pix = cvt(frame(y, x));
 214
 215             float wsum = 0.0f;
 216             int kHit = -1;
 217             int kForeground = -1;
 218
 219             int k = 0;
 220             for (; k < nmixtures; ++k)
 221             {
 222                 float w = gmm_weight(k * frame.rows + y, x);
 223                 wsum += w;
 224
 225                 if (w < numeric_limits<float>::epsilon())
 226                     break;
 227
 228                 WorkT mu = gmm_mean(k * frame.rows + y, x);
 229                 WorkT var = gmm_var(k * frame.rows + y, x);
 230
 231                 WorkT diff = pix - mu;
 232
 233                 if (sqr(diff) < varThreshold * sum(var))
 234                 {
 235                     wsum -= w;
 236                     float dw = learningRate * (1.0f - w);
 237
 238                     var = clamp(var, learningRate, diff, minVar);
 239
 240                     float sortKey_prev = w / ::sqrtf(sum(var));
 241                     gmm_sortKey(k * frame.rows + y, x) = sortKey_prev;
 242
 243                     float weight_prev = w + dw;
 244                     gmm_weight(k * frame.rows + y, x) = weight_prev;
 245
 246                     WorkT mean_prev = mu + learningRate * diff;
 247                     gmm_mean(k * frame.rows + y, x) = mean_prev;
 248
 249                     WorkT var_prev = var;
 250                     gmm_var(k * frame.rows + y, x) = var_prev;
 251
 252                     int k1 = k - 1;
 253
 254                     if (k1 >= 0)
 255                     {
 256                         float sortKey_next = gmm_sortKey(k1 * frame.rows + y, x);
 257                         float weight_next = gmm_weight(k1 * frame.rows + y, x);
 258                         WorkT mean_next = gmm_mean(k1 * frame.rows + y, x);
 259                         WorkT var_next = gmm_var(k1 * frame.rows + y, x);
 260
 261                         for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
 262                         {
 263                             gmm_sortKey(k1 * frame.rows + y, x) = sortKey_prev;
 264                             gmm_sortKey((k1 + 1) * frame.rows + y, x) = sortKey_next;
 265
 266                             gmm_weight(k1 * frame.rows + y, x) = weight_prev;
 267                             gmm_weight((k1 + 1) * frame.rows + y, x) = weight_next;
 268
 269                             gmm_mean(k1 * frame.rows + y, x) = mean_prev;
 270                             gmm_mean((k1 + 1) * frame.rows + y, x) = mean_next;
 271
 272                             gmm_var(k1 * frame.rows + y, x) = var_prev;
 273                             gmm_var((k1 + 1) * frame.rows + y, x) = var_next;
 274
 275                             sortKey_prev = sortKey_next;
 276                             sortKey_next = k1 > 0 ? gmm_sortKey((k1 - 1) * frame.rows + y, x) : 0.0f;
 277
 278                             weight_prev = weight_next;
 279                             weight_next = k1 > 0 ? gmm_weight((k1 - 1) * frame.rows + y, x) : 0.0f;
 280
 281                             mean_prev = mean_next;
 282                             mean_next = k1 > 0 ? gmm_mean((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
 283
 284                             var_prev = var_next;
 285                             var_next = k1 > 0 ? gmm_var((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
 286                         }
 287                     }
 288
 289                     kHit = k1 + 1;
 290                     break;
 291                 }
 292             }
 293
 294             if (kHit < 0)
 295             {
 296                 // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
 297                 kHit = k = ::min(k, nmixtures - 1);
 298                 wsum += w0 - gmm_weight(k * frame.rows + y, x);
 299
 300                 gmm_weight(k * frame.rows + y, x) = w0;
 301                 gmm_mean(k * frame.rows + y, x) = pix;
 302                 gmm_var(k * frame.rows + y, x) = VecTraits<WorkT>::all(var0);
 303                 gmm_sortKey(k * frame.rows + y, x) = sk0;
 304             }
 305             else
 306             {
 307                 for( ; k < nmixtures; k++)
 308                     wsum += gmm_weight(k * frame.rows + y, x);
 309             }
 310
 311             float wscale = 1.0f / wsum;
 312             wsum = 0;
 313             for (k = 0; k < nmixtures; ++k)
 314             {
 315                 float w = gmm_weight(k * frame.rows + y, x);
 316                 wsum += w *= wscale;
 317
 318                 gmm_weight(k * frame.rows + y, x) = w;
 319                 gmm_sortKey(k * frame.rows + y, x) *= wscale;
 320
 321                 if (wsum > backgroundRatio && kForeground < 0)
 322                     kForeground = k + 1;
 323             }
 324
 325             fgmask(y, x) = (uchar)(-(kHit >= kForeground));
 326         }
 327
 328         template <typename SrcT, typename WorkT>
 329         void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
 330                                      int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
 331                                      cudaStream_t stream)
 332         {
 333             dim3 block(32, 8);
 334             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
 335
 336             cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
 337
 338             mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
 339                                                                       weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
 340                                                                       nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
 341
 342             cudaSafeCall( cudaGetLastError() );
 343
 344             if (stream == 0)
 345                 cudaSafeCall( cudaDeviceSynchronize() );
 346         }
 347
 348         ///////////////////////////////////////////////////////////////
 349         // MOG
 350
 351         void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
 352         {
 353             typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
 354             typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);
 355
 356             static const withoutLearning_t withoutLearning[] =
 357             {
 358                 0, mog_withoutLearning_caller<uchar, float>, 0, mog_withoutLearning_caller<uchar3, float3>, mog_withoutLearning_caller<uchar4, float4>
 359             };
 360             static const withLearning_t withLearning[] =
 361             {
 362                 0, mog_withLearning_caller<uchar, float>, 0, mog_withLearning_caller<uchar3, float3>, mog_withLearning_caller<uchar4, float4>
 363             };
 364
 365             const float minVar = noiseSigma * noiseSigma;
 366
 367             if (learningRate > 0.0f)
 368                 withLearning[cn](frame, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar, stream);
 369             else
 370                 withoutLearning[cn](frame, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio, stream);
 371         }
 372
 373         template <typename WorkT, typename OutT>
 374         __global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStepSz<OutT> dst, const int nmixtures, const float backgroundRatio)
 375         {
 376             const int x = blockIdx.x * blockDim.x + threadIdx.x;
 377             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 378
 379             if (x >= dst.cols || y >= dst.rows)
 380                 return;
 381
 382             WorkT meanVal = VecTraits<WorkT>::all(0.0f);
 383             float totalWeight = 0.0f;
 384
 385             for (int mode = 0; mode < nmixtures; ++mode)
 386             {
 387                 float weight = gmm_weight(mode * dst.rows + y, x);
 388
 389                 WorkT mean = gmm_mean(mode * dst.rows + y, x);
 390                 meanVal = meanVal + weight * mean;
 391
 392                 totalWeight += weight;
 393
 394                 if(totalWeight > backgroundRatio)
 395                     break;
 396             }
 397
 398             meanVal = meanVal * (1.f / totalWeight);
 399
 400             dst(y, x) = saturate_cast<OutT>(meanVal);
 401         }
 402
 403         template <typename WorkT, typename OutT>
 404         void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
 405         {
 406             dim3 block(32, 8);
 407             dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
 408
 409             cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
 410
 411             getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
 412             cudaSafeCall( cudaGetLastError() );
 413
 414             if (stream == 0)
 415                 cudaSafeCall( cudaDeviceSynchronize() );
 416         }
 417
 418         void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
 419         {
 420             typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
 421
 422             static const func_t funcs[] =
 423             {
 424                 0, getBackgroundImage_caller<float, uchar>, 0, getBackgroundImage_caller<float3, uchar3>, getBackgroundImage_caller<float4, uchar4>
 425             };
 426
 427             funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream);
 428         }
 429
 430         ///////////////////////////////////////////////////////////////
 431         // MOG2
 432
 433         __constant__ int           c_nmixtures;
 434         __constant__ float         c_Tb;
 435         __constant__ float         c_TB;
 436         __constant__ float         c_Tg;
 437         __constant__ float         c_varInit;
 438         __constant__ float         c_varMin;
 439         __constant__ float         c_varMax;
 440         __constant__ float         c_tau;
 441         __constant__ unsigned char c_shadowVal;
 442
 443         void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
 444         {
 445             varMin = ::fminf(varMin, varMax);
 446             varMax = ::fmaxf(varMin, varMax);
 447
 448             cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
 449             cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
 450             cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
 451             cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
 452             cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
 453             cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
 454             cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
 455             cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
 456             cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
 457         }
 458
 459         template <bool detectShadows, typename SrcT, typename WorkT>
 460         __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
 461                              PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
 462                              const float alphaT, const float alpha1, const float prune)
 463         {
 464             const int x = blockIdx.x * blockDim.x + threadIdx.x;
 465             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 466
 467             if (x >= frame.cols || y >= frame.rows)
 468                 return;
 469
 470             WorkT pix = cvt(frame(y, x));
 471
 472             //calculate distances to the modes (+ sort)
 473             //here we need to go in descending order!!!
 474
 475             bool background = false; // true - the pixel classified as background
 476
 477             //internal:
 478
 479             bool fitsPDF = false; //if it remains zero a new GMM mode will be added
 480
 481             int nmodes = modesUsed(y, x);
 482             int nNewModes = nmodes; //current number of modes in GMM
 483
 484             float totalWeight = 0.0f;
 485
 486             //go through all modes
 487
 488             for (int mode = 0; mode < nmodes; ++mode)
 489             {
 490                 //need only weight if fit is found
 491                 float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
 492
 493                 //fit not found yet
 494                 if (!fitsPDF)
 495                 {
 496                     //check if it belongs to some of the remaining modes
 497                     float var = gmm_variance(mode * frame.rows + y, x);
 498
 499                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
 500
 501                     //calculate difference and distance
 502                     WorkT diff = mean - pix;
 503                     float dist2 = sqr(diff);
 504
 505                     //background? - Tb - usually larger than Tg
 506                     if (totalWeight < c_TB && dist2 < c_Tb * var)
 507                         background = true;
 508
 509                     //check fit
 510                     if (dist2 < c_Tg * var)
 511                     {
 512                         //belongs to the mode
 513                         fitsPDF = true;
 514
 515                         //update distribution
 516
 517                         //update weight
 518                         weight += alphaT;
 519                         float k = alphaT / weight;
 520
 521                         //update mean
 522                         gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
 523
 524                         //update variance
 525                         float varnew = var + k * (dist2 - var);
 526
 527                         //limit the variance
 528                         varnew = ::fmaxf(varnew, c_varMin);
 529                         varnew = ::fminf(varnew, c_varMax);
 530
 531                         gmm_variance(mode * frame.rows + y, x) = varnew;
 532
 533                         //sort
 534                         //all other weights are at the same place and
 535                         //only the matched (iModes) is higher -> just find the new place for it
 536
 537                         for (int i = mode; i > 0; --i)
 538                         {
 539                             //check one up
 540                             if (weight < gmm_weight((i - 1) * frame.rows + y, x))
 541                                 break;
 542
 543                             //swap one up
 544                             swap(gmm_weight, x, y, i - 1, frame.rows);
 545                             swap(gmm_variance, x, y, i - 1, frame.rows);
 546                             swap(gmm_mean, x, y, i - 1, frame.rows);
 547                         }
 548
 549                         //belongs to the mode - bFitsPDF becomes 1
 550                     }
 551                 } // !fitsPDF
 552
 553                 //check prune
 554                 if (weight < -prune)
 555                 {
 556                     weight = 0.0;
 557                     nmodes--;
 558                 }
 559
 560                 gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
 561                 totalWeight += weight;
 562             }
 563
 564             //renormalize weights
 565
 566             totalWeight = 1.f / totalWeight;
 567             for (int mode = 0; mode < nmodes; ++mode)
 568                 gmm_weight(mode * frame.rows + y, x) *= totalWeight;
 569
 570             nmodes = nNewModes;
 571
 572             //make new mode if needed and exit
 573
 574             if (!fitsPDF)
 575             {
 576                 // replace the weakest or add a new one
 577                 int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
 578
 579                 if (nmodes == 1)
 580                     gmm_weight(mode * frame.rows + y, x) = 1.f;
 581                 else
 582                 {
 583                     gmm_weight(mode * frame.rows + y, x) = alphaT;
 584
 585                     // renormalize all other weights
 586
 587                     for (int i = 0; i < nmodes - 1; ++i)
 588                         gmm_weight(i * frame.rows + y, x) *= alpha1;
 589                 }
 590
 591                 // init
 592
 593                 gmm_mean(mode * frame.rows + y, x) = pix;
 594                 gmm_variance(mode * frame.rows + y, x) = c_varInit;
 595
 596                 //sort
 597                 //find the new place for it
 598
 599                 for (int i = nmodes - 1; i > 0; --i)
 600                 {
 601                     // check one up
 602                     if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
 603                         break;
 604
 605                     //swap one up
 606                     swap(gmm_weight, x, y, i - 1, frame.rows);
 607                     swap(gmm_variance, x, y, i - 1, frame.rows);
 608                     swap(gmm_mean, x, y, i - 1, frame.rows);
 609                 }
 610             }
 611
 612             //set the number of modes
 613             modesUsed(y, x) = nmodes;
 614
 615             bool isShadow = false;
 616             if (detectShadows && !background)
 617             {
 618                 float tWeight = 0.0f;
 619
 620                 // check all the components  marked as background:
 621                 for (int mode = 0; mode < nmodes; ++mode)
 622                 {
 623                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
 624
 625                     WorkT pix_mean = pix * mean;
 626
 627                     float numerator = sum(pix_mean);
 628                     float denominator = sqr(mean);
 629
 630                     // no division by zero allowed
 631                     if (denominator == 0)
 632                         break;
 633
 634                     // if tau < a < 1 then also check the color distortion
 635                     if (numerator <= denominator && numerator >= c_tau * denominator)
 636                     {
 637                         float a = numerator / denominator;
 638
 639                         WorkT dD = a * mean - pix;
 640
 641                         if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
 642                         {
 643                             isShadow = true;
 644                             break;
 645                         }
 646                     };
 647
 648                     tWeight += gmm_weight(mode * frame.rows + y, x);
 649                     if (tWeight > c_TB)
 650                         break;
 651                 };
 652             }
 653
 654             fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
 655         }
 656
 657         template <typename SrcT, typename WorkT>
 658         void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
 659                          float alphaT, float prune, bool detectShadows, cudaStream_t stream)
 660         {
 661             dim3 block(32, 8);
 662             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
 663
 664             const float alpha1 = 1.0f - alphaT;
 665
 666             if (detectShadows)
 667             {
 668                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
 669
 670                 mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
 671                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
 672                                                                     alphaT, alpha1, prune);
 673             }
 674             else
 675             {
 676                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
 677
 678                 mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
 679                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
 680                                                                     alphaT, alpha1, prune);
 681             }
 682
 683             cudaSafeCall( cudaGetLastError() );
 684
 685             if (stream == 0)
 686                 cudaSafeCall( cudaDeviceSynchronize() );
 687         }
 688
 689         void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
 690                       float alphaT, float prune, bool detectShadows, cudaStream_t stream)
 691         {
 692             typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
 693
 694             static const func_t funcs[] =
 695             {
 696                 0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
 697             };
 698
 699             funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
 700         }
 701
 702         template <typename WorkT, typename OutT>
 703         __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
 704         {
 705             const int x = blockIdx.x * blockDim.x + threadIdx.x;
 706             const int y = blockIdx.y * blockDim.y + threadIdx.y;
 707
 708             if (x >= modesUsed.cols || y >= modesUsed.rows)
 709                 return;
 710
 711             int nmodes = modesUsed(y, x);
 712
 713             WorkT meanVal = VecTraits<WorkT>::all(0.0f);
 714             float totalWeight = 0.0f;
 715
 716             for (int mode = 0; mode < nmodes; ++mode)
 717             {
 718                 float weight = gmm_weight(mode * modesUsed.rows + y, x);
 719
 720                 WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
 721                 meanVal = meanVal + weight * mean;
 722
 723                 totalWeight += weight;
 724
 725                 if(totalWeight > c_TB)
 726                     break;
 727             }
 728
 729             meanVal = meanVal * (1.f / totalWeight);
 730
 731             dst(y, x) = saturate_cast<OutT>(meanVal);
 732         }
 733
 734         template <typename WorkT, typename OutT>
 735         void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
 736         {
 737             dim3 block(32, 8);
 738             dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
 739
 740             cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
 741
 742             getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
 743             cudaSafeCall( cudaGetLastError() );
 744
 745             if (stream == 0)
 746                 cudaSafeCall( cudaDeviceSynchronize() );
 747         }
 748
 749         void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
 750         {
 751             typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
 752
 753             static const func_t funcs[] =
 754             {
 755                 0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
 756             };
 757
 758             funcs[cn](modesUsed, weight, mean, dst, stream);
 759         }
 760     }
 761 }}}
 762
 763
 764 #endif /* CUDA_DISABLER */