1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #if !defined CUDA_DISABLER
45 #include "opencv2/gpu/device/common.hpp"
46 #include "opencv2/gpu/device/vec_math.hpp"
47 #include "opencv2/gpu/device/limits.hpp"
48 #include "opencv2/gpu/device/utility.hpp"
49 #include "opencv2/gpu/device/reduce.hpp"
50 #include "opencv2/gpu/device/functional.hpp"
51 #include "fgd_bgfg_common.hpp"
53 using namespace cv::gpu;
54 using namespace cv::gpu::device;
58 ////////////////////////////////////////////////////////////////////////////
// Constants for the per-warp software histogram.
//
// On pre-SM12 devices (no shared-memory atomics) each 32-bit counter packs
// the owning lane id into its top LOG_WARP_SIZE bits (the "tag") and the
// actual count into the remaining low bits — see addByte().
const unsigned int UINT_BITS = 32U;
const int LOG_WARP_SIZE = 5;
const int WARP_SIZE = 1 << LOG_WARP_SIZE; // 32 threads per warp

#if (__CUDA_ARCH__ < 120)
    // Strips the lane tag, leaving only the count bits of a counter.
    const unsigned int TAG_MASK = (1U << (UINT_BITS - LOG_WARP_SIZE)) - 1U;
#endif

// Threads per block used by the mergeHistogram kernel.
const int MERGE_THREADBLOCK_SIZE = 256;
// Increment one bin of a per-warp shared-memory histogram.
//
// s_WarpHist_ : this warp's sub-histogram (shared memory)
// data        : bin index to increment
// threadTag   : lane id pre-shifted into the counter's tag bits
//
// On devices without shared-memory atomics (< SM 1.2) the increment is
// emulated: each lane writes its tagged incremented value and retries until
// its own value sticks, serializing colliding lanes within the warp.
// On SM 1.2+ a plain shared-memory atomicInc is used.
__device__ __forceinline__ void addByte(unsigned int* s_WarpHist_, unsigned int data, unsigned int threadTag)
{
#if (__CUDA_ARCH__ < 120)
    volatile unsigned int* s_WarpHist = s_WarpHist_;
    unsigned int count;
    do
    {
        count = s_WarpHist[data] & TAG_MASK; // current count, tag removed
        count = threadTag | (count + 1);     // bump and claim with our lane tag
        s_WarpHist[data] = count;
    } while (s_WarpHist[data] != count);     // retry if another lane won the race
#else
    atomicInc(s_WarpHist_ + data, (unsigned int)(-1));
#endif
}
// Stage 1 of the frame-difference histogram: each block accumulates partial
// 3-channel histograms of the per-channel |curFrame - prevFrame| values into
// partialBuf{0,1,2} (laid out as PARTIAL_HISTOGRAM_COUNT rows of
// HISTOGRAM_BIN_COUNT bins each; both constants come from fgd_bgfg_common.hpp).
//
// Launch: <<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_WARP_COUNT * WARP_SIZE>>>.
// Each warp owns its own sub-histogram in shared memory to reduce collisions.
template <typename PT, typename CT>
__global__ void calcPartialHistogram(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2)
{
#if (__CUDA_ARCH__ < 200)
    const int HISTOGRAM_WARP_COUNT = 4;
#else
    const int HISTOGRAM_WARP_COUNT = 6;
#endif
    const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;
    const int HISTOGRAM_THREADBLOCK_MEMORY = HISTOGRAM_WARP_COUNT * HISTOGRAM_BIN_COUNT;

    // Per-warp subhistogram storage, one channel per array.
    __shared__ unsigned int s_Hist0[HISTOGRAM_THREADBLOCK_MEMORY];
    __shared__ unsigned int s_Hist1[HISTOGRAM_THREADBLOCK_MEMORY];
    __shared__ unsigned int s_Hist2[HISTOGRAM_THREADBLOCK_MEMORY];

    // Clear shared memory storage for current threadblock before processing.
    #pragma unroll
    for (int i = 0; i < (HISTOGRAM_THREADBLOCK_MEMORY / HISTOGRAM_THREADBLOCK_SIZE); ++i)
    {
        s_Hist0[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
        s_Hist1[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
        s_Hist2[threadIdx.x + i * HISTOGRAM_THREADBLOCK_SIZE] = 0;
    }
    __syncthreads();

    const unsigned int warpId = threadIdx.x >> LOG_WARP_SIZE;

    unsigned int* s_WarpHist0 = s_Hist0 + warpId * HISTOGRAM_BIN_COUNT;
    unsigned int* s_WarpHist1 = s_Hist1 + warpId * HISTOGRAM_BIN_COUNT;
    unsigned int* s_WarpHist2 = s_Hist2 + warpId * HISTOGRAM_BIN_COUNT;

    // Lane id stored in the high bits of each counter (see addByte).
    const unsigned int tag = threadIdx.x << (UINT_BITS - LOG_WARP_SIZE);

    // Grid-stride walk over all pixels, flattened to 1D.
    const int dataCount = prevFrame.rows * prevFrame.cols;
    for (unsigned int pos = blockIdx.x * HISTOGRAM_THREADBLOCK_SIZE + threadIdx.x; pos < dataCount; pos += HISTOGRAM_THREADBLOCK_SIZE * PARTIAL_HISTOGRAM_COUNT)
    {
        const unsigned int y = pos / prevFrame.cols;
        const unsigned int x = pos % prevFrame.cols;

        PT prevVal = prevFrame(y, x);
        CT curVal = curFrame(y, x);

        // Per-channel absolute difference selects the bin.
        int3 diff = make_int3(
            ::abs(curVal.x - prevVal.x),
            ::abs(curVal.y - prevVal.y),
            ::abs(curVal.z - prevVal.z)
        );

        addByte(s_WarpHist0, diff.x, tag);
        addByte(s_WarpHist1, diff.y, tag);
        addByte(s_WarpHist2, diff.z, tag);
    }
    __syncthreads();

    // Merge per-warp histograms into per-block and write to global memory.
    for (unsigned int bin = threadIdx.x; bin < HISTOGRAM_BIN_COUNT; bin += HISTOGRAM_THREADBLOCK_SIZE)
    {
        unsigned int sum0 = 0;
        unsigned int sum1 = 0;
        unsigned int sum2 = 0;

        #pragma unroll
        for (int i = 0; i < HISTOGRAM_WARP_COUNT; ++i)
        {
        #if (__CUDA_ARCH__ < 120)
            // Drop the lane tags left behind by the emulated atomics.
            sum0 += s_Hist0[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
            sum1 += s_Hist1[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
            sum2 += s_Hist2[bin + i * HISTOGRAM_BIN_COUNT] & TAG_MASK;
        #else
            sum0 += s_Hist0[bin + i * HISTOGRAM_BIN_COUNT];
            sum1 += s_Hist1[bin + i * HISTOGRAM_BIN_COUNT];
            sum2 += s_Hist2[bin + i * HISTOGRAM_BIN_COUNT];
        #endif
        }

        partialBuf0[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum0;
        partialBuf1[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum1;
        partialBuf2[blockIdx.x * HISTOGRAM_BIN_COUNT + bin] = sum2;
    }
}
// Stage 2: reduce the PARTIAL_HISTOGRAM_COUNT partial histograms into the
// final per-bin totals for all three channels at once.
//
// Launch: <<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE>>>; block blockIdx.x
// produces the total for bin blockIdx.x, written by thread 0 only.
__global__ void mergeHistogram(const unsigned int* partialBuf0, const unsigned int* partialBuf1, const unsigned int* partialBuf2, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2)
{
    unsigned int sum0 = 0;
    unsigned int sum1 = 0;
    unsigned int sum2 = 0;

    // Each thread accumulates a strided subset of the partial results.
    #pragma unroll
    for (unsigned int i = threadIdx.x; i < PARTIAL_HISTOGRAM_COUNT; i += MERGE_THREADBLOCK_SIZE)
    {
        sum0 += partialBuf0[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
        sum1 += partialBuf1[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
        sum2 += partialBuf2[blockIdx.x + i * HISTOGRAM_BIN_COUNT];
    }

    __shared__ unsigned int data0[MERGE_THREADBLOCK_SIZE];
    __shared__ unsigned int data1[MERGE_THREADBLOCK_SIZE];
    __shared__ unsigned int data2[MERGE_THREADBLOCK_SIZE];

    // Block-wide reduction of the three partial sums simultaneously
    // (reduce/smem_tuple come from opencv2/gpu/device/reduce.hpp).
    plus<unsigned int> op;
    reduce<MERGE_THREADBLOCK_SIZE>(smem_tuple(data0, data1, data2), thrust::tie(sum0, sum1, sum2), threadIdx.x, thrust::make_tuple(op, op, op));

    if (threadIdx.x == 0)
    {
        hist0[blockIdx.x] = sum0;
        hist1[blockIdx.x] = sum1;
        hist2[blockIdx.x] = sum2;
    }
}
// Host wrapper: builds the 3-channel histogram of |curFrame - prevFrame| in
// two passes (per-block partial histograms, then a merge pass).
//
// partialBuf* : PARTIAL_HISTOGRAM_COUNT * HISTOGRAM_BIN_COUNT uints each
// hist*       : HISTOGRAM_BIN_COUNT uints each
// cc20        : true on SM 2.0+ devices; must match the per-arch warp count
//               compiled into calcPartialHistogram
// Blocks on completion when stream == 0; otherwise fully asynchronous.
template <typename PT, typename CT>
void calcDiffHistogram_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame,
                           unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
                           unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
                           bool cc20, cudaStream_t stream)
{
    const int HISTOGRAM_WARP_COUNT = cc20 ? 6 : 4;
    const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;

    calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
            (PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
    cudaSafeCall( cudaGetLastError() );

    mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
// Explicit instantiations for every supported prev/cur channel layout.
template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
template void calcDiffHistogram_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
template void calcDiffHistogram_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
template void calcDiffHistogram_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
222 /////////////////////////////////////////////////////////////////////////
223 // calcDiffThreshMask
// Build a change mask: set changeMask(y, x) = 255 wherever any channel of
// |curFrame - prevFrame| exceeds the matching per-channel threshold.
// Pixels at or below the thresholds are left untouched, so the caller is
// expected to pre-clear the mask. One thread per pixel (2D launch).
template <typename PT, typename CT>
__global__ void calcDiffThreshMask(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, uchar3 bestThres, PtrStepb changeMask)
{
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    // '>=' is required here: the previous '>' comparison let threads with
    // y == rows or x == cols through, reading one row/column out of bounds.
    if (y >= prevFrame.rows || x >= prevFrame.cols)
        return;

    PT prevVal = prevFrame(y, x);
    CT curVal = curFrame(y, x);

    int3 diff = make_int3(
        ::abs(curVal.x - prevVal.x),
        ::abs(curVal.y - prevVal.y),
        ::abs(curVal.z - prevVal.z)
    );

    if (diff.x > bestThres.x || diff.y > bestThres.y || diff.z > bestThres.z)
        changeMask(y, x) = 255;
}
// Host wrapper for calcDiffThreshMask: 32x8 blocks covering the whole frame.
// Blocks on completion when stream == 0; otherwise fully asynchronous.
template <typename PT, typename CT>
void calcDiffThreshMask_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream)
{
    dim3 block(32, 8);
    dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

    calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, bestThres, changeMask);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
// Explicit instantiations for every supported prev/cur channel layout.
template void calcDiffThreshMask_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
template void calcDiffThreshMask_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
template void calcDiffThreshMask_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
template void calcDiffThreshMask_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
265 /////////////////////////////////////////////////////////////////////////
266 // bgfgClassification
// Per-pixel FGD statistics descriptor (pointers/strides into the model
// tables), shared by all kernels below via constant memory.
__constant__ BGPixelStat c_stat;

// Upload the host-side statistics descriptor into constant memory.
// Must be called before launching any kernel that reads c_stat.
void setBGPixelStat(const BGPixelStat& stat)
{
    cudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) );
}
// Builds an output pixel from three channel values; the 4-channel
// specialization forces alpha to 255.
template <typename T> struct Output;

template <> struct Output<uchar3>
{
    static __device__ __forceinline__ uchar3 make(uchar v0, uchar v1, uchar v2)
    {
        return make_uchar3(v0, v1, v2);
    }
};

template <> struct Output<uchar4>
{
    static __device__ __forceinline__ uchar4 make(uchar v0, uchar v1, uchar v2)
    {
        return make_uchar4(v0, v1, v2, 255);
    }
};
// Classify each changed pixel (flagged in Ftd or Fbd) as foreground or
// background by comparing it against the stored per-pixel feature vectors in
// c_stat, following the FGD algorithm (Li et al.). Writes 0/1 into
// 'foreground'. One thread per pixel (2D launch).
template <typename PT, typename CT, typename OT>
__global__ void bgfgClassification(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame,
                                   const PtrStepb Ftd, const PtrStepb Fbd, PtrStepb foreground,
                                   int deltaC, int deltaCC, float alpha2, int N1c, int N1cc)
{
    const int i = blockIdx.y * blockDim.y + threadIdx.y;
    const int j = blockIdx.x * blockDim.x + threadIdx.x;

    // '>=' is required: '>' would let i == rows / j == cols threads read
    // one row/column past the image.
    if (i >= prevFrame.rows || j >= prevFrame.cols)
        return;

    if (Fbd(i, j) || Ftd(i, j))
    {
        float Pb  = 0.0f;
        float Pv  = 0.0f;
        float Pvb = 0.0f;

        int val = 0;

        // Is it a motion pixel?
        if (Ftd(i, j))
        {
            if (!c_stat.is_trained_dyn_model(i, j))
                val = 1; // dynamic model not trained yet: assume foreground
            else
            {
                PT prevVal = prevFrame(i, j);
                CT curVal = curFrame(i, j);

                // Compare with stored CCt vectors:
                for (int k = 0; k < N1cc && c_stat.PV_CC(i, j, k) > alpha2; ++k)
                {
                    OT v1 = c_stat.V1_CC<OT>(i, j, k);
                    OT v2 = c_stat.V2_CC<OT>(i, j, k);

                    if (::abs(v1.x - prevVal.x) <= deltaCC &&
                        ::abs(v1.y - prevVal.y) <= deltaCC &&
                        ::abs(v1.z - prevVal.z) <= deltaCC &&
                        ::abs(v2.x - curVal.x) <= deltaCC &&
                        ::abs(v2.y - curVal.y) <= deltaCC &&
                        ::abs(v2.z - curVal.z) <= deltaCC)
                    {
                        Pv += c_stat.PV_CC(i, j, k);
                        Pvb += c_stat.PVB_CC(i, j, k);
                    }
                }

                // Foreground if the matched features are unlikely background.
                Pb = c_stat.Pbcc(i, j);
                if (2 * Pvb * Pb <= Pv)
                    val = 1;
            }
        }
        else if (c_stat.is_trained_st_model(i, j))
        {
            CT curVal = curFrame(i, j);

            // Compare with stored Ct vectors:
            for (int k = 0; k < N1c && c_stat.PV_C(i, j, k) > alpha2; ++k)
            {
                OT v = c_stat.V_C<OT>(i, j, k);

                if (::abs(v.x - curVal.x) <= deltaC &&
                    ::abs(v.y - curVal.y) <= deltaC &&
                    ::abs(v.z - curVal.z) <= deltaC)
                {
                    Pv += c_stat.PV_C(i, j, k);
                    Pvb += c_stat.PVB_C(i, j, k);
                }
            }

            Pb = c_stat.Pbc(i, j);
            if (2 * Pvb * Pb <= Pv)
                val = 1;
        }

        // Update foreground:
        foreground(i, j) = static_cast<uchar>(val);
    } // end if( change detection...
}
// Host wrapper for bgfgClassification: 32x8 blocks over the frame, L1 cache
// preferred (the kernel is global-memory heavy, uses no shared memory).
// Blocks on completion when stream == 0; otherwise fully asynchronous.
template <typename PT, typename CT, typename OT>
void bgfgClassification_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
                            int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream)
{
    dim3 block(32, 8);
    dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

    cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );

    bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame,
                                                               Ftd, Fbd, foreground,
                                                               deltaC, deltaCC, alpha2, N1c, N1cc);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
// Explicit instantiations for every supported prev/cur/output channel layout.
template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template void bgfgClassification_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
397 ////////////////////////////////////////////////////////////////////////////
398 // updateBackgroundModel
// Update the FGD statistical model (c_stat) and the reference background
// image for one pixel. The kernel has three phases:
//   1) "motion" pixels (Ftd set, or dynamic model untrained): update the
//      co-occurrence (CCt) tables keyed on (prev, cur) color pairs;
//   2) "stationary" pixels (Ftd clear): update the single-frame (Ct) tables;
//   3) background pixels: IIR-blend (or replace) the reference BG image.
// One thread per pixel (2D launch). NOTE(review): structure reconstructed
// from a line-stripped dump of the original FGD kernel — verify against the
// upstream OpenCV fgd_bgfg.cu before shipping.
template <typename PT, typename CT, typename OT, class PrevFramePtr2D, class CurFramePtr2D, class FtdPtr2D, class FbdPtr2D>
__global__ void updateBackgroundModel(int cols, int rows, const PrevFramePtr2D prevFrame, const CurFramePtr2D curFrame, const FtdPtr2D Ftd, const FbdPtr2D Fbd,
                                      PtrStepb foreground, PtrStep<OT> background,
                                      int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T)
{
    const int i = blockIdx.y * blockDim.y + threadIdx.y;
    const int j = blockIdx.x * blockDim.x + threadIdx.x;

    // '>=' is required: '>' would let i == rows / j == cols threads access
    // out-of-bounds pixels.
    if (i >= rows || j >= cols)
        return;

    // Entries whose probability decays below this are recycled.
    const float MIN_PV = 1e-10f;

    const uchar is_trained_dyn_model = c_stat.is_trained_dyn_model(i, j);
    if (Ftd(i, j) || !is_trained_dyn_model)
    {
        // Learn faster (alpha3) until the dynamic model is trained.
        const float alpha = is_trained_dyn_model ? alpha2 : alpha3;

        float Pbcc = c_stat.Pbcc(i, j);

        // Update Pb (probability this pixel is background):
        Pbcc *= (1.0f - alpha);
        if (!foreground(i, j))
            Pbcc += alpha;

        int min_dist = numeric_limits<int>::max();
        int indx = -1;

        PT prevVal = prevFrame(i, j);
        CT curVal = curFrame(i, j);

        // Find best Vi match:
        for (int k = 0; k < N2cc; ++k)
        {
            float PV_CC = c_stat.PV_CC(i, j, k);

            if (!PV_CC)
                break;

            if (PV_CC < MIN_PV)
            {
                c_stat.PV_CC(i, j, k) = 0;
                c_stat.PVB_CC(i, j, k) = 0;
                continue;
            }

            // Exponential decay of memory
            c_stat.PV_CC(i, j, k) = PV_CC * (1.0f - alpha);
            c_stat.PVB_CC(i, j, k) = c_stat.PVB_CC(i, j, k) * (1.0f - alpha);

            OT v1 = c_stat.V1_CC<OT>(i, j, k);

            int3 val1 = make_int3(
                ::abs(v1.x - prevVal.x),
                ::abs(v1.y - prevVal.y),
                ::abs(v1.z - prevVal.z)
            );

            OT v2 = c_stat.V2_CC<OT>(i, j, k);

            int3 val2 = make_int3(
                ::abs(v2.x - curVal.x),
                ::abs(v2.y - curVal.y),
                ::abs(v2.z - curVal.z)
            );

            int dist = val1.x + val1.y + val1.z + val2.x + val2.y + val2.z;

            if (dist < min_dist &&
                val1.x <= deltaCC && val1.y <= deltaCC && val1.z <= deltaCC &&
                val2.x <= deltaCC && val2.y <= deltaCC && val2.z <= deltaCC)
            {
                min_dist = dist;
                indx = k;
            }
        }

        if (indx < 0)
        {
            // Replace N2th elem in the table by new feature:
            indx = N2cc - 1;

            c_stat.PV_CC(i, j, indx) = alpha;
            c_stat.PVB_CC(i, j, indx) = alpha;

            // Update Vt:
            c_stat.V1_CC<OT>(i, j, indx) = Output<OT>::make(prevVal.x, prevVal.y, prevVal.z);
            c_stat.V2_CC<OT>(i, j, indx) = Output<OT>::make(curVal.x, curVal.y, curVal.z);
        }
        else
        {
            // Update the matched entry:
            c_stat.PV_CC(i, j, indx) += alpha;

            if (!foreground(i, j))
                c_stat.PVB_CC(i, j, indx) += alpha;
        }

        //re-sort CCt table by Pv
        const float PV_CC_indx = c_stat.PV_CC(i, j, indx);
        const float PVB_CC_indx = c_stat.PVB_CC(i, j, indx);
        const OT V1_CC_indx = c_stat.V1_CC<OT>(i, j, indx);
        const OT V2_CC_indx = c_stat.V2_CC<OT>(i, j, indx);
        for (int k = 0; k < indx; ++k)
        {
            if (c_stat.PV_CC(i, j, k) <= PV_CC_indx)
            {
                // Insert entry 'indx' at position k, shifting [k, indx) down.
                float Pv_tmp1;
                float Pv_tmp2 = PV_CC_indx;

                float Pvb_tmp1;
                float Pvb_tmp2 = PVB_CC_indx;

                OT v1_tmp1;
                OT v1_tmp2 = V1_CC_indx;

                OT v2_tmp1;
                OT v2_tmp2 = V2_CC_indx;

                for (int l = k; l <= indx; ++l)
                {
                    Pv_tmp1 = c_stat.PV_CC(i, j, l);
                    c_stat.PV_CC(i, j, l) = Pv_tmp2;
                    Pv_tmp2 = Pv_tmp1;

                    Pvb_tmp1 = c_stat.PVB_CC(i, j, l);
                    c_stat.PVB_CC(i, j, l) = Pvb_tmp2;
                    Pvb_tmp2 = Pvb_tmp1;

                    v1_tmp1 = c_stat.V1_CC<OT>(i, j, l);
                    c_stat.V1_CC<OT>(i, j, l) = v1_tmp2;
                    v1_tmp2 = v1_tmp1;

                    v2_tmp1 = c_stat.V2_CC<OT>(i, j, l);
                    c_stat.V2_CC<OT>(i, j, l) = v2_tmp2;
                    v2_tmp2 = v2_tmp1;
                }

                break;
            }
        }

        float sum1 = 0.0f;
        float sum2 = 0.0f;

        //check "once-off" changes
        for (int k = 0; k < N1cc; ++k)
        {
            const float PV_CC = c_stat.PV_CC(i, j, k);

            if (!PV_CC)
                break;

            sum1 += PV_CC;
            sum2 += c_stat.PVB_CC(i, j, k);
        }

        if (sum1 > T)
            c_stat.is_trained_dyn_model(i, j) = 1;

        float diff = sum1 - Pbcc * sum2;

        // Update stat table:
        if (diff > T)
        {
            //new BG features are discovered
            for (int k = 0; k < N1cc; ++k)
            {
                const float PV_CC = c_stat.PV_CC(i, j, k);

                if (!PV_CC)
                    break;

                c_stat.PVB_CC(i, j, k) = (PV_CC - Pbcc * c_stat.PVB_CC(i, j, k)) / (1.0f - Pbcc);
            }

            Pbcc = 1.0f - Pbcc;
        }

        c_stat.Pbcc(i, j) = Pbcc;
    }

    // Handle "stationary" pixel:
    if (!Ftd(i, j))
    {
        const float alpha = c_stat.is_trained_st_model(i, j) ? alpha2 : alpha3;

        float Pbc = c_stat.Pbc(i, j);

        // Update Pb:
        Pbc *= (1.0f - alpha);
        if (!foreground(i, j))
            Pbc += alpha;

        int min_dist = numeric_limits<int>::max();
        int indx = -1;

        CT curVal = curFrame(i, j);

        // Find best Vi match:
        for (int k = 0; k < N2c; ++k)
        {
            float PV_C = c_stat.PV_C(i, j, k);

            if (PV_C < MIN_PV)
            {
                c_stat.PV_C(i, j, k) = 0;
                c_stat.PVB_C(i, j, k) = 0;
                continue;
            }

            // Exponential decay of memory
            c_stat.PV_C(i, j, k) = PV_C * (1.0f - alpha);
            c_stat.PVB_C(i, j, k) = c_stat.PVB_C(i, j, k) * (1.0f - alpha);

            OT v = c_stat.V_C<OT>(i, j, k);
            int3 val = make_int3(
                ::abs(v.x - curVal.x),
                ::abs(v.y - curVal.y),
                ::abs(v.z - curVal.z)
            );

            int dist = val.x + val.y + val.z;

            if (dist < min_dist && val.x <= deltaC && val.y <= deltaC && val.z <= deltaC)
            {
                min_dist = dist;
                indx = k;
            }
        }

        if (indx < 0)
        {
            //N2th elem in the table is replaced by a new features
            indx = N2c - 1;

            c_stat.PV_C(i, j, indx) = alpha;
            c_stat.PVB_C(i, j, indx) = alpha;

            // Update Vt:
            c_stat.V_C<OT>(i, j, indx) = Output<OT>::make(curVal.x, curVal.y, curVal.z);
        }
        else
        {
            // Update the matched entry:
            c_stat.PV_C(i, j, indx) += alpha;

            if (!foreground(i, j))
                c_stat.PVB_C(i, j, indx) += alpha;
        }

        //re-sort Ct table by Pv
        const float PV_C_indx = c_stat.PV_C(i, j, indx);
        const float PVB_C_indx = c_stat.PVB_C(i, j, indx);
        OT V_C_indx = c_stat.V_C<OT>(i, j, indx);
        for (int k = 0; k < indx; ++k)
        {
            if (c_stat.PV_C(i, j, k) <= PV_C_indx)
            {
                // Insert entry 'indx' at position k, shifting [k, indx) down.
                float Pv_tmp1;
                float Pv_tmp2 = PV_C_indx;

                float Pvb_tmp1;
                float Pvb_tmp2 = PVB_C_indx;

                OT v_tmp1;
                OT v_tmp2 = V_C_indx;

                for (int l = k; l <= indx; ++l)
                {
                    Pv_tmp1 = c_stat.PV_C(i, j, l);
                    c_stat.PV_C(i, j, l) = Pv_tmp2;
                    Pv_tmp2 = Pv_tmp1;

                    Pvb_tmp1 = c_stat.PVB_C(i, j, l);
                    c_stat.PVB_C(i, j, l) = Pvb_tmp2;
                    Pvb_tmp2 = Pvb_tmp1;

                    v_tmp1 = c_stat.V_C<OT>(i, j, l);
                    c_stat.V_C<OT>(i, j, l) = v_tmp2;
                    v_tmp2 = v_tmp1;
                }

                break;
            }
        }

        // Check "once-off" changes:
        float sum1 = 0.0f;
        float sum2 = 0.0f;
        for (int k = 0; k < N1c; ++k)
        {
            const float PV_C = c_stat.PV_C(i, j, k);

            if (!PV_C)
                break;

            sum1 += PV_C;
            sum2 += c_stat.PVB_C(i, j, k);
        }

        if (sum1 > T)
            c_stat.is_trained_st_model(i, j) = 1;

        float diff = sum1 - Pbc * sum2;

        // Update stat table:
        if (diff > T)
        {
            //new BG features are discovered
            for (int k = 0; k < N1c; ++k)
            {
                const float PV_C = c_stat.PV_C(i, j, k);

                if (!PV_C)
                    break;

                c_stat.PVB_C(i, j, k) = (PV_C - Pbc * c_stat.PVB_C(i, j, k)) / (1.0f - Pbc);
            }

            c_stat.Pbc(i, j) = 1.0f - Pbc;
        }
        else
        {
            c_stat.Pbc(i, j) = Pbc;
        }
    } // if !(change detection) at pixel (i,j)

    // Update the reference BG image:
    if (!foreground(i, j))
    {
        CT curVal = curFrame(i, j);

        if (!Ftd(i, j) && !Fbd(i, j))
        {
            // Unchanged background: IIR-blend the new frame in with weight alpha1.
            OT oldVal = background(i, j);

            int3 newVal = make_int3(
                __float2int_rn(oldVal.x * (1.0f - alpha1) + curVal.x * alpha1),
                __float2int_rn(oldVal.y * (1.0f - alpha1) + curVal.y * alpha1),
                __float2int_rn(oldVal.z * (1.0f - alpha1) + curVal.z * alpha1)
            );

            background(i, j) = Output<OT>::make(
                static_cast<uchar>(newVal.x),
                static_cast<uchar>(newVal.y),
                static_cast<uchar>(newVal.z)
            );
        }
        else
        {
            // Changed but classified background: replace outright.
            background(i, j) = Output<OT>::make(curVal.x, curVal.y, curVal.z);
        }
    }
}
// Host-side launcher for updateBackgroundModel: 32x8 blocks over the frame,
// L1 cache preferred (the kernel uses no shared memory). Blocks on
// completion when stream == 0; otherwise fully asynchronous.
template <typename PT, typename CT, typename OT>
struct UpdateBackgroundModel
{
    static void call(PtrStepSz<PT> prevFrame, PtrStepSz<CT> curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSz<OT> background,
                     int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
                     cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

        cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );

        updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>(
            prevFrame.cols, prevFrame.rows,
            prevFrame, curFrame,
            Ftd, Fbd, foreground, background,
            deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
};
// Type-erased entry point: casts the untyped PtrStepSzb frames to their
// concrete pixel types and forwards to UpdateBackgroundModel::call.
template <typename PT, typename CT, typename OT>
void updateBackgroundModel_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background,
                               int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
                               cudaStream_t stream)
{
    UpdateBackgroundModel<PT, CT, OT>::call(PtrStepSz<PT>(prevFrame), PtrStepSz<CT>(curFrame), Ftd, Fbd, foreground, PtrStepSz<OT>(background),
                                            deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T, stream);
}
// Explicit instantiations for every supported prev/cur/output channel layout.
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
801 #endif /* CUDA_DISABLER */