#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"
-namespace cv { namespace cuda { namespace device
-{
- namespace mog2
- {
- ///////////////////////////////////////////////////////////////
- // Utility
-
- __device__ __forceinline__ float cvt(uchar val)
- {
- return val;
- }
- __device__ __forceinline__ float3 cvt(const uchar3& val)
- {
- return make_float3(val.x, val.y, val.z);
- }
- __device__ __forceinline__ float4 cvt(const uchar4& val)
- {
- return make_float4(val.x, val.y, val.z, val.w);
- }
-
- __device__ __forceinline__ float sqr(float val)
- {
- return val * val;
- }
- __device__ __forceinline__ float sqr(const float3& val)
- {
- return val.x * val.x + val.y * val.y + val.z * val.z;
- }
- __device__ __forceinline__ float sqr(const float4& val)
- {
- return val.x * val.x + val.y * val.y + val.z * val.z;
- }
+#include "mog2.hpp"
- __device__ __forceinline__ float sum(float val)
- {
- return val;
- }
- __device__ __forceinline__ float sum(const float3& val)
- {
- return val.x + val.y + val.z;
- }
- __device__ __forceinline__ float sum(const float4& val)
- {
- return val.x + val.y + val.z;
- }
-
- template <class Ptr2D>
- __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
- {
- typename Ptr2D::elem_type val = ptr(k * rows + y, x);
- ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
- ptr((k + 1) * rows + y, x) = val;
- }
-
- ///////////////////////////////////////////////////////////////
- // MOG2
+namespace cv
+{
+namespace cuda
+{
+namespace device
+{
+namespace mog2
+{
+///////////////////////////////////////////////////////////////
+// Utility
- __constant__ int c_nmixtures;
- __constant__ float c_Tb;
- __constant__ float c_TB;
- __constant__ float c_Tg;
- __constant__ float c_varInit;
- __constant__ float c_varMin;
- __constant__ float c_varMax;
- __constant__ float c_tau;
- __constant__ unsigned char c_shadowVal;
+__device__ __forceinline__ float cvt(uchar val)
+{
+ return val;
+}
+__device__ __forceinline__ float3 cvt(const uchar3 &val)
+{
+ return make_float3(val.x, val.y, val.z);
+}
+__device__ __forceinline__ float4 cvt(const uchar4 &val)
+{
+ return make_float4(val.x, val.y, val.z, val.w);
+}
- void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
- {
- varMin = ::fminf(varMin, varMax);
- varMax = ::fmaxf(varMin, varMax);
-
- cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
- }
+__device__ __forceinline__ float sqr(float val)
+{
+ return val * val;
+}
+__device__ __forceinline__ float sqr(const float3 &val)
+{
+ return val.x * val.x + val.y * val.y + val.z * val.z;
+}
+__device__ __forceinline__ float sqr(const float4 &val)
+{
+ return val.x * val.x + val.y * val.y + val.z * val.z;
+}
- template <bool detectShadows, typename SrcT, typename WorkT>
- __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
- PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
- const float alphaT, const float alpha1, const float prune)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+__device__ __forceinline__ float sum(float val)
+{
+ return val;
+}
+__device__ __forceinline__ float sum(const float3 &val)
+{
+ return val.x + val.y + val.z;
+}
+__device__ __forceinline__ float sum(const float4 &val)
+{
+ return val.x + val.y + val.z;
+}
- if (x >= frame.cols || y >= frame.rows)
- return;
+template <class Ptr2D>
+__device__ __forceinline__ void swap(Ptr2D &ptr, int x, int y, int k, int rows)
+{
+ typename Ptr2D::elem_type val = ptr(k * rows + y, x);
+ ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
+ ptr((k + 1) * rows + y, x) = val;
+}
+
+///////////////////////////////////////////////////////////////
+// MOG2
+
+template <bool detectShadows, typename SrcT, typename WorkT>
+__global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
+ PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
+ const float alphaT, const float alpha1, const float prune, const Constants *const constants)
+{
+ const int x = blockIdx.x * blockDim.x + threadIdx.x;
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;
- WorkT pix = cvt(frame(y, x));
+ if (x < frame.cols && y < frame.rows)
+ {
+ WorkT pix = cvt(frame(y, x));
- //calculate distances to the modes (+ sort)
- //here we need to go in descending order!!!
+ //calculate distances to the modes (+ sort)
+ //here we need to go in descending order!!!
- bool background = false; // true - the pixel classified as background
+ bool background = false; // true - the pixel classified as background
- //internal:
+ //internal:
- bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+ bool fitsPDF = false; //if it remains zero a new GMM mode will be added
- int nmodes = modesUsed(y, x);
- int nNewModes = nmodes; //current number of modes in GMM
+ int nmodes = modesUsed(y, x);
+ const int nNewModes = nmodes; //current number of modes in GMM
- float totalWeight = 0.0f;
+ float totalWeight = 0.0f;
- //go through all modes
+ //go through all modes
- for (int mode = 0; mode < nmodes; ++mode)
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ //need only weight if fit is found
+ float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
+ int swap_count = 0;
+ //fit not found yet
+ if (!fitsPDF)
{
- //need only weight if fit is found
- float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
- int swap_count = 0;
- //fit not found yet
- if (!fitsPDF)
- {
- //check if it belongs to some of the remaining modes
- float var = gmm_variance(mode * frame.rows + y, x);
-
- WorkT mean = gmm_mean(mode * frame.rows + y, x);
+ //check if it belongs to some of the remaining modes
+ const float var = gmm_variance(mode * frame.rows + y, x);
- //calculate difference and distance
- WorkT diff = mean - pix;
- float dist2 = sqr(diff);
+ const WorkT mean = gmm_mean(mode * frame.rows + y, x);
- //background? - Tb - usually larger than Tg
- if (totalWeight < c_TB && dist2 < c_Tb * var)
- background = true;
+ //calculate difference and distance
+ const WorkT diff = mean - pix;
+ const float dist2 = sqr(diff);
- //check fit
- if (dist2 < c_Tg * var)
- {
- //belongs to the mode
- fitsPDF = true;
+ //background? - Tb - usually larger than Tg
+ if (totalWeight < constants->TB_ && dist2 < constants->Tb_ * var)
+ background = true;
- //update distribution
+ //check fit
+ if (dist2 < constants->Tg_ * var)
+ {
+ //belongs to the mode
+ fitsPDF = true;
- //update weight
- weight += alphaT;
- float k = alphaT / weight;
+ //update distribution
- //update mean
- gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
+ //update weight
+ weight += alphaT;
+ float k = alphaT / weight;
- //update variance
- float varnew = var + k * (dist2 - var);
+ //update mean
+ gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
- //limit the variance
- varnew = ::fmaxf(varnew, c_varMin);
- varnew = ::fminf(varnew, c_varMax);
+ //update variance
+ float varnew = var + k * (dist2 - var);
- gmm_variance(mode * frame.rows + y, x) = varnew;
+ //limit the variance
+ varnew = ::fmaxf(varnew, constants->varMin_);
+ varnew = ::fminf(varnew, constants->varMax_);
- //sort
- //all other weights are at the same place and
- //only the matched (iModes) is higher -> just find the new place for it
+ gmm_variance(mode * frame.rows + y, x) = varnew;
- for (int i = mode; i > 0; --i)
- {
- //check one up
- if (weight < gmm_weight((i - 1) * frame.rows + y, x))
- break;
+ //sort
+ //all other weights are at the same place and
+ //only the matched (iModes) is higher -> just find the new place for it
- swap_count++;
- //swap one up
- swap(gmm_weight, x, y, i - 1, frame.rows);
- swap(gmm_variance, x, y, i - 1, frame.rows);
- swap(gmm_mean, x, y, i - 1, frame.rows);
- }
+ for (int i = mode; i > 0; --i)
+ {
+ //check one up
+ if (weight < gmm_weight((i - 1) * frame.rows + y, x))
+ break;
- //belongs to the mode - bFitsPDF becomes 1
+ swap_count++;
+ //swap one up
+ swap(gmm_weight, x, y, i - 1, frame.rows);
+ swap(gmm_variance, x, y, i - 1, frame.rows);
+ swap(gmm_mean, x, y, i - 1, frame.rows);
}
- } // !fitsPDF
- //check prune
- if (weight < -prune)
- {
- weight = 0.0f;
- nmodes--;
+ //belongs to the mode - bFitsPDF becomes 1
}
+ } // !fitsPDF
- gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
- totalWeight += weight;
+ //check prune
+ if (weight < -prune)
+ {
+ weight = 0.0f;
+ nmodes--;
}
- //renormalize weights
+ gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
+ totalWeight += weight;
+ }
- totalWeight = 1.f / totalWeight;
- for (int mode = 0; mode < nmodes; ++mode)
- gmm_weight(mode * frame.rows + y, x) *= totalWeight;
+ //renormalize weights
- nmodes = nNewModes;
+ totalWeight = 1.f / totalWeight;
+ for (int mode = 0; mode < nmodes; ++mode)
+ gmm_weight(mode * frame.rows + y, x) *= totalWeight;
- //make new mode if needed and exit
+ nmodes = nNewModes;
- if (!fitsPDF)
- {
- // replace the weakest or add a new one
- int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
+ //make new mode if needed and exit
- if (nmodes == 1)
- gmm_weight(mode * frame.rows + y, x) = 1.f;
- else
- {
- gmm_weight(mode * frame.rows + y, x) = alphaT;
+ if (!fitsPDF)
+ {
+ // replace the weakest or add a new one
+ const int mode = nmodes == constants->nmixtures_ ? constants->nmixtures_ - 1 : nmodes++;
- // renormalize all other weights
+ if (nmodes == 1)
+ gmm_weight(mode * frame.rows + y, x) = 1.f;
+ else
+ {
+ gmm_weight(mode * frame.rows + y, x) = alphaT;
- for (int i = 0; i < nmodes - 1; ++i)
- gmm_weight(i * frame.rows + y, x) *= alpha1;
- }
+ // renormalize all other weights
- // init
+ for (int i = 0; i < nmodes - 1; ++i)
+ gmm_weight(i * frame.rows + y, x) *= alpha1;
+ }
- gmm_mean(mode * frame.rows + y, x) = pix;
- gmm_variance(mode * frame.rows + y, x) = c_varInit;
+ // init
- //sort
- //find the new place for it
+ gmm_mean(mode * frame.rows + y, x) = pix;
+ gmm_variance(mode * frame.rows + y, x) = constants->varInit_;
- for (int i = nmodes - 1; i > 0; --i)
- {
- // check one up
- if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
- break;
+ //sort
+ //find the new place for it
- //swap one up
- swap(gmm_weight, x, y, i - 1, frame.rows);
- swap(gmm_variance, x, y, i - 1, frame.rows);
- swap(gmm_mean, x, y, i - 1, frame.rows);
- }
+ for (int i = nmodes - 1; i > 0; --i)
+ {
+ // check one up
+ if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
+ break;
+
+ //swap one up
+ swap(gmm_weight, x, y, i - 1, frame.rows);
+ swap(gmm_variance, x, y, i - 1, frame.rows);
+ swap(gmm_mean, x, y, i - 1, frame.rows);
}
+ }
- //set the number of modes
- modesUsed(y, x) = nmodes;
+ //set the number of modes
+ modesUsed(y, x) = nmodes;
- bool isShadow = false;
- if (detectShadows && !background)
- {
- float tWeight = 0.0f;
+ bool isShadow = false;
+ if (detectShadows && !background)
+ {
+ float tWeight = 0.0f;
- // check all the components marked as background:
- for (int mode = 0; mode < nmodes; ++mode)
- {
- WorkT mean = gmm_mean(mode * frame.rows + y, x);
+ // check all the components marked as background:
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ const WorkT mean = gmm_mean(mode * frame.rows + y, x);
- WorkT pix_mean = pix * mean;
+ const WorkT pix_mean = pix * mean;
- float numerator = sum(pix_mean);
- float denominator = sqr(mean);
+ const float numerator = sum(pix_mean);
+ const float denominator = sqr(mean);
- // no division by zero allowed
- if (denominator == 0)
- break;
-
- // if tau < a < 1 then also check the color distortion
- if (numerator <= denominator && numerator >= c_tau * denominator)
- {
- float a = numerator / denominator;
+ // no division by zero allowed
+ if (denominator == 0)
+ break;
- WorkT dD = a * mean - pix;
+ // if tau < a < 1 then also check the color distortion
+ else if (numerator <= denominator && numerator >= constants->tau_ * denominator)
+ {
+ const float a = numerator / denominator;
- if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
- {
- isShadow = true;
- break;
- }
- };
+ WorkT dD = a * mean - pix;
- tWeight += gmm_weight(mode * frame.rows + y, x);
- if (tWeight > c_TB)
+ if (sqr(dD) < constants->Tb_ * gmm_variance(mode * frame.rows + y, x) * a * a)
+ {
+ isShadow = true;
break;
- }
- }
+ }
+ };
- fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
+ tWeight += gmm_weight(mode * frame.rows + y, x);
+ if (tWeight > constants->TB_)
+ break;
+ }
}
- template <typename SrcT, typename WorkT>
- void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
- float alphaT, float prune, bool detectShadows, cudaStream_t stream)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
- const float alpha1 = 1.0f - alphaT;
+ fgmask(y, x) = background ? 0 : isShadow ? constants->shadowVal_ : 255;
+ }
+}
- if (detectShadows)
- {
- cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+template <typename SrcT, typename WorkT>
+void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+ float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream)
+{
+ dim3 block(32, 8);
+ dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
- mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
- weight, variance, (PtrStepSz<WorkT>) mean,
- alphaT, alpha1, prune);
- }
- else
- {
- cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+ const float alpha1 = 1.0f - alphaT;
- mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
- weight, variance, (PtrStepSz<WorkT>) mean,
- alphaT, alpha1, prune);
- }
+ if (detectShadows)
+ {
+ cudaSafeCall(cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1));
- cudaSafeCall( cudaGetLastError() );
+ mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>)frame, fgmask, modesUsed,
+ weight, variance, (PtrStepSz<WorkT>)mean,
+ alphaT, alpha1, prune, constants);
+ }
+ else
+ {
+ cudaSafeCall(cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1));
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
+ mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>)frame, fgmask, modesUsed,
+ weight, variance, (PtrStepSz<WorkT>)mean,
+ alphaT, alpha1, prune, constants);
+ }
- void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
- float alphaT, float prune, bool detectShadows, cudaStream_t stream)
- {
- typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+ cudaSafeCall(cudaGetLastError());
- static const func_t funcs[] =
- {
- 0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
- };
+ if (stream == 0)
+ cudaSafeCall(cudaDeviceSynchronize());
+}
- funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
- }
+void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+ float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream)
+{
+ typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream);
- template <typename WorkT, typename OutT>
- __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
+ static const func_t funcs[] =
{
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
+ 0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>};
- if (x >= modesUsed.cols || y >= modesUsed.rows)
- return;
+ funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, constants, stream);
+}
- int nmodes = modesUsed(y, x);
+template <typename WorkT, typename OutT>
+__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst, const Constants *const constants)
+{
+ const int x = blockIdx.x * blockDim.x + threadIdx.x;
+ const int y = blockIdx.y * blockDim.y + threadIdx.y;
- WorkT meanVal = VecTraits<WorkT>::all(0.0f);
- float totalWeight = 0.0f;
+ if (x >= modesUsed.cols || y >= modesUsed.rows)
+ return;
- for (int mode = 0; mode < nmodes; ++mode)
- {
- float weight = gmm_weight(mode * modesUsed.rows + y, x);
+ int nmodes = modesUsed(y, x);
- WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
- meanVal = meanVal + weight * mean;
+ WorkT meanVal = VecTraits<WorkT>::all(0.0f);
+ float totalWeight = 0.0f;
- totalWeight += weight;
+ for (int mode = 0; mode < nmodes; ++mode)
+ {
+ float weight = gmm_weight(mode * modesUsed.rows + y, x);
- if(totalWeight > c_TB)
- break;
- }
+ WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
+ meanVal = meanVal + weight * mean;
- meanVal = meanVal * (1.f / totalWeight);
+ totalWeight += weight;
- dst(y, x) = saturate_cast<OutT>(meanVal);
- }
+ if (totalWeight > constants->TB_)
+ break;
+ }
- template <typename WorkT, typename OutT>
- void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
+ meanVal = meanVal * (1.f / totalWeight);
- cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+ dst(y, x) = saturate_cast<OutT>(meanVal);
+}
- getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
- cudaSafeCall( cudaGetLastError() );
+template <typename WorkT, typename OutT>
+void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream)
+{
+ dim3 block(32, 8);
+ dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
+ cudaSafeCall(cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1));
- void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
- {
- typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
+ getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>)mean, (PtrStepSz<OutT>)dst, constants);
+ cudaSafeCall(cudaGetLastError());
- static const func_t funcs[] =
- {
- 0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
- };
+ if (stream == 0)
+ cudaSafeCall(cudaDeviceSynchronize());
+}
- funcs[cn](modesUsed, weight, mean, dst, stream);
- }
- }
-}}}
+void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream)
+{
+ typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream);
+ static const func_t funcs[] =
+ {
+ 0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>};
+
+ funcs[cn](modesUsed, weight, mean, dst, constants, stream);
+}
+} // namespace mog2
+} // namespace device
+} // namespace cuda
+} // namespace cv
#endif /* CUDA_DISABLER */
//M*/
#include "precomp.hpp"
+#include "cuda/mog2.hpp"
using namespace cv;
using namespace cv::cuda;
+using namespace cv::cuda::device::mog2;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorMOG2>(); }
+Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int, double, bool)
+{
+ throw_no_cuda();
+ return Ptr<cuda::BackgroundSubtractorMOG2>();
+}
#else
-namespace cv { namespace cuda { namespace device
+namespace
{
- namespace mog2
- {
- void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
- void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
- void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
- }
-}}}
+// default parameters of gaussian background detection algorithm
+const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
+const float defaultVarThreshold = 4.0f * 4.0f;
+const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+const float defaultVarThresholdGen = 3.0f * 3.0f;
+const float defaultVarInit = 15.0f; // initial variance for new components
+const float defaultVarMax = 5.0f * defaultVarInit;
+const float defaultVarMin = 4.0f;
+
+// additional parameters
+const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation
+
+class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2
+{
+public:
+ MOG2Impl(int history, double varThreshold, bool detectShadows);
+ ~MOG2Impl();
-namespace
+ void apply(InputArray image, OutputArray fgmask, double learningRate = -1) CV_OVERRIDE;
+ void apply(InputArray image, OutputArray fgmask, double learningRate, Stream &stream) CV_OVERRIDE;
+
+ void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
+ void getBackgroundImage(OutputArray backgroundImage, Stream &stream) const CV_OVERRIDE;
+
+ int getHistory() const CV_OVERRIDE { return history_; }
+ void setHistory(int history) CV_OVERRIDE { history_ = history; }
+
+ int getNMixtures() const CV_OVERRIDE { return constantsHost_.nmixtures_; }
+ void setNMixtures(int nmixtures) CV_OVERRIDE { constantsHost_.nmixtures_ = nmixtures; }
+
+ double getBackgroundRatio() const CV_OVERRIDE { return constantsHost_.TB_; }
+ void setBackgroundRatio(double ratio) CV_OVERRIDE { constantsHost_.TB_ = (float)ratio; }
+
+ double getVarThreshold() const CV_OVERRIDE { return constantsHost_.Tb_; }
+ void setVarThreshold(double varThreshold) CV_OVERRIDE { constantsHost_.Tb_ = (float)varThreshold; }
+
+ double getVarThresholdGen() const CV_OVERRIDE { return constantsHost_.Tg_; }
+ void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { constantsHost_.Tg_ = (float)varThresholdGen; }
+
+ double getVarInit() const CV_OVERRIDE { return constantsHost_.varInit_; }
+ void setVarInit(double varInit) CV_OVERRIDE { constantsHost_.varInit_ = (float)varInit; }
+
+ double getVarMin() const CV_OVERRIDE { return constantsHost_.varMin_; }
+ void setVarMin(double varMin) CV_OVERRIDE { constantsHost_.varMin_ = ::fminf((float)varMin, constantsHost_.varMax_); }
+
+ double getVarMax() const CV_OVERRIDE { return constantsHost_.varMax_; }
+ void setVarMax(double varMax) CV_OVERRIDE { constantsHost_.varMax_ = ::fmaxf(constantsHost_.varMin_, (float)varMax); }
+
+ double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; }
+ void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float)ct; }
+
+ bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; }
+ void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; }
+
+ int getShadowValue() const CV_OVERRIDE { return constantsHost_.shadowVal_; }
+ void setShadowValue(int value) CV_OVERRIDE { constantsHost_.shadowVal_ = (uchar)value; }
+
+ double getShadowThreshold() const CV_OVERRIDE { return constantsHost_.tau_; }
+ void setShadowThreshold(double threshold) CV_OVERRIDE { constantsHost_.tau_ = (float)threshold; }
+
+private:
+ void initialize(Size frameSize, int frameType, Stream &stream);
+
+ Constants constantsHost_;
+ Constants *constantsDevice_;
+
+ int history_;
+ float ct_;
+ bool detectShadows_;
+
+ Size frameSize_;
+ int frameType_;
+ int nframes_;
+
+ GpuMat weight_;
+ GpuMat variance_;
+ GpuMat mean_;
+
+ //keep track of number of modes per pixel
+ GpuMat bgmodelUsedModes_;
+};
+
+MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+ history_ = history > 0 ? history : defaultHistory;
+ detectShadows_ = detectShadows;
+ ct_ = defaultCT;
+
+ setNMixtures(defaultNMixtures);
+ setBackgroundRatio(defaultBackgroundRatio);
+ setVarInit(defaultVarInit);
+ setVarMin(defaultVarMin);
+ setVarMax(defaultVarMax);
+ setVarThreshold(varThreshold > 0 ? (float)varThreshold : defaultVarThreshold);
+ setVarThresholdGen(defaultVarThresholdGen);
+
+ setShadowValue(defaultShadowValue);
+ setShadowThreshold(defaultShadowThreshold);
+
+ cudaSafeCall(cudaMalloc((void **)&constantsDevice_, sizeof(Constants)));
+}
+
+MOG2Impl::~MOG2Impl()
+{
+ cudaFree(constantsDevice_);
+}
+
+void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate)
+{
+ apply(image, fgmask, learningRate, Stream::Null());
+}
+
+void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream &stream)
{
- // default parameters of gaussian background detection algorithm
- const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
- const float defaultVarThreshold = 4.0f * 4.0f;
- const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
- const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
- const float defaultVarThresholdGen = 3.0f * 3.0f;
- const float defaultVarInit = 15.0f; // initial variance for new components
- const float defaultVarMax = 5.0f * defaultVarInit;
- const float defaultVarMin = 4.0f;
-
- // additional parameters
- const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
- const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
- const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation
-
- class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2
- {
- public:
- MOG2Impl(int history, double varThreshold, bool detectShadows);
-
- void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE;
- void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) CV_OVERRIDE;
-
- void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
- void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const CV_OVERRIDE;
-
- int getHistory() const CV_OVERRIDE { return history_; }
- void setHistory(int history) CV_OVERRIDE { history_ = history; }
-
- int getNMixtures() const CV_OVERRIDE { return nmixtures_; }
- void setNMixtures(int nmixtures) CV_OVERRIDE { nmixtures_ = nmixtures; }
-
- double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio_; }
- void setBackgroundRatio(double ratio) CV_OVERRIDE { backgroundRatio_ = (float) ratio; }
-
- double getVarThreshold() const CV_OVERRIDE { return varThreshold_; }
- void setVarThreshold(double varThreshold) CV_OVERRIDE { varThreshold_ = (float) varThreshold; }
-
- double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen_; }
- void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { varThresholdGen_ = (float) varThresholdGen; }
-
- double getVarInit() const CV_OVERRIDE { return varInit_; }
- void setVarInit(double varInit) CV_OVERRIDE { varInit_ = (float) varInit; }
-
- double getVarMin() const CV_OVERRIDE { return varMin_; }
- void setVarMin(double varMin) CV_OVERRIDE { varMin_ = (float) varMin; }
-
- double getVarMax() const CV_OVERRIDE { return varMax_; }
- void setVarMax(double varMax) CV_OVERRIDE { varMax_ = (float) varMax; }
-
- double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; }
- void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float) ct; }
-
- bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; }
- void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; }
-
- int getShadowValue() const CV_OVERRIDE { return shadowValue_; }
- void setShadowValue(int value) CV_OVERRIDE { shadowValue_ = (uchar) value; }
+ using namespace cv::cuda::device::mog2;
+
+ GpuMat frame = _frame.getGpuMat();
- double getShadowThreshold() const CV_OVERRIDE { return shadowThreshold_; }
- void setShadowThreshold(double threshold) CV_OVERRIDE { shadowThreshold_ = (float) threshold; }
-
- private:
- void initialize(Size frameSize, int frameType);
-
- int history_;
- int nmixtures_;
- float backgroundRatio_;
- float varThreshold_;
- float varThresholdGen_;
- float varInit_;
- float varMin_;
- float varMax_;
- float ct_;
- bool detectShadows_;
- uchar shadowValue_;
- float shadowThreshold_;
-
- Size frameSize_;
- int frameType_;
- int nframes_;
-
- GpuMat weight_;
- GpuMat variance_;
- GpuMat mean_;
-
- //keep track of number of modes per pixel
- GpuMat bgmodelUsedModes_;
- };
-
- MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) :
- frameSize_(0, 0), frameType_(0), nframes_(0)
- {
- history_ = history > 0 ? history : defaultHistory;
- varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold;
- detectShadows_ = detectShadows;
-
- nmixtures_ = defaultNMixtures;
- backgroundRatio_ = defaultBackgroundRatio;
- varInit_ = defaultVarInit;
- varMax_ = defaultVarMax;
- varMin_ = defaultVarMin;
- varThresholdGen_ = defaultVarThresholdGen;
- ct_ = defaultCT;
- shadowValue_ = defaultShadowValue;
- shadowThreshold_ = defaultShadowThreshold;
- }
+ int ch = frame.channels();
+ int work_ch = ch;
- void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate)
- {
- apply(image, fgmask, learningRate, Stream::Null());
- }
+ if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
+ initialize(frame.size(), frame.type(), stream);
- void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
- {
- using namespace cv::cuda::device::mog2;
+ _fgmask.create(frameSize_, CV_8UC1);
+ GpuMat fgmask = _fgmask.getGpuMat();
- GpuMat frame = _frame.getGpuMat();
+ fgmask.setTo(Scalar::all(0), stream);
- int ch = frame.channels();
- int work_ch = ch;
+ ++nframes_;
+ learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_);
+ CV_Assert(learningRate >= 0);
- if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
- initialize(frame.size(), frame.type());
+ mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
+ (float)learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, constantsDevice_, StreamAccessor::getStream(stream));
+}
+
+void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
+{
+ getBackgroundImage(backgroundImage, Stream::Null());
+}
+
+void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream &stream) const
+{
+ using namespace cv::cuda::device::mog2;
+
+ _backgroundImage.create(frameSize_, frameType_);
+ GpuMat backgroundImage = _backgroundImage.getGpuMat();
+
+ getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, constantsDevice_, StreamAccessor::getStream(stream));
+}
+
+void MOG2Impl::initialize(cv::Size frameSize, int frameType, Stream &stream)
+{
+ using namespace cv::cuda::device::mog2;
- _fgmask.create(frameSize_, CV_8UC1);
- GpuMat fgmask = _fgmask.getGpuMat();
+ CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
- fgmask.setTo(Scalar::all(0), stream);
+ frameSize_ = frameSize;
+ frameType_ = frameType;
+ nframes_ = 0;
- ++nframes_;
- learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_);
- CV_Assert( learningRate >= 0 );
+ const int ch = CV_MAT_CN(frameType);
+ const int work_ch = ch;
- mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
- (float) learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream));
- }
+ // for each gaussian mixture of each pixel bg model we store ...
+ // the mixture weight (w),
+ // the mean (nchannels values) and
+ // the covariance
+ weight_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1);
+ variance_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1);
+ mean_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC(work_ch));
- void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
- {
- getBackgroundImage(backgroundImage, Stream::Null());
- }
-
- void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
- {
- using namespace cv::cuda::device::mog2;
-
- _backgroundImage.create(frameSize_, frameType_);
- GpuMat backgroundImage = _backgroundImage.getGpuMat();
-
- getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream));
- }
-
- void MOG2Impl::initialize(cv::Size frameSize, int frameType)
- {
- using namespace cv::cuda::device::mog2;
-
- CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
-
- frameSize_ = frameSize;
- frameType_ = frameType;
- nframes_ = 0;
-
- int ch = CV_MAT_CN(frameType);
- int work_ch = ch;
-
- // for each gaussian mixture of each pixel bg model we store ...
- // the mixture weight (w),
- // the mean (nchannels values) and
- // the covariance
- weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
- variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
- mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-
- //make the array for keeping track of the used modes per pixel - all zeros at start
- bgmodelUsedModes_.create(frameSize_, CV_8UC1);
- bgmodelUsedModes_.setTo(Scalar::all(0));
+ //make the array for keeping track of the used modes per pixel - all zeros at start
+ bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+ bgmodelUsedModes_.setTo(Scalar::all(0));
- loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_);
- }
+ cudaSafeCall(cudaMemcpyAsync(constantsDevice_, &constantsHost_, sizeof(Constants), cudaMemcpyHostToDevice, StreamAccessor::getStream(stream)));
}
+} // namespace
Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows)
{