From 5bf73457431b7d2cb87ac8c107865388dbf66642 Mon Sep 17 00:00:00 2001 From: jeffeDurand Date: Thu, 19 Dec 2019 05:02:48 -0500 Subject: [PATCH] Merge pull request #16090 from jeffeDurand:cuda_mog2_issue_5296 * cuda_mog2_issue_5296 --- modules/cudabgsegm/src/cuda/mog2.cu | 581 +++++++++++++++++------------------ modules/cudabgsegm/src/cuda/mog2.hpp | 37 +++ modules/cudabgsegm/src/mog2.cpp | 354 +++++++++++---------- 3 files changed, 493 insertions(+), 479 deletions(-) create mode 100644 modules/cudabgsegm/src/cuda/mog2.hpp diff --git a/modules/cudabgsegm/src/cuda/mog2.cu b/modules/cudabgsegm/src/cuda/mog2.cu index 789afa4..46891c6 100644 --- a/modules/cudabgsegm/src/cuda/mog2.cu +++ b/modules/cudabgsegm/src/cuda/mog2.cu @@ -47,393 +47,372 @@ #include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/limits.hpp" -namespace cv { namespace cuda { namespace device -{ - namespace mog2 - { - /////////////////////////////////////////////////////////////// - // Utility - - __device__ __forceinline__ float cvt(uchar val) - { - return val; - } - __device__ __forceinline__ float3 cvt(const uchar3& val) - { - return make_float3(val.x, val.y, val.z); - } - __device__ __forceinline__ float4 cvt(const uchar4& val) - { - return make_float4(val.x, val.y, val.z, val.w); - } - - __device__ __forceinline__ float sqr(float val) - { - return val * val; - } - __device__ __forceinline__ float sqr(const float3& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } - __device__ __forceinline__ float sqr(const float4& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } +#include "mog2.hpp" - __device__ __forceinline__ float sum(float val) - { - return val; - } - __device__ __forceinline__ float sum(const float3& val) - { - return val.x + val.y + val.z; - } - __device__ __forceinline__ float sum(const float4& val) - { - return val.x + val.y + val.z; - } - - template - __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows) - { - typename Ptr2D::elem_type val = ptr(k * rows + y, x); - ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); - ptr((k + 1) * rows + y, x) = val; - } - - /////////////////////////////////////////////////////////////// - // MOG2 +namespace cv +{ +namespace cuda +{ +namespace device +{ +namespace mog2 +{ +/////////////////////////////////////////////////////////////// +// Utility - __constant__ int c_nmixtures; - __constant__ float c_Tb; - __constant__ float c_TB; - __constant__ float c_Tg; - __constant__ float c_varInit; - __constant__ float c_varMin; - __constant__ float c_varMax; - __constant__ float c_tau; - __constant__ unsigned char c_shadowVal; +__device__ __forceinline__ float cvt(uchar val) +{ + return val; +} +__device__ __forceinline__ float3 cvt(const uchar3 &val) +{ + return make_float3(val.x, val.y, val.z); +} +__device__ __forceinline__ float4 cvt(const uchar4 &val) +{ + return make_float4(val.x, val.y, val.z, val.w); +} - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) - { - varMin = ::fminf(varMin, varMax); - varMax = ::fmaxf(varMin, varMax); - - cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); - } +__device__ __forceinline__ float sqr(float val) +{ + return val * val; +} +__device__ __forceinline__ float sqr(const float3 &val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} +__device__ __forceinline__ float sqr(const float4 &val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} - template - __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, - PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, - const float alphaT, const float alpha1, const float prune) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; +__device__ __forceinline__ float sum(float val) +{ + return val; +} +__device__ __forceinline__ float sum(const float3 &val) +{ + return val.x + val.y + val.z; +} +__device__ __forceinline__ float sum(const float4 &val) +{ + return val.x + val.y + val.z; +} - if (x >= frame.cols || y >= frame.rows) - return; +template +__device__ __forceinline__ void swap(Ptr2D &ptr, int x, int y, int k, int rows) +{ + typename Ptr2D::elem_type val = ptr(k * rows + y, x); + ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); + ptr((k + 1) * rows + y, x) = val; +} + +/////////////////////////////////////////////////////////////// +// MOG2 + +template +__global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, + const float alphaT, const float alpha1, const float prune, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT pix = cvt(frame(y, x)); + if (x < frame.cols && y < frame.rows) + { + WorkT pix = cvt(frame(y, x)); - //calculate distances to the modes (+ sort) - //here we need to go in descending order!!! + //calculate distances to the modes (+ sort) + //here we need to go in descending order!!! - bool background = false; // true - the pixel classified as background + bool background = false; // true - the pixel classified as background - //internal: + //internal: - bool fitsPDF = false; //if it remains zero a new GMM mode will be added + bool fitsPDF = false; //if it remains zero a new GMM mode will be added - int nmodes = modesUsed(y, x); - int nNewModes = nmodes; //current number of modes in GMM + int nmodes = modesUsed(y, x); + const int nNewModes = nmodes; //current number of modes in GMM - float totalWeight = 0.0f; + float totalWeight = 0.0f; - //go through all modes + //go through all modes - for (int mode = 0; mode < nmodes; ++mode) + for (int mode = 0; mode < nmodes; ++mode) + { + //need only weight if fit is found + float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; + int swap_count = 0; + //fit not found yet + if (!fitsPDF) { - //need only weight if fit is found - float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; - int swap_count = 0; - //fit not found yet - if (!fitsPDF) - { - //check if it belongs to some of the remaining modes - float var = gmm_variance(mode * frame.rows + y, x); - - WorkT mean = gmm_mean(mode * frame.rows + y, x); + //check if it belongs to some of the remaining modes + const float var = gmm_variance(mode * frame.rows + y, x); - //calculate difference and distance - WorkT diff = mean - pix; - float dist2 = sqr(diff); + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - //background? - Tb - usually larger than Tg - if (totalWeight < c_TB && dist2 < c_Tb * var) - background = true; + //calculate difference and distance + const WorkT diff = mean - pix; + const float dist2 = sqr(diff); - //check fit - if (dist2 < c_Tg * var) - { - //belongs to the mode - fitsPDF = true; + //background? - Tb - usually larger than Tg + if (totalWeight < constants->TB_ && dist2 < constants->Tb_ * var) + background = true; - //update distribution + //check fit + if (dist2 < constants->Tg_ * var) + { + //belongs to the mode + fitsPDF = true; - //update weight - weight += alphaT; - float k = alphaT / weight; + //update distribution - //update mean - gmm_mean(mode * frame.rows + y, x) = mean - k * diff; + //update weight + weight += alphaT; + float k = alphaT / weight; - //update variance - float varnew = var + k * (dist2 - var); + //update mean + gmm_mean(mode * frame.rows + y, x) = mean - k * diff; - //limit the variance - varnew = ::fmaxf(varnew, c_varMin); - varnew = ::fminf(varnew, c_varMax); + //update variance + float varnew = var + k * (dist2 - var); - gmm_variance(mode * frame.rows + y, x) = varnew; + //limit the variance + varnew = ::fmaxf(varnew, constants->varMin_); + varnew = ::fminf(varnew, constants->varMax_); - //sort - //all other weights are at the same place and - //only the matched (iModes) is higher -> just find the new place for it + gmm_variance(mode * frame.rows + y, x) = varnew; - for (int i = mode; i > 0; --i) - { - //check one up - if (weight < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //all other weights are at the same place and + //only the matched (iModes) is higher -> just find the new place for it - swap_count++; - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = mode; i > 0; --i) + { + //check one up + if (weight < gmm_weight((i - 1) * frame.rows + y, x)) + break; - //belongs to the mode - bFitsPDF becomes 1 + swap_count++; + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } - } // !fitsPDF - //check prune - if (weight < -prune) - { - weight = 0.0f; - nmodes--; + //belongs to the mode - bFitsPDF becomes 1 } + } // !fitsPDF - gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value - totalWeight += weight; + //check prune + if (weight < -prune) + { + weight = 0.0f; + nmodes--; } - //renormalize weights + gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value + totalWeight += weight; + } - totalWeight = 1.f / totalWeight; - for (int mode = 0; mode < nmodes; ++mode) - gmm_weight(mode * frame.rows + y, x) *= totalWeight; + //renormalize weights - nmodes = nNewModes; + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + gmm_weight(mode * frame.rows + y, x) *= totalWeight; - //make new mode if needed and exit + nmodes = nNewModes; - if (!fitsPDF) - { - // replace the weakest or add a new one - int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; + //make new mode if needed and exit - if (nmodes == 1) - gmm_weight(mode * frame.rows + y, x) = 1.f; - else - { - gmm_weight(mode * frame.rows + y, x) = alphaT; + if (!fitsPDF) + { + // replace the weakest or add a new one + const int mode = nmodes == constants->nmixtures_ ? constants->nmixtures_ - 1 : nmodes++; - // renormalize all other weights + if (nmodes == 1) + gmm_weight(mode * frame.rows + y, x) = 1.f; + else + { + gmm_weight(mode * frame.rows + y, x) = alphaT; - for (int i = 0; i < nmodes - 1; ++i) - gmm_weight(i * frame.rows + y, x) *= alpha1; - } + // renormalize all other weights - // init + for (int i = 0; i < nmodes - 1; ++i) + gmm_weight(i * frame.rows + y, x) *= alpha1; + } - gmm_mean(mode * frame.rows + y, x) = pix; - gmm_variance(mode * frame.rows + y, x) = c_varInit; + // init - //sort - //find the new place for it + gmm_mean(mode * frame.rows + y, x) = pix; + gmm_variance(mode * frame.rows + y, x) = constants->varInit_; - for (int i = nmodes - 1; i > 0; --i) - { - // check one up - if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //find the new place for it - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } + } - //set the number of modes - modesUsed(y, x) = nmodes; + //set the number of modes + modesUsed(y, x) = nmodes; - bool isShadow = false; - if (detectShadows && !background) - { - float tWeight = 0.0f; + bool isShadow = false; + if (detectShadows && !background) + { + float tWeight = 0.0f; - // check all the components marked as background: - for (int mode = 0; mode < nmodes; ++mode) - { - WorkT mean = gmm_mean(mode * frame.rows + y, x); + // check all the components marked as background: + for (int mode = 0; mode < nmodes; ++mode) + { + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - WorkT pix_mean = pix * mean; + const WorkT pix_mean = pix * mean; - float numerator = sum(pix_mean); - float denominator = sqr(mean); + const float numerator = sum(pix_mean); + const float denominator = sqr(mean); - // no division by zero allowed - if (denominator == 0) - break; - - // if tau < a < 1 then also check the color distortion - if (numerator <= denominator && numerator >= c_tau * denominator) - { - float a = numerator / denominator; + // no division by zero allowed + if (denominator == 0) + break; - WorkT dD = a * mean - pix; + // if tau < a < 1 then also check the color distortion + else if (numerator <= denominator && numerator >= constants->tau_ * denominator) + { + const float a = numerator / denominator; - if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) - { - isShadow = true; - break; - } - }; + WorkT dD = a * mean - pix; - tWeight += gmm_weight(mode * frame.rows + y, x); - if (tWeight > c_TB) + if (sqr(dD) < constants->Tb_ * gmm_variance(mode * frame.rows + y, x) * a * a) + { + isShadow = true; break; - } - } + } + }; - fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; + tWeight += gmm_weight(mode * frame.rows + y, x); + if (tWeight > constants->TB_) + break; + } } - template - void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - const float alpha1 = 1.0f - alphaT; + fgmask(y, x) = background ? 0 : isShadow ? constants->shadowVal_ : 255; + } +} - if (detectShadows) - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); +template +void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } - else - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + const float alpha1 = 1.0f - alphaT; - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } + if (detectShadows) + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - cudaSafeCall( cudaGetLastError() ); + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } + else + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, mog2_caller, 0, mog2_caller, mog2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); - } +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); - template - __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) + static const func_t funcs[] = { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; + 0, mog2_caller, 0, mog2_caller, mog2_caller}; - if (x >= modesUsed.cols || y >= modesUsed.rows) - return; + funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, constants, stream); +} - int nmodes = modesUsed(y, x); +template +__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT meanVal = VecTraits::all(0.0f); - float totalWeight = 0.0f; + if (x >= modesUsed.cols || y >= modesUsed.rows) + return; - for (int mode = 0; mode < nmodes; ++mode) - { - float weight = gmm_weight(mode * modesUsed.rows + y, x); + int nmodes = modesUsed(y, x); - WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); - meanVal = meanVal + weight * mean; + WorkT meanVal = VecTraits::all(0.0f); + float totalWeight = 0.0f; - totalWeight += weight; + for (int mode = 0; mode < nmodes; ++mode) + { + float weight = gmm_weight(mode * modesUsed.rows + y, x); - if(totalWeight > c_TB) - break; - } + WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); + meanVal = meanVal + weight * mean; - meanVal = meanVal * (1.f / totalWeight); + totalWeight += weight; - dst(y, x) = saturate_cast(meanVal); - } + if (totalWeight > constants->TB_) + break; + } - template - void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); + meanVal = meanVal * (1.f / totalWeight); - cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); + dst(y, x) = saturate_cast(meanVal); +} - getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); - cudaSafeCall( cudaGetLastError() ); +template +void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + cudaSafeCall(cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1)); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz)mean, (PtrStepSz)dst, constants); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](modesUsed, weight, mean, dst, stream); - } - } -}}} +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream) +{ + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + static const func_t funcs[] = + { + 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller}; + + funcs[cn](modesUsed, weight, mean, dst, constants, stream); +} +} // namespace mog2 +} // namespace device +} // namespace cuda +} // namespace cv #endif /* CUDA_DISABLER */ diff --git a/modules/cudabgsegm/src/cuda/mog2.hpp b/modules/cudabgsegm/src/cuda/mog2.hpp new file mode 100644 index 0000000..5b21551 --- /dev/null +++ b/modules/cudabgsegm/src/cuda/mog2.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CUDA_MOG2_H +#define OPENCV_CUDA_MOG2_H + +#include "opencv2/core/cuda.hpp" + +struct CUstream_st; +typedef struct CUstream_st *cudaStream_t; + +namespace cv { namespace cuda { + +class Stream; + +namespace device { namespace mog2 { + +typedef struct +{ + float Tb_; + float TB_; + float Tg_; + float varInit_; + float varMin_; + float varMax_; + float tau_; + int nmixtures_; + unsigned char shadowVal_; +} Constants; + +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + +} } } } + +#endif /* OPENCV_CUDA_MOG2_H */ diff --git a/modules/cudabgsegm/src/mog2.cpp b/modules/cudabgsegm/src/mog2.cpp index e727dcf..47135a0 100644 --- a/modules/cudabgsegm/src/mog2.cpp +++ b/modules/cudabgsegm/src/mog2.cpp @@ -41,209 +41,207 @@ //M*/ #include "precomp.hpp" +#include "cuda/mog2.hpp" using namespace cv; using namespace cv::cuda; +using namespace cv::cuda::device::mog2; #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr(); } +Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) +{ + throw_no_cuda(); + return Ptr(); +} #else -namespace cv { namespace cuda { namespace device +namespace { - namespace mog2 - { - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); - } -}}} +// default parameters of gaussian background detection algorithm +const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 +const float defaultVarThreshold = 4.0f * 4.0f; +const int defaultNMixtures = 5; // maximal number of Gaussians in mixture +const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test +const float defaultVarThresholdGen = 3.0f * 3.0f; +const float defaultVarInit = 15.0f; // initial variance for new components +const float defaultVarMax = 5.0f * defaultVarInit; +const float defaultVarMin = 4.0f; + +// additional parameters +const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components +const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection +const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation + +class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 +{ +public: + MOG2Impl(int history, double varThreshold, bool detectShadows); + ~MOG2Impl(); -namespace + void apply(InputArray image, OutputArray fgmask, double learningRate = -1) CV_OVERRIDE; + void apply(InputArray image, OutputArray fgmask, double learningRate, Stream &stream) CV_OVERRIDE; + + void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; + void getBackgroundImage(OutputArray backgroundImage, Stream &stream) const CV_OVERRIDE; + + int getHistory() const CV_OVERRIDE { return history_; } + void setHistory(int history) CV_OVERRIDE { history_ = history; } + + int getNMixtures() const CV_OVERRIDE { return constantsHost_.nmixtures_; } + void setNMixtures(int nmixtures) CV_OVERRIDE { constantsHost_.nmixtures_ = nmixtures; } + + double getBackgroundRatio() const CV_OVERRIDE { return constantsHost_.TB_; } + void setBackgroundRatio(double ratio) CV_OVERRIDE { constantsHost_.TB_ = (float)ratio; } + + double getVarThreshold() const CV_OVERRIDE { return constantsHost_.Tb_; } + void setVarThreshold(double varThreshold) CV_OVERRIDE { constantsHost_.Tb_ = (float)varThreshold; } + + double getVarThresholdGen() const CV_OVERRIDE { return constantsHost_.Tg_; } + void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { constantsHost_.Tg_ = (float)varThresholdGen; } + + double getVarInit() const CV_OVERRIDE { return constantsHost_.varInit_; } + void setVarInit(double varInit) CV_OVERRIDE { constantsHost_.varInit_ = (float)varInit; } + + double getVarMin() const CV_OVERRIDE { return constantsHost_.varMin_; } + void setVarMin(double varMin) CV_OVERRIDE { constantsHost_.varMin_ = ::fminf((float)varMin, constantsHost_.varMax_); } + + double getVarMax() const CV_OVERRIDE { return constantsHost_.varMax_; } + void setVarMax(double varMax) CV_OVERRIDE { constantsHost_.varMax_ = ::fmaxf(constantsHost_.varMin_, (float)varMax); } + + double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } + void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float)ct; } + + bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } + void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } + + int getShadowValue() const CV_OVERRIDE { return constantsHost_.shadowVal_; } + void setShadowValue(int value) CV_OVERRIDE { constantsHost_.shadowVal_ = (uchar)value; } + + double getShadowThreshold() const CV_OVERRIDE { return constantsHost_.tau_; } + void setShadowThreshold(double threshold) CV_OVERRIDE { constantsHost_.tau_ = (float)threshold; } + +private: + void initialize(Size frameSize, int frameType, Stream &stream); + + Constants constantsHost_; + Constants *constantsDevice_; + + int history_; + float ct_; + bool detectShadows_; + + Size frameSize_; + int frameType_; + int nframes_; + + GpuMat weight_; + GpuMat variance_; + GpuMat mean_; + + //keep track of number of modes per pixel + GpuMat bgmodelUsedModes_; +}; + +MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : frameSize_(0, 0), frameType_(0), nframes_(0) +{ + history_ = history > 0 ? history : defaultHistory; + detectShadows_ = detectShadows; + ct_ = defaultCT; + + setNMixtures(defaultNMixtures); + setBackgroundRatio(defaultBackgroundRatio); + setVarInit(defaultVarInit); + setVarMin(defaultVarMin); + setVarMax(defaultVarMax); + setVarThreshold(varThreshold > 0 ? (float)varThreshold : defaultVarThreshold); + setVarThresholdGen(defaultVarThresholdGen); + + setShadowValue(defaultShadowValue); + setShadowThreshold(defaultShadowThreshold); + + cudaSafeCall(cudaMalloc((void **)&constantsDevice_, sizeof(Constants))); +} + +MOG2Impl::~MOG2Impl() +{ + cudaFree(constantsDevice_); +} + +void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) +{ + apply(image, fgmask, learningRate, Stream::Null()); +} + +void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream &stream) { - // default parameters of gaussian background detection algorithm - const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 - const float defaultVarThreshold = 4.0f * 4.0f; - const int defaultNMixtures = 5; // maximal number of Gaussians in mixture - const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test - const float defaultVarThresholdGen = 3.0f * 3.0f; - const float defaultVarInit = 15.0f; // initial variance for new components - const float defaultVarMax = 5.0f * defaultVarInit; - const float defaultVarMin = 4.0f; - - // additional parameters - const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components - const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection - const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation - - class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 - { - public: - MOG2Impl(int history, double varThreshold, bool detectShadows); - - void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE; - void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) CV_OVERRIDE; - - void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; - void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const CV_OVERRIDE; - - int getHistory() const CV_OVERRIDE { return history_; } - void setHistory(int history) CV_OVERRIDE { history_ = history; } - - int getNMixtures() const CV_OVERRIDE { return nmixtures_; } - void setNMixtures(int nmixtures) CV_OVERRIDE { nmixtures_ = nmixtures; } - - double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio_; } - void setBackgroundRatio(double ratio) CV_OVERRIDE { backgroundRatio_ = (float) ratio; } - - double getVarThreshold() const CV_OVERRIDE { return varThreshold_; } - void setVarThreshold(double varThreshold) CV_OVERRIDE { varThreshold_ = (float) varThreshold; } - - double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen_; } - void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { varThresholdGen_ = (float) varThresholdGen; } - - double getVarInit() const CV_OVERRIDE { return varInit_; } - void setVarInit(double varInit) CV_OVERRIDE { varInit_ = (float) varInit; } - - double getVarMin() const CV_OVERRIDE { return varMin_; } - void setVarMin(double varMin) CV_OVERRIDE { varMin_ = (float) varMin; } - - double getVarMax() const CV_OVERRIDE { return varMax_; } - void setVarMax(double varMax) CV_OVERRIDE { varMax_ = (float) varMax; } - - double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } - void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float) ct; } - - bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } - void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } - - int getShadowValue() const CV_OVERRIDE { return shadowValue_; } - void setShadowValue(int value) CV_OVERRIDE { shadowValue_ = (uchar) value; } + using namespace cv::cuda::device::mog2; + + GpuMat frame = _frame.getGpuMat(); - double getShadowThreshold() const CV_OVERRIDE { return shadowThreshold_; } - void setShadowThreshold(double threshold) CV_OVERRIDE { shadowThreshold_ = (float) threshold; } - - private: - void initialize(Size frameSize, int frameType); - - int history_; - int nmixtures_; - float backgroundRatio_; - float varThreshold_; - float varThresholdGen_; - float varInit_; - float varMin_; - float varMax_; - float ct_; - bool detectShadows_; - uchar shadowValue_; - float shadowThreshold_; - - Size frameSize_; - int frameType_; - int nframes_; - - GpuMat weight_; - GpuMat variance_; - GpuMat mean_; - - //keep track of number of modes per pixel - GpuMat bgmodelUsedModes_; - }; - - MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : - frameSize_(0, 0), frameType_(0), nframes_(0) - { - history_ = history > 0 ? history : defaultHistory; - varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold; - detectShadows_ = detectShadows; - - nmixtures_ = defaultNMixtures; - backgroundRatio_ = defaultBackgroundRatio; - varInit_ = defaultVarInit; - varMax_ = defaultVarMax; - varMin_ = defaultVarMin; - varThresholdGen_ = defaultVarThresholdGen; - ct_ = defaultCT; - shadowValue_ = defaultShadowValue; - shadowThreshold_ = defaultShadowThreshold; - } + int ch = frame.channels(); + int work_ch = ch; - void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) - { - apply(image, fgmask, learningRate, Stream::Null()); - } + if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) + initialize(frame.size(), frame.type(), stream); - void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream) - { - using namespace cv::cuda::device::mog2; + _fgmask.create(frameSize_, CV_8UC1); + GpuMat fgmask = _fgmask.getGpuMat(); - GpuMat frame = _frame.getGpuMat(); + fgmask.setTo(Scalar::all(0), stream); - int ch = frame.channels(); - int work_ch = ch; + ++nframes_; + learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_); + CV_Assert(learningRate >= 0); - if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) - initialize(frame.size(), frame.type()); + mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, + (float)learningRate, static_cast(-learningRate * ct_), detectShadows_, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const +{ + getBackgroundImage(backgroundImage, Stream::Null()); +} + +void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream &stream) const +{ + using namespace cv::cuda::device::mog2; + + _backgroundImage.create(frameSize_, frameType_); + GpuMat backgroundImage = _backgroundImage.getGpuMat(); + + getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::initialize(cv::Size frameSize, int frameType, Stream &stream) +{ + using namespace cv::cuda::device::mog2; - _fgmask.create(frameSize_, CV_8UC1); - GpuMat fgmask = _fgmask.getGpuMat(); + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); - fgmask.setTo(Scalar::all(0), stream); + frameSize_ = frameSize; + frameType_ = frameType; + nframes_ = 0; - ++nframes_; - learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_); - CV_Assert( learningRate >= 0 ); + const int ch = CV_MAT_CN(frameType); + const int work_ch = ch; - mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, - (float) learningRate, static_cast(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream)); - } + // for each gaussian mixture of each pixel bg model we store ... + // the mixture weight (w), + // the mean (nchannels values) and + // the covariance + weight_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + variance_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + mean_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC(work_ch)); - void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const - { - getBackgroundImage(backgroundImage, Stream::Null()); - } - - void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const - { - using namespace cv::cuda::device::mog2; - - _backgroundImage.create(frameSize_, frameType_); - GpuMat backgroundImage = _backgroundImage.getGpuMat(); - - getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream)); - } - - void MOG2Impl::initialize(cv::Size frameSize, int frameType) - { - using namespace cv::cuda::device::mog2; - - CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 ); - - frameSize_ = frameSize; - frameType_ = frameType; - nframes_ = 0; - - int ch = CV_MAT_CN(frameType); - int work_ch = ch; - - // for each gaussian mixture of each pixel bg model we store ... - // the mixture weight (w), - // the mean (nchannels values) and - // the covariance - weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); - - //make the array for keeping track of the used modes per pixel - all zeros at start - bgmodelUsedModes_.create(frameSize_, CV_8UC1); - bgmodelUsedModes_.setTo(Scalar::all(0)); + //make the array for keeping track of the used modes per pixel - all zeros at start + bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.setTo(Scalar::all(0)); - loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_); - } + cudaSafeCall(cudaMemcpyAsync(constantsDevice_, &constantsHost_, sizeof(Constants), cudaMemcpyHostToDevice, StreamAccessor::getStream(stream))); } +} // namespace Ptr cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows) { -- 2.7.4