From: Roman Donchenko Date: Mon, 24 Mar 2014 11:47:56 +0000 (+0400) Subject: Merge remote-tracking branch 'origin/2.4' into merge-2.4 X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~524^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cc054937303dd90915569e9864f8618de9be51c3;p=profile%2Fivi%2Fopencv.git Merge remote-tracking branch 'origin/2.4' into merge-2.4 Conflicts: cmake/OpenCVDetectAndroidSDK.cmake cmake/OpenCVGenAndroidMK.cmake cmake/OpenCVModule.cmake cmake/templates/OpenCV.mk.in cmake/templates/OpenCVConfig.cmake.in doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst modules/cudabgsegm/src/cuda/mog.cu modules/imgproc/perf/opencl/perf_filters.cpp modules/imgproc/src/opencl/filterSep_singlePass.cl modules/nonfree/CMakeLists.txt modules/nonfree/perf/perf_precomp.hpp modules/ocl/perf/perf_haar.cpp modules/ocl/src/filtering.cpp modules/ocl/src/opencl/bgfg_mog.cl modules/superres/CMakeLists.txt modules/superres/src/btv_l1_cuda.cpp modules/superres/src/cuda/btv_l1_gpu.cu modules/superres/src/frame_source.cpp modules/superres/src/input_array_utility.cpp modules/superres/src/optical_flow.cpp modules/superres/src/precomp.hpp samples/gpu/CMakeLists.txt samples/gpu/brox_optical_flow.cpp samples/gpu/super_resolution.cpp --- cc054937303dd90915569e9864f8618de9be51c3 diff --cc cmake/OpenCVDetectCUDA.cmake index 89602ac,24fbb03..2685171 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@@ -217,3 -219,42 +217,42 @@@ else( unset(CUDA_ARCH_BIN CACHE) unset(CUDA_ARCH_PTX CACHE) endif() + + if(HAVE_CUDA) + set(CUDA_LIBS_PATH "") + foreach(p ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) + get_filename_component(_tmp ${p} PATH) + list(APPEND CUDA_LIBS_PATH ${_tmp}) + endforeach() + + if(HAVE_CUBLAS) + foreach(p ${CUDA_cublas_LIBRARY}) + get_filename_component(_tmp ${p} PATH) + list(APPEND CUDA_LIBS_PATH ${_tmp}) + endforeach() + endif() + + if(HAVE_CUFFT) + foreach(p ${CUDA_cufft_LIBRARY}) + get_filename_component(_tmp ${p} PATH) + list(APPEND CUDA_LIBS_PATH ${_tmp}) + endforeach() + endif() + + list(REMOVE_DUPLICATES CUDA_LIBS_PATH) + link_directories(${CUDA_LIBS_PATH}) + + set(CUDA_LIBRARIES_ABS ${CUDA_LIBRARIES}) + ocv_convert_to_lib_name(CUDA_LIBRARIES ${CUDA_LIBRARIES}) + set(CUDA_npp_LIBRARY_ABS ${CUDA_npp_LIBRARY}) + ocv_convert_to_lib_name(CUDA_npp_LIBRARY ${CUDA_npp_LIBRARY}) + if(HAVE_CUBLAS) + set(CUDA_cublas_LIBRARY_ABS ${CUDA_cublas_LIBRARY}) + ocv_convert_to_lib_name(CUDA_cublas_LIBRARY ${CUDA_cublas_LIBRARY}) + endif() + + if(HAVE_CUFFT) + set(CUDA_cufft_LIBRARY_ABS ${CUDA_cufft_LIBRARY}) + ocv_convert_to_lib_name(CUDA_cufft_LIBRARY ${CUDA_cufft_LIBRARY}) + endif() -endif() ++endif() diff --cc cmake/OpenCVModule.cmake index 372d450,79e5086..e6fa199 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@@ -478,46 -479,58 +479,58 @@@ endmacro( # finds and sets headers and sources for the standard OpenCV module # Usage: - # ocv_glob_module_sources() + # ocv_glob_module_sources([EXCLUDE_CUDA] ) macro(ocv_glob_module_sources) - file(GLOB_RECURSE lib_srcs "src/*.cpp") + set(_argn ${ARGN}) + list(FIND _argn "EXCLUDE_CUDA" exclude_cuda) + if(NOT exclude_cuda EQUAL -1) + list(REMOVE_AT _argn ${exclude_cuda}) + endif() + + file(GLOB_RECURSE lib_srcs "src/*.cpp") file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h") - file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h") + file(GLOB lib_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h") file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h") file(GLOB_RECURSE lib_srcs_apple "src/*.mm") if (APPLE) list(APPEND lib_srcs ${lib_srcs_apple}) endif() + ocv_source_group("Src" DIRBASE "${CMAKE_CURRENT_SOURCE_DIR}/src" FILES ${lib_srcs} ${lib_int_hdrs}) + ocv_source_group("Include" DIRBASE "${CMAKE_CURRENT_SOURCE_DIR}/include" FILES ${lib_hdrs} ${lib_hdrs_detail}) + - file(GLOB lib_cuda_srcs "src/cuda/*.cu") - set(cuda_objs "") - set(lib_cuda_hdrs "") - if(HAVE_CUDA AND lib_cuda_srcs) - ocv_include_directories(${CUDA_INCLUDE_DIRS}) - file(GLOB lib_cuda_hdrs "src/cuda/*.hpp") + if (exclude_cuda EQUAL -1) + file(GLOB lib_cuda_srcs "src/cuda/*.cu") + set(cuda_objs "") + set(lib_cuda_hdrs "") + if(HAVE_CUDA) + ocv_include_directories(${CUDA_INCLUDE_DIRS}) + file(GLOB lib_cuda_hdrs "src/cuda/*.hpp") - ocv_cuda_compile(cuda_objs ${lib_cuda_srcs} ${lib_cuda_hdrs}) - source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs}) + ocv_cuda_compile(cuda_objs ${lib_cuda_srcs} ${lib_cuda_hdrs}) + source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs}) + endif() + else() + set(cuda_objs "") + set(lib_cuda_srcs "") + set(lib_cuda_hdrs "") endif() - source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) - file(GLOB cl_kernels "src/opencl/*.cl") - if(HAVE_opencv_ocl AND cl_kernels) + if(cl_kernels) ocv_include_directories(${OPENCL_INCLUDE_DIRS}) + string(REGEX REPLACE "opencv_" "" the_module_barename "${the_module}") add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp" - COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" + COMMAND ${CMAKE_COMMAND} -DMODULE_NAME="${the_module_barename}" -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake") - source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") + ocv_source_group("Src\\opencl\\kernels" FILES ${cl_kernels}) + ocv_source_group("Src\\opencl\\kernels\\autogenerated" FILES "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") endif() - ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail} - SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs}) - source_group("Include" FILES ${lib_hdrs}) - source_group("Include\\detail" FILES ${lib_hdrs_detail}) - + ocv_set_module_sources(${_argn} HEADERS ${lib_hdrs} ${lib_hdrs_detail} + SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs}) endmacro() # creates OpenCV module in current folder diff --cc cmake/templates/OpenCVConfig.cmake.in index 88eed8e,3222048..2d5a14c --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@@ -19,30 -18,31 +19,34 @@@ # This file will define the following variables: # - OpenCV_LIBS : The list of all imported targets for OpenCV modules. # - OpenCV_INCLUDE_DIRS : The OpenCV include directories. - # - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability - # - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API + # - OpenCV_COMPUTE_CAPABILITIES : The version of compute capability. + # - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API. -# - OpenCV_VERSION : The version of this OpenCV build: "@OPENCV_VERSION@" +# - OpenCV_VERSION : The version of this OpenCV build: "@OPENCV_VERSION_PLAIN@" # - OpenCV_VERSION_MAJOR : Major version part of OpenCV_VERSION: "@OPENCV_VERSION_MAJOR@" # - OpenCV_VERSION_MINOR : Minor version part of OpenCV_VERSION: "@OPENCV_VERSION_MINOR@" # - OpenCV_VERSION_PATCH : Patch version part of OpenCV_VERSION: "@OPENCV_VERSION_PATCH@" -# - OpenCV_VERSION_TWEAK : Tweak version part of OpenCV_VERSION: "@OPENCV_VERSION_TWEAK@" +# - OpenCV_VERSION_STATUS : Development status of this build: "@OPENCV_VERSION_STATUS@" # # Advanced variables: - # - OpenCV_SHARED - # - OpenCV_CONFIG_PATH - # - OpenCV_INSTALL_PATH (not set on Windows) - # - OpenCV_LIB_COMPONENTS - # - OpenCV_USE_MANGLED_PATHS - # - OpenCV_HAVE_ANDROID_CAMERA + # - OpenCV_SHARED : Use OpenCV as shared library + # - OpenCV_CONFIG_PATH : Path to this OpenCVConfig.cmake + # - OpenCV_INSTALL_PATH : OpenCV location (not set on Windows) + # - OpenCV_LIB_COMPONENTS : Present OpenCV modules list + # - OpenCV_USE_MANGLED_PATHS : Mangled OpenCV path flag + # - OpenCV_MODULES_SUFFIX : The suffix for OpenCVModules-XXX.cmake file + # - OpenCV_HAVE_ANDROID_CAMERA : Presence of Android native camera wrappers # +# Deprecated variables: +# - OpenCV_VERSION_TWEAK : Always "0" +# # =================================================================================== - set(modules_file_suffix "") - if(ANDROID) - string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}") + if(NOT DEFINED OpenCV_MODULES_SUFFIX) + if(ANDROID) + string(REPLACE - _ OpenCV_MODULES_SUFFIX "_${ANDROID_NDK_ABI_NAME}") + else() + set(OpenCV_MODULES_SUFFIX "") + endif() endif() if(NOT TARGET opencv_core) diff --cc modules/cudabgsegm/src/cuda/mog2.cu index de8df6c,0000000..789afa4 mode 100644,000000..100644 --- a/modules/cudabgsegm/src/cuda/mog2.cu +++ b/modules/cudabgsegm/src/cuda/mog2.cu @@@ -1,438 -1,0 +1,439 @@@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/vec_traits.hpp" +#include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/limits.hpp" + +namespace cv { namespace cuda { namespace device +{ + namespace mog2 + { + /////////////////////////////////////////////////////////////// + // Utility + + __device__ __forceinline__ float cvt(uchar val) + { + return val; + } + __device__ __forceinline__ float3 cvt(const uchar3& val) + { + return make_float3(val.x, val.y, val.z); + } + __device__ __forceinline__ float4 cvt(const uchar4& val) + { + return make_float4(val.x, val.y, val.z, val.w); + } + + __device__ __forceinline__ float sqr(float val) + { + return val * val; + } + __device__ __forceinline__ float sqr(const float3& val) + { + return val.x * val.x + val.y * val.y + val.z * val.z; + } + __device__ __forceinline__ float sqr(const float4& val) + { + return val.x * val.x + val.y * val.y + val.z * val.z; + } + + __device__ __forceinline__ float sum(float val) + { + return val; + } + __device__ __forceinline__ float sum(const float3& val) + { + return val.x + val.y + val.z; + } + __device__ __forceinline__ float sum(const float4& val) + { + return val.x + val.y + val.z; + } + + template + __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows) + { + typename Ptr2D::elem_type val = ptr(k * rows + y, x); + ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); + ptr((k + 1) * rows + y, x) = val; + } + + /////////////////////////////////////////////////////////////// + // MOG2 + + __constant__ int c_nmixtures; + __constant__ float c_Tb; + __constant__ float c_TB; + __constant__ float c_Tg; + __constant__ float c_varInit; + __constant__ float c_varMin; + __constant__ float c_varMax; + __constant__ float c_tau; + __constant__ unsigned char c_shadowVal; + + void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) + { + varMin = ::fminf(varMin, varMax); + varMax = ::fmaxf(varMin, varMax); + + cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); + } + + template + __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, + const float alphaT, const float alpha1, const float prune) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= frame.cols || y >= frame.rows) + return; + + WorkT pix = cvt(frame(y, x)); + + //calculate distances to the modes (+ sort) + //here we need to go in descending order!!! + + bool background = false; // true - the pixel classified as background + + //internal: + + bool fitsPDF = false; //if it remains zero a new GMM mode will be added + + int nmodes = modesUsed(y, x); + int nNewModes = nmodes; //current number of modes in GMM + + float totalWeight = 0.0f; + + //go through all modes + + for (int mode = 0; mode < nmodes; ++mode) + { + //need only weight if fit is found + float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; - ++ int swap_count = 0; + //fit not found yet + if (!fitsPDF) + { + //check if it belongs to some of the remaining modes + float var = gmm_variance(mode * frame.rows + y, x); + + WorkT mean = gmm_mean(mode * frame.rows + y, x); + + //calculate difference and distance + WorkT diff = mean - pix; + float dist2 = sqr(diff); + + //background? - Tb - usually larger than Tg + if (totalWeight < c_TB && dist2 < c_Tb * var) + background = true; + + //check fit + if (dist2 < c_Tg * var) + { + //belongs to the mode + fitsPDF = true; + + //update distribution + + //update weight + weight += alphaT; + float k = alphaT / weight; + + //update mean + gmm_mean(mode * frame.rows + y, x) = mean - k * diff; + + //update variance + float varnew = var + k * (dist2 - var); + + //limit the variance + varnew = ::fmaxf(varnew, c_varMin); + varnew = ::fminf(varnew, c_varMax); + + gmm_variance(mode * frame.rows + y, x) = varnew; + + //sort + //all other weights are at the same place and + //only the matched (iModes) is higher -> just find the new place for it + + for (int i = mode; i > 0; --i) + { + //check one up + if (weight < gmm_weight((i - 1) * frame.rows + y, x)) + break; + ++ swap_count++; + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); + } + + //belongs to the mode - bFitsPDF becomes 1 + } + } // !fitsPDF + + //check prune + if (weight < -prune) + { + weight = 0.0f; + nmodes--; + } + - gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value ++ gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value + totalWeight += weight; + } + + //renormalize weights + + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + gmm_weight(mode * frame.rows + y, x) *= totalWeight; + + nmodes = nNewModes; + + //make new mode if needed and exit + + if (!fitsPDF) + { + // replace the weakest or add a new one + int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; + + if (nmodes == 1) + gmm_weight(mode * frame.rows + y, x) = 1.f; + else + { + gmm_weight(mode * frame.rows + y, x) = alphaT; + + // renormalize all other weights + + for (int i = 0; i < nmodes - 1; ++i) + gmm_weight(i * frame.rows + y, x) *= alpha1; + } + + // init + + gmm_mean(mode * frame.rows + y, x) = pix; + gmm_variance(mode * frame.rows + y, x) = c_varInit; + + //sort + //find the new place for it + + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); + } + } + + //set the number of modes + modesUsed(y, x) = nmodes; + + bool isShadow = false; + if (detectShadows && !background) + { + float tWeight = 0.0f; + + // check all the components marked as background: + for (int mode = 0; mode < nmodes; ++mode) + { + WorkT mean = gmm_mean(mode * frame.rows + y, x); + + WorkT pix_mean = pix * mean; + + float numerator = sum(pix_mean); + float denominator = sqr(mean); + + // no division by zero allowed + if (denominator == 0) + break; + + // if tau < a < 1 then also check the color distortion + if (numerator <= denominator && numerator >= c_tau * denominator) + { + float a = numerator / denominator; + + WorkT dD = a * mean - pix; + + if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) + { + isShadow = true; + break; + } + }; + + tWeight += gmm_weight(mode * frame.rows + y, x); + if (tWeight > c_TB) + break; + } + } + + fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; + } + + template + void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); + + const float alpha1 = 1.0f - alphaT; + + if (detectShadows) + { + cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, + alphaT, alpha1, prune); + } + else + { + cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, + alphaT, alpha1, prune); + } + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, cudaStream_t stream) + { + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + + static const func_t funcs[] = + { + 0, mog2_caller, 0, mog2_caller, mog2_caller + }; + + funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); + } + + template + __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= modesUsed.cols || y >= modesUsed.rows) + return; + + int nmodes = modesUsed(y, x); + + WorkT meanVal = VecTraits::all(0.0f); + float totalWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + float weight = gmm_weight(mode * modesUsed.rows + y, x); + + WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); + meanVal = meanVal + weight * mean; + + totalWeight += weight; + + if(totalWeight > c_TB) + break; + } + + meanVal = meanVal * (1.f / totalWeight); + + dst(y, x) = saturate_cast(meanVal); + } + + template + void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); + + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) + { + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + + static const func_t funcs[] = + { + 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller + }; + + funcs[cn](modesUsed, weight, mean, dst, stream); + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --cc modules/nonfree/perf/perf_precomp.hpp index 240bb65,57bbe16..45478eb --- a/modules/nonfree/perf/perf_precomp.hpp +++ b/modules/nonfree/perf/perf_precomp.hpp @@@ -9,12 -9,13 +9,13 @@@ #ifndef __OPENCV_PERF_PRECOMP_HPP__ #define __OPENCV_PERF_PRECOMP_HPP__ + #include "cvconfig.h" + -#include "opencv2/ts/ts.hpp" +#include "opencv2/ts.hpp" +#include "opencv2/nonfree.hpp" +#include "opencv2/highgui.hpp" -#include "opencv2/nonfree/nonfree.hpp" -#include "opencv2/highgui/highgui.hpp" #include "opencv2/opencv_modules.hpp" - #include "cvconfig.h" #ifdef HAVE_OPENCV_OCL # include "opencv2/nonfree/ocl.hpp" diff --cc modules/video/src/bgfg_gaussmix2.cpp index 1e6ee0d,b14bc8e..098310f --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@@ -910,12 -627,6 +911,12 @@@ void BackgroundSubtractorMOG2Impl::getB } } +Ptr createBackgroundSubtractorMOG2(int _history, double _varThreshold, + bool _bShadowDetection) +{ + return makePtr(_history, (float)_varThreshold, _bShadowDetection); +} + } - /* End of file. */ + /* End of file. */ diff --cc modules/video/src/opencl/bgfg_mog2.cl index f895b5b,0000000..9bc18b2 mode 100644,000000..100644 --- a/modules/video/src/opencl/bgfg_mog2.cl +++ b/modules/video/src/opencl/bgfg_mog2.cl @@@ -1,272 -1,0 +1,273 @@@ +#if CN==1 + +#define T_MEAN float +#define F_ZERO (0.0f) +#define cnMode 1 + +#define frameToMean(a, b) (b) = *(a); +#define meanToFrame(a, b) *b = convert_uchar_sat(a); + +inline float sqr(float val) +{ + return val * val; +} + +inline float sum(float val) +{ + return val; +} + +#else + +#define T_MEAN float4 +#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f) +#define cnMode 4 + +#define meanToFrame(a, b)\ + b[0] = convert_uchar_sat(a.x); \ + b[1] = convert_uchar_sat(a.y); \ + b[2] = convert_uchar_sat(a.z); + +#define frameToMean(a, b)\ + b.x = a[0]; \ + b.y = a[1]; \ + b.z = a[2]; \ + b.w = 0.0f; + +inline float sqr(const float4 val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} + +inline float sum(const float4 val) +{ + return (val.x + val.y + val.z); +} + +inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) +{ + float4 val = ptr[(k * rows + y) * ptr_step + x]; + ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; + ptr[((k + 1) * rows + y) * ptr_step + x] = val; +} + +#endif + +inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step) +{ + float val = ptr[(k * rows + y) * ptr_step + x]; + ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x]; + ptr[((k + 1) * rows + y) * ptr_step + x] = val; +} + +__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col, //uchar || uchar3 + __global uchar* modesUsed, int modesUsed_step, int modesUsed_offset, //int + __global uchar* weight, int weight_step, int weight_offset, //float + __global uchar* mean, int mean_step, int mean_offset, //T_MEAN=float || float4 + __global uchar* variance, int var_step, int var_offset, //float + __global uchar* fgmask, int fgmask_step, int fgmask_offset, //int + float alphaT, float alpha1, float prune, + int detectShadows_flag, + float c_Tb, float c_TB, float c_Tg, float c_varMin, //constants + float c_varMax, float c_varInit, float c_tau, uchar c_shadowVal) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + weight_step/= sizeof(float); + var_step /= sizeof(float); + mean_step /= (sizeof(float)*cnMode); + + if( x < frame_col && y < frame_row) + { + __global const uchar* _frame = (frame + mad24( y, frame_step, x*CN + frame_offset)); + T_MEAN pix; + frameToMean(_frame, pix); + + bool background = false; // true - the pixel classified as background + + bool fitsPDF = false; //if it remains zero a new GMM mode will be added + + __global int* _modesUsed = (__global int*)(modesUsed + mad24( y, modesUsed_step, x*(int)(sizeof(int)))); + int nmodes = _modesUsed[0]; + int nNewModes = nmodes; //current number of modes in GMM + + float totalWeight = 0.0f; + + __global float* _weight = (__global float*)(weight); + __global float* _variance = (__global float*)(variance); + __global T_MEAN* _mean = (__global T_MEAN*)(mean); + + for (int mode = 0; mode < nmodes; ++mode) + { + + float c_weight = alpha1 * _weight[(mode * frame_row + y) * weight_step + x] + prune; - ++ int swap_count = 0; + if (!fitsPDF) + { + float c_var = _variance[(mode * frame_row + y) * var_step + x]; + + T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x]; + + T_MEAN diff = c_mean - pix; + float dist2 = sqr(diff); + + if (totalWeight < c_TB && dist2 < c_Tb * c_var) + background = true; + + if (dist2 < c_Tg * c_var) + { + fitsPDF = true; + c_weight += alphaT; + float k = alphaT / c_weight; + + _mean[(mode * frame_row + y) * mean_step + x] = c_mean - k * diff; + + float varnew = c_var + k * (dist2 - c_var); + varnew = fmax(varnew, c_varMin); + varnew = fmin(varnew, c_varMax); + + _variance[(mode * frame_row + y) * var_step + x] = varnew; + for (int i = mode; i > 0; --i) + { + if (c_weight < _weight[((i - 1) * frame_row + y) * weight_step + x]) + break; ++ swap_count++; + swap(_weight, x, y, i - 1, frame_row, weight_step); + swap(_variance, x, y, i - 1, frame_row, var_step); + #if (CN==1) + swap(_mean, x, y, i - 1, frame_row, mean_step); + #else + swap4(_mean, x, y, i - 1, frame_row, mean_step); + #endif + } + } + } // !fitsPDF + + if (c_weight < -prune) + { + c_weight = 0.0f; + nmodes--; + } + - _weight[(mode * frame_row + y) * weight_step + x] = c_weight; //update weight by the calculated value ++ _weight[((mode - swap_count) * frame_row + y) * weight_step + x] = c_weight; //update weight by the calculated value + totalWeight += c_weight; + } + + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + _weight[(mode * frame_row + y) * weight_step + x] *= totalWeight; + + nmodes = nNewModes; + + if (!fitsPDF) + { + int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++; + + if (nmodes == 1) + _weight[(mode * frame_row + y) * weight_step + x] = 1.f; + else + { + _weight[(mode * frame_row + y) * weight_step + x] = alphaT; + + for (int i = 0; i < nmodes - 1; ++i) + _weight[(i * frame_row + y) * weight_step + x] *= alpha1; + } + + _mean[(mode * frame_row + y) * mean_step + x] = pix; + _variance[(mode * frame_row + y) * var_step + x] = c_varInit; + + for (int i = nmodes - 1; i > 0; --i) + { + if (alphaT < _weight[((i - 1) * frame_row + y) * weight_step + x]) + break; + + swap(_weight, x, y, i - 1, frame_row, weight_step); + swap(_variance, x, y, i - 1, frame_row, var_step); + #if (CN==1) + swap(_mean, x, y, i - 1, frame_row, mean_step); + #else + swap4(_mean, x, y, i - 1, frame_row, mean_step); + #endif + } + } + + _modesUsed[0] = nmodes; + bool isShadow = false; + if (detectShadows_flag && !background) + { + float tWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x]; + + T_MEAN pix_mean = pix * c_mean; + + float numerator = sum(pix_mean); + float denominator = sqr(c_mean); + + if (denominator == 0) + break; + + if (numerator <= denominator && numerator >= c_tau * denominator) + { + float a = numerator / denominator; + + T_MEAN dD = a * c_mean - pix; + + if (sqr(dD) < c_Tb * _variance[(mode * frame_row + y) * var_step + x] * a * a) + { + isShadow = true; + break; + } + } + + tWeight += _weight[(mode * frame_row + y) * weight_step + x]; + if (tWeight > c_TB) + break; + } + } + __global int* _fgmask = (__global int*)(fgmask + mad24(y, fgmask_step, x*(int)(sizeof(int)) + fgmask_offset)); + *_fgmask = background ? 0 : isShadow ? c_shadowVal : 255; + } +} + +__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed, int modesUsed_step, int modesUsed_offset, int modesUsed_row, int modesUsed_col, + __global const uchar* weight, int weight_step, int weight_offset, + __global const uchar* mean, int mean_step, int mean_offset, + __global uchar* dst, int dst_step, int dst_offset, + float c_TB) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < modesUsed_col && y < modesUsed_row) + { + __global int* _modesUsed = (__global int*)(modesUsed + mad24( y, modesUsed_step, x*(int)(sizeof(int)))); + int nmodes = _modesUsed[0]; + + T_MEAN meanVal = (T_MEAN)F_ZERO; + + float totalWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + __global const float* _weight = (__global const float*)(weight + mad24(mode * modesUsed_row + y, weight_step, x*(int)(sizeof(float)))); + float c_weight = _weight[0]; + + __global const T_MEAN* _mean = (__global const T_MEAN*)(mean + mad24(mode * modesUsed_row + y, mean_step, x*(int)(sizeof(float))*cnMode)); + T_MEAN c_mean = _mean[0]; + meanVal = meanVal + c_weight * c_mean; + + totalWeight += c_weight; + + if(totalWeight > c_TB) + break; + } + + meanVal = meanVal * (1.f / totalWeight); + __global uchar* _dst = dst + y * dst_step + x*CN + dst_offset; + meanToFrame(meanVal, _dst); + } +} diff --cc samples/gpu/CMakeLists.txt index 46b465a,d25c3a6..ca5243a --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@@ -47,10 -39,10 +47,10 @@@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_ set(the_target "example_${project}_${name}") add_executable(${the_target} ${srcs}) - target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_GPU_SAMPLES_REQUIRED_DEPS}) + target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) - if(HAVE_CUDA) - target_link_libraries(${the_target} ${CUDA_CUDA_LIBRARY}) + if(HAVE_CUDA AND NOT ANDROID) + target_link_libraries(${the_target} ${CUDA_CUDA_LIBRARY}) endif() if(HAVE_opencv_nonfree) diff --cc samples/gpu/brox_optical_flow.cpp index 0897386,7cd5089..638aade --- a/samples/gpu/brox_optical_flow.cpp +++ b/samples/gpu/brox_optical_flow.cpp @@@ -1,14 -1,12 +1,14 @@@ #include #include #include - #include + #include -#include "cvconfig.h" -#include "opencv2/core/core.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/gpu/gpu.hpp" +#include "opencv2/core.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudaarithm.hpp" using namespace std; using namespace cv; diff --cc samples/gpu/super_resolution.cpp index 3066e8f,85cb6cf..4e3de21 --- a/samples/gpu/super_resolution.cpp +++ b/samples/gpu/super_resolution.cpp @@@ -1,12 -1,13 +1,14 @@@ #include #include #include + #include + -#include "opencv2/core/core.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/contrib/contrib.hpp" -#include "opencv2/superres/superres.hpp" +#include "opencv2/core.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/contrib.hpp" +#include "opencv2/superres.hpp" #include "opencv2/superres/optical_flow.hpp" #include "opencv2/opencv_modules.hpp"