Merge remote-tracking branch 'origin/2.4' into merge-2.4
authorRoman Donchenko <roman.donchenko@itseez.com>
Mon, 24 Mar 2014 11:47:56 +0000 (15:47 +0400)
committerRoman Donchenko <roman.donchenko@itseez.com>
Mon, 24 Mar 2014 15:02:16 +0000 (19:02 +0400)
Conflicts:
cmake/OpenCVDetectAndroidSDK.cmake
cmake/OpenCVGenAndroidMK.cmake
cmake/OpenCVModule.cmake
cmake/templates/OpenCV.mk.in
cmake/templates/OpenCVConfig.cmake.in
doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.rst
modules/cudabgsegm/src/cuda/mog.cu
modules/imgproc/perf/opencl/perf_filters.cpp
modules/imgproc/src/opencl/filterSep_singlePass.cl
modules/nonfree/CMakeLists.txt
modules/nonfree/perf/perf_precomp.hpp
modules/ocl/perf/perf_haar.cpp
modules/ocl/src/filtering.cpp
modules/ocl/src/opencl/bgfg_mog.cl
modules/superres/CMakeLists.txt
modules/superres/src/btv_l1_cuda.cpp
modules/superres/src/cuda/btv_l1_gpu.cu
modules/superres/src/frame_source.cpp
modules/superres/src/input_array_utility.cpp
modules/superres/src/optical_flow.cpp
modules/superres/src/precomp.hpp
samples/gpu/CMakeLists.txt
samples/gpu/brox_optical_flow.cpp
samples/gpu/super_resolution.cpp

20 files changed:
1  2 
CMakeLists.txt
cmake/OpenCVDetectCUDA.cmake
cmake/OpenCVModule.cmake
cmake/templates/OpenCVConfig.cmake.in
doc/tutorials/core/adding_images/adding_images.rst
modules/core/src/persistence.cpp
modules/cudabgsegm/src/cuda/mog2.cu
modules/flann/include/opencv2/flann/kmeans_index.h
modules/highgui/doc/reading_and_writing_images_and_video.rst
modules/highgui/include/opencv2/highgui/highgui_c.h
modules/highgui/src/cap_android.cpp
modules/nonfree/perf/perf_precomp.hpp
modules/superres/CMakeLists.txt
modules/video/src/bgfg_gaussmix2.cpp
modules/video/src/opencl/bgfg_mog2.cl
modules/viz/doc/widget.rst
samples/gpu/CMakeLists.txt
samples/gpu/brox_optical_flow.cpp
samples/gpu/opticalflow_nvidia_api.cpp
samples/gpu/super_resolution.cpp

diff --cc CMakeLists.txt
Simple merge
@@@ -217,3 -219,42 +217,42 @@@ else(
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()
 -endif()
+ # With CUDA enabled: register the directories of the CUDA libraries as link
+ # directories, remember the original library values in *_ABS variables and
+ # pass every library variable through ocv_convert_to_lib_name().
+ if(HAVE_CUDA)
+   # Libraries whose directories are collected; cuBLAS / cuFFT only when enabled.
+   set(_cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
+   if(HAVE_CUBLAS)
+     list(APPEND _cuda_link_libs ${CUDA_cublas_LIBRARY})
+   endif()
+   if(HAVE_CUFFT)
+     list(APPEND _cuda_link_libs ${CUDA_cufft_LIBRARY})
+   endif()
+
+   # One pass over all libraries: keep the directory part of each entry.
+   set(CUDA_LIBS_PATH "")
+   foreach(p ${_cuda_link_libs})
+     get_filename_component(_tmp ${p} PATH)
+     list(APPEND CUDA_LIBS_PATH ${_tmp})
+   endforeach()
+   unset(_cuda_link_libs)
+   list(REMOVE_DUPLICATES CUDA_LIBS_PATH)
+   link_directories(${CUDA_LIBS_PATH})
+
+   # Save the current values first, then let ocv_convert_to_lib_name() rewrite them.
+   set(CUDA_LIBRARIES_ABS ${CUDA_LIBRARIES})
+   ocv_convert_to_lib_name(CUDA_LIBRARIES ${CUDA_LIBRARIES})
+   set(CUDA_npp_LIBRARY_ABS ${CUDA_npp_LIBRARY})
+   ocv_convert_to_lib_name(CUDA_npp_LIBRARY ${CUDA_npp_LIBRARY})
+   if(HAVE_CUBLAS)
+     set(CUDA_cublas_LIBRARY_ABS ${CUDA_cublas_LIBRARY})
+     ocv_convert_to_lib_name(CUDA_cublas_LIBRARY ${CUDA_cublas_LIBRARY})
+   endif()
+   if(HAVE_CUFFT)
+     set(CUDA_cufft_LIBRARY_ABS ${CUDA_cufft_LIBRARY})
+     ocv_convert_to_lib_name(CUDA_cufft_LIBRARY ${CUDA_cufft_LIBRARY})
+   endif()
++endif()
@@@ -478,46 -479,58 +479,58 @@@ endmacro(
  
  # finds and sets headers and sources for the standard OpenCV module
  # Usage:
- # ocv_glob_module_sources(<extra sources&headers in the same format as used in ocv_set_module_sources>)
+ # ocv_glob_module_sources([EXCLUDE_CUDA] <extra sources&headers in the same format as used in ocv_set_module_sources>)
  macro(ocv_glob_module_sources)
-   file(GLOB_RECURSE lib_srcs     "src/*.cpp")
+   set(_argn ${ARGN})
+   list(FIND _argn "EXCLUDE_CUDA" exclude_cuda) # exclude_cuda = index of the flag in the arguments, -1 when absent
+   if(NOT exclude_cuda EQUAL -1)
+     list(REMOVE_AT _argn ${exclude_cuda}) # drop the flag so it is not forwarded to ocv_set_module_sources
+   endif()
+   file(GLOB_RECURSE lib_srcs "src/*.cpp")
    file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h")
 -  file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
 +  file(GLOB lib_hdrs     "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
    file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
    file(GLOB_RECURSE lib_srcs_apple "src/*.mm")
    if (APPLE)
      list(APPEND lib_srcs ${lib_srcs_apple}) # Objective-C++ sources are built on Apple platforms only
    endif()
  
-   file(GLOB lib_cuda_srcs "src/cuda/*.cu")
-   set(cuda_objs "")
-   set(lib_cuda_hdrs "")
-   if(HAVE_CUDA AND lib_cuda_srcs)
-     ocv_include_directories(${CUDA_INCLUDE_DIRS})
-     file(GLOB lib_cuda_hdrs "src/cuda/*.hpp")
 +  ocv_source_group("Src" DIRBASE "${CMAKE_CURRENT_SOURCE_DIR}/src" FILES ${lib_srcs} ${lib_int_hdrs})
 +  ocv_source_group("Include" DIRBASE "${CMAKE_CURRENT_SOURCE_DIR}/include" FILES ${lib_hdrs} ${lib_hdrs_detail})
 +
+   if (exclude_cuda EQUAL -1) # EXCLUDE_CUDA not given: pick up src/cuda/*.cu
+     file(GLOB lib_cuda_srcs "src/cuda/*.cu")
+     set(cuda_objs "")
+     set(lib_cuda_hdrs "")
+     if(HAVE_CUDA) # the .cu files are compiled only when CUDA is available
+       ocv_include_directories(${CUDA_INCLUDE_DIRS})
+       file(GLOB lib_cuda_hdrs "src/cuda/*.hpp")
  
-     ocv_cuda_compile(cuda_objs ${lib_cuda_srcs} ${lib_cuda_hdrs})
-     source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
+       ocv_cuda_compile(cuda_objs ${lib_cuda_srcs} ${lib_cuda_hdrs})
+       source_group("Src\\Cuda"      FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
+     endif()
+   else() # EXCLUDE_CUDA given: no CUDA sources, headers or objects are added
+     set(cuda_objs "")
+     set(lib_cuda_srcs "")
+     set(lib_cuda_hdrs "")
    endif()
  
 -  source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
 -
    file(GLOB cl_kernels "src/opencl/*.cl")
 -  if(HAVE_opencv_ocl AND cl_kernels)
 +  if(cl_kernels) # the module ships OpenCL kernels in src/opencl
      ocv_include_directories(${OPENCL_INCLUDE_DIRS})
 +    string(REGEX REPLACE "opencv_" "" the_module_barename "${the_module}") # module name with every "opencv_" removed
      add_custom_command(
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
 -      COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
 +      COMMAND ${CMAKE_COMMAND} -DMODULE_NAME="${the_module_barename}" -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
        DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
 -    source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
 +    ocv_source_group("Src\\opencl\\kernels" FILES ${cl_kernels})
 +    ocv_source_group("Src\\opencl\\kernels\\autogenerated" FILES "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
      list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") # kernels and the generated files become module sources
    endif()
  
-   ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
-                                  SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs})
 -  source_group("Include" FILES ${lib_hdrs})
 -  source_group("Include\\detail" FILES ${lib_hdrs_detail})
 -
+   ocv_set_module_sources(${_argn} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
+                          SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs})
  endmacro()
  
  # creates OpenCV module in current folder
  #    This file will define the following variables:
  #      - OpenCV_LIBS                     : The list of all imported targets for OpenCV modules.
  #      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
- #      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
- #      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API
+ #      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability.
+ #      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API.
 -#      - OpenCV_VERSION                  : The version of this OpenCV build: "@OPENCV_VERSION@"
 +#      - OpenCV_VERSION                  : The version of this OpenCV build: "@OPENCV_VERSION_PLAIN@"
  #      - OpenCV_VERSION_MAJOR            : Major version part of OpenCV_VERSION: "@OPENCV_VERSION_MAJOR@"
  #      - OpenCV_VERSION_MINOR            : Minor version part of OpenCV_VERSION: "@OPENCV_VERSION_MINOR@"
  #      - OpenCV_VERSION_PATCH            : Patch version part of OpenCV_VERSION: "@OPENCV_VERSION_PATCH@"
 -#      - OpenCV_VERSION_TWEAK            : Tweak version part of OpenCV_VERSION: "@OPENCV_VERSION_TWEAK@"
 +#      - OpenCV_VERSION_STATUS           : Development status of this build: "@OPENCV_VERSION_STATUS@"
  #
  #    Advanced variables:
- #      - OpenCV_SHARED
- #      - OpenCV_CONFIG_PATH
- #      - OpenCV_INSTALL_PATH  (not set on Windows)
- #      - OpenCV_LIB_COMPONENTS
- #      - OpenCV_USE_MANGLED_PATHS
- #      - OpenCV_HAVE_ANDROID_CAMERA
+ #      - OpenCV_SHARED                   : Use OpenCV as shared library
+ #      - OpenCV_CONFIG_PATH              : Path to this OpenCVConfig.cmake
+ #      - OpenCV_INSTALL_PATH             : OpenCV location (not set on Windows)
+ #      - OpenCV_LIB_COMPONENTS           : List of the OpenCV modules present in this build
+ #      - OpenCV_USE_MANGLED_PATHS        : Mangled OpenCV path flag
+ #      - OpenCV_MODULES_SUFFIX           : The suffix for OpenCVModules-XXX.cmake file
+ #      - OpenCV_HAVE_ANDROID_CAMERA      : Presence of Android native camera wrappers
  #
 +#    Deprecated variables:
 +#      - OpenCV_VERSION_TWEAK            : Always "0"
 +#
  # ===================================================================================
  
- set(modules_file_suffix "")
- if(ANDROID)
-   string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}")
+ if(NOT DEFINED OpenCV_MODULES_SUFFIX) # a suffix that is already defined is kept as is
+   if(ANDROID)
+     string(REPLACE - _ OpenCV_MODULES_SUFFIX "_${ANDROID_NDK_ABI_NAME}") # "_<ABI name>" with every '-' turned into '_'
+   else()
+     set(OpenCV_MODULES_SUFFIX "") # no suffix outside Android
+   endif()
  endif()
  
  if(NOT TARGET opencv_core)
Simple merge
index de8df6c,0000000..789afa4
mode 100644,000000..100644
--- /dev/null
@@@ -1,438 -1,0 +1,439 @@@
 +/*M///////////////////////////////////////////////////////////////////////////////////////
 +//
 +//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 +//
 +//  By downloading, copying, installing or using the software you agree to this license.
 +//  If you do not agree to this license, do not download, install,
 +//  copy or use the software.
 +//
 +//
 +//                           License Agreement
 +//                For Open Source Computer Vision Library
 +//
 +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 +// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 +// Third party copyrights are property of their respective owners.
 +//
 +// Redistribution and use in source and binary forms, with or without modification,
 +// are permitted provided that the following conditions are met:
 +//
 +//   * Redistribution's of source code must retain the above copyright notice,
 +//     this list of conditions and the following disclaimer.
 +//
 +//   * Redistribution's in binary form must reproduce the above copyright notice,
 +//     this list of conditions and the following disclaimer in the documentation
 +//     and/or other materials provided with the distribution.
 +//
 +//   * The name of the copyright holders may not be used to endorse or promote products
 +//     derived from this software without specific prior written permission.
 +//
 +// This software is provided by the copyright holders and contributors "as is" and
 +// any express or implied warranties, including, but not limited to, the implied
 +// warranties of merchantability and fitness for a particular purpose are disclaimed.
 +// In no event shall the Intel Corporation or contributors be liable for any direct,
 +// indirect, incidental, special, exemplary, or consequential damages
 +// (including, but not limited to, procurement of substitute goods or services;
 +// loss of use, data, or profits; or business interruption) however caused
 +// and on any theory of liability, whether in contract, strict liability,
 +// or tort (including negligence or otherwise) arising in any way out of
 +// the use of this software, even if advised of the possibility of such damage.
 +//
 +//M*/
 +
 +#if !defined CUDA_DISABLER
 +
 +#include "opencv2/core/cuda/common.hpp"
 +#include "opencv2/core/cuda/vec_traits.hpp"
 +#include "opencv2/core/cuda/vec_math.hpp"
 +#include "opencv2/core/cuda/limits.hpp"
 +
 +namespace cv { namespace cuda { namespace device
 +{
 +    namespace mog2
 +    {
 +        ///////////////////////////////////////////////////////////////
 +        // Utility
 +
 +        __device__ __forceinline__ float cvt(uchar val)
 +        {
 +            return val;
 +        }
 +        __device__ __forceinline__ float3 cvt(const uchar3& val)
 +        {
 +            return make_float3(val.x, val.y, val.z);
 +        }
 +        __device__ __forceinline__ float4 cvt(const uchar4& val)
 +        {
 +            return make_float4(val.x, val.y, val.z, val.w);
 +        }
 +
 +        __device__ __forceinline__ float sqr(float val)
 +        {
 +            return val * val;
 +        }
 +        __device__ __forceinline__ float sqr(const float3& val)
 +        {
 +            return val.x * val.x + val.y * val.y + val.z * val.z;
 +        }
 +        __device__ __forceinline__ float sqr(const float4& val)
 +        {
 +            return val.x * val.x + val.y * val.y + val.z * val.z;
 +        }
 +
 +        __device__ __forceinline__ float sum(float val)
 +        {
 +            return val;
 +        }
 +        __device__ __forceinline__ float sum(const float3& val)
 +        {
 +            return val.x + val.y + val.z;
 +        }
 +        __device__ __forceinline__ float sum(const float4& val)
 +        {
 +            return val.x + val.y + val.z;
 +        }
 +
 +        template <class Ptr2D>
 +        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
 +        {
 +            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
 +            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
 +            ptr((k + 1) * rows + y, x) = val;
 +        }
 +
 +        ///////////////////////////////////////////////////////////////
 +        // MOG2
 +
 +        __constant__ int           c_nmixtures;
 +        __constant__ float         c_Tb;
 +        __constant__ float         c_TB;
 +        __constant__ float         c_Tg;
 +        __constant__ float         c_varInit;
 +        __constant__ float         c_varMin;
 +        __constant__ float         c_varMax;
 +        __constant__ float         c_tau;
 +        __constant__ unsigned char c_shadowVal;
 +
 +        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
 +        {
 +            varMin = ::fminf(varMin, varMax);
 +            varMax = ::fmaxf(varMin, varMax);
 +
 +            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
 +            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
 +        }
 +
 +        template <bool detectShadows, typename SrcT, typename WorkT>
 +        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
 +                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
 +                             const float alphaT, const float alpha1, const float prune)
 +        {
 +            const int x = blockIdx.x * blockDim.x + threadIdx.x; // one thread per pixel: (x, y) is the pixel of this thread
 +            const int y = blockIdx.y * blockDim.y + threadIdx.y;
 +
 +            if (x >= frame.cols || y >= frame.rows)
 +                return;
 +
 +            WorkT pix = cvt(frame(y, x)); // current pixel with its component(s) converted to float
 +
 +            //calculate distances to the modes (+ sort)
 +            //here we need to go in descending order!!!
 +
 +            bool background = false; // true - the pixel classified as background
 +
 +            //internal:
 +
 +            bool fitsPDF = false; //if it remains false a new GMM mode will be added
 +
 +            int nmodes = modesUsed(y, x); // number of mixture components stored for this pixel
 +            int nNewModes = nmodes; //current number of modes in GMM
 +
 +            float totalWeight = 0.0f;
 +
 +            //go through all modes
 +
 +            for (int mode = 0; mode < nmodes; ++mode)
 +            {
 +                //need only weight if fit is found
 +                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; // component `mode` of pixel (x, y) is stored at row (mode * frame.rows + y), column x
-                 gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
++                int swap_count = 0; // number of places the matched mode is moved up by the sort below
 +                //fit not found yet
 +                if (!fitsPDF)
 +                {
 +                    //check if it belongs to some of the remaining modes
 +                    float var = gmm_variance(mode * frame.rows + y, x);
 +
 +                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
 +
 +                    //calculate difference and distance
 +                    WorkT diff = mean - pix;
 +                    float dist2 = sqr(diff);
 +
 +                    //background? - Tb - usually larger than Tg
 +                    if (totalWeight < c_TB && dist2 < c_Tb * var)
 +                        background = true;
 +
 +                    //check fit
 +                    if (dist2 < c_Tg * var)
 +                    {
 +                        //belongs to the mode
 +                        fitsPDF = true;
 +
 +                        //update distribution
 +
 +                        //update weight
 +                        weight += alphaT;
 +                        float k = alphaT / weight;
 +
 +                        //update mean
 +                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
 +
 +                        //update variance
 +                        float varnew = var + k * (dist2 - var);
 +
 +                        //limit the variance
 +                        varnew = ::fmaxf(varnew, c_varMin);
 +                        varnew = ::fminf(varnew, c_varMax);
 +
 +                        gmm_variance(mode * frame.rows + y, x) = varnew;
 +
 +                        //sort
 +                        //all other weights are at the same place and
 +                        //only the matched mode (index `mode`) is higher -> just find the new place for it
 +
 +                        for (int i = mode; i > 0; --i)
 +                        {
 +                            //check one up
 +                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
 +                                break;
 +
++                            swap_count++;
 +                            //swap one up
 +                            swap(gmm_weight, x, y, i - 1, frame.rows);
 +                            swap(gmm_variance, x, y, i - 1, frame.rows);
 +                            swap(gmm_mean, x, y, i - 1, frame.rows);
 +                        }
 +
 +                        //belongs to the mode - fitsPDF is now true
 +                    }
 +                } // !fitsPDF
 +
 +                //check prune
 +                if (weight < -prune)
 +                {
 +                    weight = 0.0f;
 +                    nmodes--;
 +                }
 +
++                gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value; the slot is (mode - swap_count) because the sort above may have moved this mode up
 +                totalWeight += weight;
 +            }
 +
 +            //renormalize weights
 +
 +            totalWeight = 1.f / totalWeight;
 +            for (int mode = 0; mode < nmodes; ++mode)
 +                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
 +
 +            nmodes = nNewModes; //back to the mode count read at entry (pruning above decremented nmodes)
 +
 +            //make new mode if needed and exit
 +
 +            if (!fitsPDF)
 +            {
 +                // replace the weakest or add a new one
 +                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
 +
 +                if (nmodes == 1)
 +                    gmm_weight(mode * frame.rows + y, x) = 1.f;
 +                else
 +                {
 +                    gmm_weight(mode * frame.rows + y, x) = alphaT;
 +
 +                    // renormalize all other weights
 +
 +                    for (int i = 0; i < nmodes - 1; ++i)
 +                        gmm_weight(i * frame.rows + y, x) *= alpha1;
 +                }
 +
 +                // init
 +
 +                gmm_mean(mode * frame.rows + y, x) = pix;
 +                gmm_variance(mode * frame.rows + y, x) = c_varInit;
 +
 +                //sort
 +                //find the new place for it
 +
 +                for (int i = nmodes - 1; i > 0; --i)
 +                {
 +                    // check one up
 +                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
 +                        break;
 +
 +                    //swap one up
 +                    swap(gmm_weight, x, y, i - 1, frame.rows);
 +                    swap(gmm_variance, x, y, i - 1, frame.rows);
 +                    swap(gmm_mean, x, y, i - 1, frame.rows);
 +                }
 +            }
 +
 +            //set the number of modes
 +            modesUsed(y, x) = nmodes;
 +
 +            bool isShadow = false;
 +            if (detectShadows && !background)
 +            {
 +                float tWeight = 0.0f;
 +
 +                // check all the components  marked as background:
 +                for (int mode = 0; mode < nmodes; ++mode)
 +                {
 +                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
 +
 +                    WorkT pix_mean = pix * mean;
 +
 +                    float numerator = sum(pix_mean);
 +                    float denominator = sqr(mean);
 +
 +                    // no division by zero allowed
 +                    if (denominator == 0)
 +                        break;
 +
 +                    // if tau < a < 1 then also check the color distortion
 +                    if (numerator <= denominator && numerator >= c_tau * denominator)
 +                    {
 +                        float a = numerator / denominator;
 +
 +                        WorkT dD = a * mean - pix;
 +
 +                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
 +                        {
 +                            isShadow = true;
 +                            break;
 +                        }
 +                    };
 +
 +                    tWeight += gmm_weight(mode * frame.rows + y, x);
 +                    if (tWeight > c_TB)
 +                        break;
 +                }
 +            }
 +
 +            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; // 0 = background, c_shadowVal = shadow, 255 otherwise
 +        }
 +
 +        template <typename SrcT, typename WorkT>
 +        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
 +                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
 +        {
 +            dim3 block(32, 8);
 +            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
 +
 +            const float alpha1 = 1.0f - alphaT;
 +
 +            if (detectShadows)
 +            {
 +                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
 +
 +                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
 +                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
 +                                                                    alphaT, alpha1, prune);
 +            }
 +            else
 +            {
 +                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
 +
 +                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
 +                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
 +                                                                    alphaT, alpha1, prune);
 +            }
 +
 +            cudaSafeCall( cudaGetLastError() );
 +
 +            if (stream == 0)
 +                cudaSafeCall( cudaDeviceSynchronize() );
 +        }
 +
 +        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
 +                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
 +        {
 +            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
 +
 +            static const func_t funcs[] =
 +            {
 +                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
 +            };
 +
 +            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
 +        }
 +
 +        template <typename WorkT, typename OutT>
 +        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
 +        {
 +            const int x = blockIdx.x * blockDim.x + threadIdx.x;
 +            const int y = blockIdx.y * blockDim.y + threadIdx.y;
 +
 +            if (x >= modesUsed.cols || y >= modesUsed.rows)
 +                return;
 +
 +            int nmodes = modesUsed(y, x);
 +
 +            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
 +            float totalWeight = 0.0f;
 +
 +            for (int mode = 0; mode < nmodes; ++mode)
 +            {
 +                float weight = gmm_weight(mode * modesUsed.rows + y, x);
 +
 +                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
 +                meanVal = meanVal + weight * mean;
 +
 +                totalWeight += weight;
 +
 +                if(totalWeight > c_TB)
 +                    break;
 +            }
 +
 +            meanVal = meanVal * (1.f / totalWeight);
 +
 +            dst(y, x) = saturate_cast<OutT>(meanVal);
 +        }
 +
 +        template <typename WorkT, typename OutT>
 +        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
 +        {
 +            dim3 block(32, 8);
 +            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
 +
 +            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
 +
 +            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
 +            cudaSafeCall( cudaGetLastError() );
 +
 +            if (stream == 0)
 +                cudaSafeCall( cudaDeviceSynchronize() );
 +        }
 +
 +        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
 +        {
 +            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
 +
 +            static const func_t funcs[] =
 +            {
 +                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
 +            };
 +
 +            funcs[cn](modesUsed, weight, mean, dst, stream);
 +        }
 +    }
 +}}}
 +
 +
 +#endif /* CUDA_DISABLER */
Simple merge
@@@ -9,12 -9,13 +9,13 @@@
  #ifndef __OPENCV_PERF_PRECOMP_HPP__
  #define __OPENCV_PERF_PRECOMP_HPP__
  
 -#include "opencv2/ts/ts.hpp"
+ #include "cvconfig.h"
 +#include "opencv2/ts.hpp"
 +#include "opencv2/nonfree.hpp"
 +#include "opencv2/highgui.hpp"
  
 -#include "opencv2/nonfree/nonfree.hpp"
 -#include "opencv2/highgui/highgui.hpp"
  #include "opencv2/opencv_modules.hpp"
- #include "cvconfig.h"
  
  #ifdef HAVE_OPENCV_OCL
  #  include "opencv2/nonfree/ocl.hpp"
Simple merge
@@@ -910,12 -627,6 +911,12 @@@ void BackgroundSubtractorMOG2Impl::getB
      }
  }
  
 +Ptr<BackgroundSubtractorMOG2> createBackgroundSubtractorMOG2(int _history, double _varThreshold,
 +                                                             bool _bShadowDetection)
 +{
 +    return makePtr<BackgroundSubtractorMOG2Impl>(_history, (float)_varThreshold, _bShadowDetection);
 +}
 +
  }
  
- /* End of file. */
+ /* End of file. */
index f895b5b,0000000..9bc18b2
mode 100644,000000..100644
--- /dev/null
@@@ -1,272 -1,0 +1,273 @@@
 +#if CN==1 // single-channel build: one float per mean (CN is not defined in this excerpt - presumably a build option, confirm on the host side)
 +
 +#define T_MEAN float
 +#define F_ZERO (0.0f)
 +#define cnMode 1 // number of floats per T_MEAN element; used to convert mean_step from bytes to elements
 +
 +#define frameToMean(a, b) (b) = *(a); // read one uchar from a into b; the macro already ends with ';'
 +#define meanToFrame(a, b) *b = convert_uchar_sat(a); // write a back as a saturated uchar
 +
 +inline float sqr(float val) // square of a scalar
 +{
 +    return val * val;
 +}
 +
 +inline float sum(float val) // identity for the scalar case, so callers can use sum() for both builds
 +{
 +    return val;
 +}
 +
 +#else // any other CN: means are stored as float4, of which only x, y, z carry data
 +
 +#define T_MEAN float4
 +#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
 +#define cnMode 4 // number of floats per T_MEAN element; used to convert mean_step from bytes to elements
 +
 +#define meanToFrame(a, b)\
 +    b[0] = convert_uchar_sat(a.x); \
 +    b[1] = convert_uchar_sat(a.y); \
 +    b[2] = convert_uchar_sat(a.z); // only three bytes are written; a.w is dropped
 +
 +#define frameToMean(a, b)\
 +    b.x = a[0]; \
 +    b.y = a[1]; \
 +    b.z = a[2]; \
 +    b.w = 0.0f; // three bytes are read; the fourth component is zeroed
 +
 +inline float sqr(const float4 val) // squared length over x, y, z (w is ignored); returns a scalar, not a vector
 +{
 +    return val.x * val.x + val.y * val.y + val.z * val.z;
 +}
 +
 +inline float sum(const float4 val) // x + y + z (w is ignored)
 +{
 +    return (val.x + val.y + val.z);
 +}
 +
 +inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) // float4 counterpart of swap(): exchanges the entries of mode k and mode k + 1 at pixel (x, y)
 +{
 +    float4 val = ptr[(k * rows + y) * ptr_step + x]; // mode planes are stacked by rows: mode k, row y is row k * rows + y; ptr_step is in float4 elements
 +    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
 +    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 +}
 +
 +#endif // CN==1
 +
 +inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step) // exchanges the float entries of mode k and mode k + 1 at pixel (x, y)
 +{
 +    float val = ptr[(k * rows + y) * ptr_step + x]; // mode planes are stacked by rows: mode k, row y is row k * rows + y; ptr_step is in float elements
 +    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
 +    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 +}
 +
 +__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col, //uchar || uchar3 input pixels; frame_step and frame_offset are byte counts
 +                          __global uchar* modesUsed, int modesUsed_step, int modesUsed_offset,                         //int per pixel; modesUsed_offset is not applied in the body
 +                          __global uchar* weight, int weight_step, int weight_offset,                                  //float; weight_offset is not applied in the body
 +                          __global uchar* mean, int mean_step, int mean_offset,                                        //T_MEAN=float || float4; mean_offset is not applied in the body
 +                          __global uchar* variance, int var_step, int var_offset,                                      //float; var_offset is not applied in the body
 +                          __global uchar* fgmask, int fgmask_step, int fgmask_offset,                                  //written through an int pointer, one int per pixel
 +                          float alphaT, float alpha1, float prune,
 +                          int detectShadows_flag,
 +                          float c_Tb, float c_TB, float c_Tg, float c_varMin,                     //constants
 +                          float c_varMax, float c_varInit, float c_tau, uchar c_shadowVal)
 +{ // One work-item per pixel (x, y): updates that pixel's mixture (weight / mean / variance per mode) from frame and writes its mask value to fgmask.
 +    int x = get_global_id(0);
 +    int y = get_global_id(1);
 +
 +    weight_step/= sizeof(float); // the three steps arrive in bytes and are converted to element counts for the typed indexing below
 +    var_step   /= sizeof(float);
 +    mean_step  /= (sizeof(float)*cnMode);
 +
 +    if( x < frame_col && y < frame_row) // work-items outside the frame do nothing
 +    {
 +        __global const uchar* _frame = (frame + mad24( y, frame_step, x*CN + frame_offset));
 +        T_MEAN pix;
 +        frameToMean(_frame, pix);
 +
 +        bool background = false; // true - the pixel is classified as background
 +
 +        bool fitsPDF = false; //if it remains false a new GMM mode will be added
 +
 +        __global int* _modesUsed = (__global int*)(modesUsed + mad24( y, modesUsed_step, x*(int)(sizeof(int))));
 +        int nmodes = _modesUsed[0];
 +        int nNewModes = nmodes; //number of modes read at entry; never modified afterwards
 +
 +        float totalWeight = 0.0f;
 +
 +        __global float* _weight = (__global float*)(weight);
 +        __global float* _variance = (__global float*)(variance);
 +        __global T_MEAN* _mean = (__global T_MEAN*)(mean);
 +
 +        for (int mode = 0; mode < nmodes; ++mode) // pass 1: decay every weight and update the first mode the pixel matches
 +        {
 +
 +            float c_weight = alpha1 * _weight[(mode * frame_row + y) * weight_step + x] + prune; // mode planes are stacked by rows: mode m, row y is row m * frame_row + y
-             _weight[(mode * frame_row + y) * weight_step + x] = c_weight; //update weight by the calculated value
++            int swap_count = 0; // number of slots the matched mode is moved toward index 0 during this iteration
 +            if (!fitsPDF) // once a mode has matched, later modes only get the weight decay
 +            {
 +                float c_var = _variance[(mode * frame_row + y) * var_step + x];
 +
 +                T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x];
 +
 +                T_MEAN diff = c_mean - pix;
 +                float dist2 = sqr(diff); // squared distance between the mode mean and the pixel
 +
 +                if (totalWeight < c_TB && dist2 < c_Tb * c_var)
 +                    background = true;
 +
 +                if (dist2 < c_Tg * c_var) // the pixel matches this mode
 +                {
 +                    fitsPDF = true;
 +                    c_weight += alphaT;
 +                    float k = alphaT / c_weight;
 +
 +                    _mean[(mode * frame_row + y) * mean_step + x] = c_mean - k * diff; // moves the mean toward pix by the fraction k
 +
 +                    float varnew = c_var + k * (dist2 - c_var);
 +                    varnew = fmax(varnew, c_varMin); // clamp the variance to [c_varMin, c_varMax]
 +                    varnew = fmin(varnew, c_varMax);
 +
 +                    _variance[(mode * frame_row + y) * var_step + x] = varnew;
 +                    for (int i = mode; i > 0; --i) // move the matched mode toward index 0 until its predecessor has a larger weight
 +                    {
 +                        if (c_weight < _weight[((i - 1) * frame_row + y) * weight_step + x])
 +                            break;
++                        swap_count++;
 +                        swap(_weight, x, y, i - 1, frame_row, weight_step);
 +                        swap(_variance, x, y, i - 1, frame_row, var_step);
 +                        #if (CN==1)
 +                        swap(_mean, x, y, i - 1, frame_row, mean_step);
 +                        #else
 +                        swap4(_mean, x, y, i - 1, frame_row, mean_step);
 +                        #endif
 +                    }
 +                }
 +            } // !fitsPDF
 +
 +            if (c_weight < -prune)
 +            {
 +                c_weight = 0.0f;
 +                nmodes--; // also lowers the bound of the enclosing loop
 +            }
 +
++            _weight[((mode - swap_count) * frame_row + y) * weight_step + x] = c_weight; //store the updated weight in the slot this mode occupies after the swaps above
 +            totalWeight += c_weight;
 +        }
 +
 +        totalWeight = 1.f / totalWeight; // reused as the normalization factor; there is no guard against a zero sum
 +        for (int mode = 0; mode < nmodes; ++mode)
 +            _weight[(mode * frame_row + y) * weight_step + x] *= totalWeight;
 +
 +        nmodes = nNewModes; // restores the count read at entry, so the nmodes-- decrements above do not persist - NOTE(review): confirm this is intended
 +
 +        if (!fitsPDF) // no existing mode matched: create one from this pixel
 +        {
 +            int mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++; // overwrite the last slot when all NMIXTURES slots are used, otherwise append
 +
 +            if (nmodes == 1)
 +                _weight[(mode * frame_row + y) * weight_step + x] = 1.f;
 +            else
 +            {
 +                _weight[(mode * frame_row + y) * weight_step + x] = alphaT;
 +
 +                for (int i = 0; i < nmodes - 1; ++i) // scale the other weights by alpha1
 +                    _weight[(i * frame_row + y) * weight_step + x] *= alpha1;
 +            }
 +
 +            _mean[(mode * frame_row + y) * mean_step + x] = pix;
 +            _variance[(mode * frame_row + y) * var_step + x] = c_varInit;
 +
 +            for (int i = nmodes - 1; i > 0; --i) // move the new mode toward index 0 until its predecessor's weight exceeds alphaT
 +            {
 +                if (alphaT < _weight[((i - 1) * frame_row + y) * weight_step + x])
 +                    break;
 +
 +                swap(_weight, x, y, i - 1, frame_row, weight_step);
 +                swap(_variance, x, y, i - 1, frame_row, var_step);
 +                #if (CN==1)
 +                swap(_mean, x, y, i - 1, frame_row, mean_step);
 +                #else
 +                swap4(_mean, x, y, i - 1, frame_row, mean_step);
 +                #endif
 +            }
 +        }
 +
 +        _modesUsed[0] = nmodes;
 +        bool isShadow = false;
 +        if (detectShadows_flag && !background) // shadow test runs only for pixels not already classified as background
 +        {
 +            float tWeight = 0.0f;
 +
 +            for (int mode = 0; mode < nmodes; ++mode)
 +            {
 +                T_MEAN c_mean = _mean[(mode * frame_row + y) * mean_step + x];
 +
 +                T_MEAN pix_mean = pix * c_mean;
 +
 +                float numerator = sum(pix_mean); // dot product of pix and c_mean
 +                float denominator = sqr(c_mean); // squared length of c_mean
 +
 +                if (denominator == 0)
 +                    break;
 +
 +                if (numerator <= denominator && numerator >= c_tau * denominator) // i.e. c_tau <= a <= 1 for the ratio a below
 +                {
 +                    float a = numerator / denominator;
 +
 +                    T_MEAN dD = a * c_mean - pix;
 +
 +                    if (sqr(dD) < c_Tb * _variance[(mode * frame_row + y) * var_step + x] * a * a)
 +                    {
 +                        isShadow = true;
 +                        break;
 +                    }
 +                }
 +
 +                tWeight += _weight[(mode * frame_row + y) * weight_step + x];
 +                if (tWeight > c_TB) // stop once the accumulated weight of the examined modes exceeds c_TB
 +                    break;
 +            }
 +        }
 +        __global int* _fgmask = (__global int*)(fgmask + mad24(y, fgmask_step, x*(int)(sizeof(int)) + fgmask_offset));
 +        *_fgmask = background ? 0 : isShadow ? c_shadowVal : 255; // 0 = background, c_shadowVal = shadow, 255 = foreground
 +    }
 +}
 +
 +__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed, int modesUsed_step, int modesUsed_offset, int modesUsed_row, int modesUsed_col, // modesUsed_row / modesUsed_col bound the processed area; modesUsed_offset is not applied in the body
 +                                         __global const uchar* weight, int weight_step, int weight_offset, // weight_step is used as a byte step; weight_offset is not applied in the body
 +                                         __global const uchar* mean, int mean_step, int mean_offset, // mean_step is used as a byte step; mean_offset is not applied in the body
 +                                         __global uchar* dst, int dst_step, int dst_offset, // output image, CN bytes per pixel
 +                                         float c_TB)
 +{ // One work-item per pixel (x, y): writes the weighted average of that pixel's leading mode means to dst.
 +    int x = get_global_id(0);
 +    int y = get_global_id(1);
 +
 +    if(x < modesUsed_col && y < modesUsed_row) // work-items outside the image do nothing
 +    {
 +        __global int* _modesUsed = (__global int*)(modesUsed + mad24( y, modesUsed_step, x*(int)(sizeof(int)))); // the cast drops the const qualifier; the pointer is only read
 +        int nmodes = _modesUsed[0];
 +
 +        T_MEAN meanVal = (T_MEAN)F_ZERO;
 +
 +        float totalWeight = 0.0f;
 +
 +        for (int mode = 0; mode < nmodes; ++mode) // accumulate modes in storage order until their total weight exceeds c_TB
 +        {
 +            __global const float* _weight = (__global const float*)(weight + mad24(mode * modesUsed_row + y, weight_step, x*(int)(sizeof(float)))); // mode planes are stacked by rows: mode m, row y is row m * modesUsed_row + y
 +            float c_weight = _weight[0];
 +
 +            __global const T_MEAN* _mean = (__global const T_MEAN*)(mean + mad24(mode * modesUsed_row + y, mean_step, x*(int)(sizeof(float))*cnMode));
 +            T_MEAN c_mean = _mean[0];
 +            meanVal = meanVal + c_weight * c_mean;
 +
 +            totalWeight += c_weight;
 +
 +            if(totalWeight > c_TB)
 +                break;
 +        }
 +
 +        meanVal = meanVal * (1.f / totalWeight); // normalizes by the accumulated weight; not guarded, so totalWeight is 0 here when nmodes is 0
 +        __global uchar* _dst = dst + y * dst_step + x*CN + dst_offset;
 +        meanToFrame(meanVal, _dst);
 +    }
 +}
Simple merge
@@@ -47,10 -39,10 +47,10 @@@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_
      set(the_target "example_${project}_${name}")
      add_executable(${the_target} ${srcs})
  
 -    target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})
 +    target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
  
-     if(HAVE_CUDA)
-         target_link_libraries(${the_target} ${CUDA_CUDA_LIBRARY})
+     if(HAVE_CUDA AND NOT ANDROID)
+       target_link_libraries(${the_target} ${CUDA_CUDA_LIBRARY})
      endif()
  
      if(HAVE_opencv_nonfree)
@@@ -1,14 -1,12 +1,14 @@@
  #include <iostream>
  #include <iomanip>
  #include <string>
- #include <cctype>
+ #include <ctype.h>
  
 -#include "cvconfig.h"
 -#include "opencv2/core/core.hpp"
 -#include "opencv2/highgui/highgui.hpp"
 -#include "opencv2/gpu/gpu.hpp"
 +#include "opencv2/core.hpp"
 +#include "opencv2/core/utility.hpp"
 +#include "opencv2/highgui.hpp"
 +#include "opencv2/imgproc.hpp"
 +#include "opencv2/cudaoptflow.hpp"
 +#include "opencv2/cudaarithm.hpp"
  
  using namespace std;
  using namespace cv;
@@@ -1,12 -1,13 +1,14 @@@
  #include <iostream>
  #include <iomanip>
  #include <string>
 -#include "opencv2/core/core.hpp"
 -#include "opencv2/highgui/highgui.hpp"
 -#include "opencv2/imgproc/imgproc.hpp"
 -#include "opencv2/contrib/contrib.hpp"
 -#include "opencv2/superres/superres.hpp"
+ #include <ctype.h>
 +#include "opencv2/core.hpp"
 +#include "opencv2/core/utility.hpp"
 +#include "opencv2/highgui.hpp"
 +#include "opencv2/imgproc.hpp"
 +#include "opencv2/contrib.hpp"
 +#include "opencv2/superres.hpp"
  #include "opencv2/superres/optical_flow.hpp"
  #include "opencv2/opencv_modules.hpp"