From: Marina Kolpakova
Date: Thu, 28 Jun 2012 15:41:27 +0000 (+0000)
Subject: merged the trunk r8669:8702
X-Git-Tag: accepted/2.0/20130307.220821~465
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6cca6a45482ae0650d13ee1550724b32c9a76b7c;p=profile%2Fivi%2Fopencv.git

merged the trunk r8669:8702
---

diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt
index 6ccd5b0..63d6455 100644
--- a/3rdparty/tbb/CMakeLists.txt
+++ b/3rdparty/tbb/CMakeLists.txt
@@ -114,6 +114,7 @@ add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 #required
 if(tbb_need_GENERIC_DWORD_LOAD_STORE)
   #needed by TBB 4.0 update 1,2; fixed in TBB 4.0 update 3 but it has 2 new problems
   add_definitions(-D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
+  set(tbb_need_GENERIC_DWORD_LOAD_STORE ON PARENT_SCOPE)
 endif()

 add_library(tbb STATIC ${lib_srcs} ${lib_hdrs} "${CMAKE_CURRENT_SOURCE_DIR}/android_additional.h" "${CMAKE_CURRENT_SOURCE_DIR}/${tbb_version_file}")
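The new PARENT_SCOPE line is what makes this hunk work: set() inside a directory added via add_subdirectory() only affects the child scope, so without PARENT_SCOPE the tbb_need_GENERIC_DWORD_LOAD_STORE flag would never reach the check that OpenCVDetectTBB.cmake now performs (see its hunk below). A minimal standalone sketch of the pattern, with hypothetical names, not part of the patch:

    # child/CMakeLists.txt
    set(child_flag ON PARENT_SCOPE)  # visible in the parent after add_subdirectory() returns

    # CMakeLists.txt (parent)
    add_subdirectory(child)
    if(child_flag)
      message(STATUS "flag propagated up from child/")
    endif()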
diff --git a/cmake/OpenCVConfig.cmake b/cmake/OpenCVConfig.cmake
new file mode 100644
index 0000000..cb055cf
--- /dev/null
+++ b/cmake/OpenCVConfig.cmake
@@ -0,0 +1,155 @@
+# ===================================================================================
+#  The OpenCV CMake configuration file
+#
+#             ** File generated automatically, do not modify **
+#
+#  Usage from an external project:
+#    In your CMakeLists.txt, add these lines:
+#
+#    FIND_PACKAGE(OpenCV REQUIRED)
+#    TARGET_LINK_LIBRARIES(MY_TARGET_NAME ${OpenCV_LIBS})
+#
+#    Or you can search for specific OpenCV modules:
+#
+#    FIND_PACKAGE(OpenCV REQUIRED core highgui)
+#
+#    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
+#
+#    This file will define the following variables:
+#      - OpenCV_LIBS                     : The list of libraries to link against.
+#      - OpenCV_LIB_DIR                  : The directories where lib files are. Calling LINK_DIRECTORIES
+#                                          with this path is NOT needed.
+#      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
+#      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability.
+#      - OpenCV_ANDROID_NATIVE_API_LEVEL : Minimum required level of Android API.
+#      - OpenCV_VERSION                  : The version of this OpenCV build. Example: "2.4.0"
+#      - OpenCV_VERSION_MAJOR            : Major version part of OpenCV_VERSION. Example: "2"
+#      - OpenCV_VERSION_MINOR            : Minor version part of OpenCV_VERSION. Example: "4"
+#      - OpenCV_VERSION_PATCH            : Patch version part of OpenCV_VERSION. Example: "0"
+#
+#    Advanced variables:
+#      - OpenCV_SHARED
+#      - OpenCV_CONFIG_PATH
+#      - OpenCV_INSTALL_PATH (not set on Windows)
+#      - OpenCV_LIB_COMPONENTS
+#      - OpenCV_USE_MANGLED_PATHS
+#      - OpenCV_HAVE_ANDROID_CAMERA
+#
+# ===================================================================================
+#
+#    Windows pack specific options:
+#      - OpenCV_STATIC
+#      - OpenCV_CUDA
+
+if(NOT DEFINED OpenCV_STATIC)
+  # look for global setting
+  if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS)
+    set(OpenCV_STATIC OFF)
+  else()
+    set(OpenCV_STATIC ON)
+  endif()
+endif()
+
+if(NOT DEFINED OpenCV_CUDA)
+  # if the user's app uses CUDA, then it probably wants CUDA-enabled OpenCV binaries
+  if(CUDA_FOUND)
+    set(OpenCV_CUDA ON)
+  endif()
+endif()
+
+if(MSVC)
+  if(CMAKE_CL_64)
+    set(OpenCV_ARCH x64)
+    set(OpenCV_TBB_ARCH intel64)
+  else()
+    set(OpenCV_ARCH x86)
+    set(OpenCV_TBB_ARCH ia32)
+  endif()
+  if(MSVC_VERSION EQUAL 1400)
+    set(OpenCV_RUNTIME vc8)
+  elseif(MSVC_VERSION EQUAL 1500)
+    set(OpenCV_RUNTIME vc9)
+  elseif(MSVC_VERSION EQUAL 1600)
+    set(OpenCV_RUNTIME vc10)
+  elseif(MSVC_VERSION EQUAL 1700)
+    set(OpenCV_RUNTIME vc11)
+  endif()
+elseif(MINGW)
+  set(OpenCV_RUNTIME mingw)
+
+  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
+                  OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(OPENCV_GCC_TARGET_MACHINE MATCHES "64")
+    set(MINGW64 1)
+    set(OpenCV_ARCH x64)
+  else()
+    set(OpenCV_ARCH x86)
+  endif()
+endif()
+
+if(CMAKE_VERSION VERSION_GREATER 2.6.2)
+  unset(OpenCV_CONFIG_PATH CACHE)
+endif()
+
+get_filename_component(OpenCV_CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH CACHE)
+if(OpenCV_RUNTIME AND OpenCV_ARCH)
+  if(OpenCV_STATIC AND EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
+    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib/OpenCVConfig.cmake")
+      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
+    else()
+      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/staticlib")
+    endif()
+  elseif(EXISTS "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
+    if(OpenCV_CUDA AND EXISTS "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib/OpenCVConfig.cmake")
+      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
+    else()
+      set(OpenCV_LIB_PATH "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}/lib")
+    endif()
+  endif()
+endif()
+
+if(OpenCV_LIB_PATH AND EXISTS "${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
+  set(OpenCV_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release OpenCV libraries are located" FORCE)
+  set(OpenCV_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug OpenCV libraries are located" FORCE)
+  set(OpenCV_3RDPARTY_LIB_DIR_OPT "${OpenCV_LIB_PATH}" CACHE PATH "Path where release 3rdparty OpenCV dependencies are located" FORCE)
+  set(OpenCV_3RDPARTY_LIB_DIR_DBG "${OpenCV_LIB_PATH}" CACHE PATH "Path where debug 3rdparty OpenCV dependencies are located" FORCE)
+
+  include("${OpenCV_LIB_PATH}/OpenCVConfig.cmake")
+
+  if(OpenCV_CUDA)
+    set(_OpenCV_LIBS "")
+    foreach(_lib ${OpenCV_LIBS})
+      string(REPLACE "${OpenCV_CONFIG_PATH}/gpu/${OpenCV_ARCH}/${OpenCV_RUNTIME}" "${OpenCV_CONFIG_PATH}/${OpenCV_ARCH}/${OpenCV_RUNTIME}" _lib2 "${_lib}")
+      if(NOT EXISTS "${_lib}" AND EXISTS "${_lib2}")
+        list(APPEND _OpenCV_LIBS "${_lib2}")
+      else()
+        list(APPEND _OpenCV_LIBS "${_lib}")
+      endif()
+    endforeach()
+    set(OpenCV_LIBS ${_OpenCV_LIBS})
+  endif()
+  set(OpenCV_FOUND TRUE CACHE BOOL "" FORCE)
+  set(OPENCV_FOUND TRUE CACHE BOOL "" FORCE)
+
+  if(NOT OpenCV_FIND_QUIETLY)
+    message(STATUS "Found OpenCV ${OpenCV_VERSION} in ${OpenCV_LIB_PATH}")
+    if(NOT OpenCV_LIB_PATH MATCHES "/staticlib")
+      get_filename_component(_OpenCV_LIB_PATH "${OpenCV_LIB_PATH}/../bin" ABSOLUTE)
+      file(TO_NATIVE_PATH "${_OpenCV_LIB_PATH}" _OpenCV_LIB_PATH)
+      message(STATUS "You might need to add ${_OpenCV_LIB_PATH} to your PATH to be able to run your applications.")
+      if(OpenCV_LIB_PATH MATCHES "/gpu/")
+        string(REPLACE "\\gpu" "" _OpenCV_LIB_PATH2 "${_OpenCV_LIB_PATH}")
+        message(STATUS "GPU support is enabled so you might also need ${_OpenCV_LIB_PATH2} in your PATH (it must go after the ${_OpenCV_LIB_PATH}).")
+      endif()
+    endif()
+  endif()
+else()
+  if(NOT OpenCV_FIND_QUIETLY)
+    message(WARNING "Found OpenCV 2.4.0 Windows Super Pack but it has no binaries compatible with your configuration.
+    You should manually point the CMake variable OpenCV_DIR to your build of the OpenCV library.")
+  endif()
+  set(OpenCV_FOUND FALSE CACHE BOOL "" FORCE)
+  set(OPENCV_FOUND FALSE CACHE BOOL "" FORCE)
+endif()
+
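The usage block at the top of the generated file translates into a consumer project along these lines (a minimal sketch with hypothetical project and target names; any install that ships this OpenCVConfig.cmake should behave the same):

    cmake_minimum_required(VERSION 2.8)
    project(my_opencv_app)

    # If the config file is not found automatically, set OpenCV_DIR to the
    # directory containing OpenCVConfig.cmake before calling find_package().
    find_package(OpenCV REQUIRED)   # or: find_package(OpenCV REQUIRED core highgui)

    include_directories(${OpenCV_INCLUDE_DIRS})
    add_executable(my_opencv_app main.cpp)
    target_link_libraries(my_opencv_app ${OpenCV_LIBS})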
diff --git a/cmake/OpenCVDetectTBB.cmake b/cmake/OpenCVDetectTBB.cmake
index b15f9f7..c37a8c3 100644
--- a/cmake/OpenCVDetectTBB.cmake
+++ b/cmake/OpenCVDetectTBB.cmake
@@ -2,7 +2,10 @@ if(ANDROID)
     add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/tbb")
     include_directories(SYSTEM ${TBB_INCLUDE_DIRS})
     set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} tbb)
-    add_definitions(-DTBB_USE_GCC_BUILTINS=1 -D__TBB_GCC_BUILTIN_ATOMICS_PRESENT=1 -D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
+    add_definitions(-DTBB_USE_GCC_BUILTINS=1 -D__TBB_GCC_BUILTIN_ATOMICS_PRESENT=1)
+    if(tbb_need_GENERIC_DWORD_LOAD_STORE)
+      add_definitions(-D__TBB_USE_GENERIC_DWORD_LOAD_STORE=1)
+    endif()
     set(HAVE_TBB 1)
 elseif(UNIX AND NOT APPLE)
     PKG_CHECK_MODULES(TBB tbb)

diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index 317ea4c..573d7eb 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -72,17 +72,13 @@ macro(ocv_add_dependencies full_modname)
       list(APPEND ${__depsvar} "${d}")
     endif()
   endforeach()
+  unset(__depsvar)
+
+  ocv_list_unique(OPENCV_MODULE_${full_modname}_REQ_DEPS)
+  ocv_list_unique(OPENCV_MODULE_${full_modname}_OPT_DEPS)

-  if(OPENCV_MODULE_${full_modname}_REQ_DEPS)
-    list(REMOVE_DUPLICATES OPENCV_MODULE_${full_modname}_REQ_DEPS)
-  endif()
-  if(OPENCV_MODULE_${full_modname}_OPT_DEPS)
-    list(REMOVE_DUPLICATES OPENCV_MODULE_${full_modname}_OPT_DEPS)
-  endif()
   set(OPENCV_MODULE_${full_modname}_REQ_DEPS ${OPENCV_MODULE_${full_modname}_REQ_DEPS} CACHE INTERNAL "Required dependencies of ${full_modname} module")
   set(OPENCV_MODULE_${full_modname}_OPT_DEPS ${OPENCV_MODULE_${full_modname}_OPT_DEPS} CACHE INTERNAL "Optional dependencies of ${full_modname} module")
-
-  unset(__depsvar)
 endmacro()
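ocv_add_dependencies() is normally reached through the module declaration macros rather than called directly. A sketch of a typical module CMakeLists.txt (hypothetical module name; ocv_define_module and the REQUIRED/OPTIONAL keywords come from parts of OpenCVModule.cmake that are not shown in this diff):

    set(the_description "The example OpenCV module")
    ocv_define_module(example opencv_core OPTIONAL opencv_highgui)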
@@ -105,19 +101,22 @@ macro(ocv_add_module _name)
     ")
   endif()

-  #remember module details
   if(NOT DEFINED the_description)
     set(the_description "The ${name} OpenCV module")
   endif()
-  set(OPENCV_MODULE_${the_module}_DESCRIPTION "${the_description}" CACHE INTERNAL "Brief description of ${the_module} module")
-  set(OPENCV_MODULE_${the_module}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${the_module} module sources")

-  #create option to enable/disable this module
   if(NOT DEFINED BUILD_${the_module}_INIT)
     set(BUILD_${the_module}_INIT ON)
   endif()
+
+  # create option to enable/disable this module
+  option(BUILD_${the_module} "Include ${the_module} module into the OpenCV build" ${BUILD_${the_module}_INIT})
+
+  # remember the module details
+  set(OPENCV_MODULE_${the_module}_DESCRIPTION "${the_description}" CACHE INTERNAL "Brief description of ${the_module} module")
+  set(OPENCV_MODULE_${the_module}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${the_module} module sources")
+
+  # parse list of dependencies
   if("${ARGV1}" STREQUAL "INTERNAL" OR "${ARGV1}" STREQUAL "BINDINGS")
     set(OPENCV_MODULE_${the_module}_CLASS "${ARGV1}" CACHE INTERNAL "The category of the module")
     set(__ocv_argn__ ${ARGN})
@@ -143,28 +142,19 @@ macro(ocv_add_module _name)
     set(OPENCV_MODULES_DISABLED_USER ${OPENCV_MODULES_DISABLED_USER} "${the_module}" CACHE INTERNAL "List of OpenCV modules explicitly disabled by user")
   endif()

-  #TODO: add submodules if any
+  # TODO: add submodules if any

-  #stop processing of current file
+  # stop processing of current file
   return()
 else(OPENCV_INITIAL_PASS)
   if(NOT BUILD_${the_module})
-    #extra protection from redefinition
-    return()
+    return() # extra protection from redefinition
   endif()
   project(${the_module})
 endif(OPENCV_INITIAL_PASS)
 endmacro()

-# Internal macro; disables OpenCV module
-# ocv_module_turn_off()
-macro(__ocv_module_turn_off the_module)
-  list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}")
-  list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}")
-  list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}")
-  set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration")
-endmacro()
-
+# excludes module from current configuration
 macro(ocv_module_disable module)
   set(__modname ${module})
   if(NOT __modname MATCHES "^opencv_")
@@ -175,41 +165,46 @@ macro(ocv_module_disable module)
   set(OPENCV_MODULE_${__modname}_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}" CACHE INTERNAL "Location of ${__modname} module sources")
   set(OPENCV_MODULES_DISABLED_FORCE "${OPENCV_MODULES_DISABLED_FORCE}" CACHE INTERNAL "List of OpenCV modules which can not be built in current configuration")
   unset(__modname)
-  return()#leave the current folder
+  return() # leave the current folder
 endmacro()

+# Internal macro; partly disables OpenCV module
+macro(__ocv_module_turn_off the_module)
+  list(APPEND OPENCV_MODULES_DISABLED_AUTO "${the_module}")
+  list(REMOVE_ITEM OPENCV_MODULES_BUILD "${the_module}")
+  list(REMOVE_ITEM OPENCV_MODULES_PUBLIC "${the_module}")
+  set(HAVE_${the_module} OFF CACHE INTERNAL "Module ${the_module} can not be built in current configuration")
+endmacro()
+
+# Internal macro for dependencies tracking
 macro(__ocv_flatten_module_required_dependencies the_module)
   set(__flattened_deps "")
   set(__resolved_deps "")
   set(__req_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS})

   while(__req_depends)
-    list(GET __req_depends 0 __dep)
-    list(REMOVE_AT __req_depends 0)
+    ocv_list_pop_front(__req_depends __dep)
     if(__dep STREQUAL the_module)
-      #TODO: think how to deal with cyclic dependency
-      __ocv_module_turn_off(${the_module})
+      __ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency
       break()
-    elseif("${OPENCV_MODULES_DISABLED_USER};${OPENCV_MODULES_DISABLED_AUTO}" MATCHES "(^|;)${__dep}(;|$)")
-      #depends on disabled module
-      __ocv_module_turn_off(${the_module})
+    elseif(";${OPENCV_MODULES_DISABLED_USER};${OPENCV_MODULES_DISABLED_AUTO};" MATCHES ";${__dep};")
+      __ocv_module_turn_off(${the_module}) # depends on disabled module
       break()
-    elseif("${OPENCV_MODULES_BUILD}" MATCHES "(^|;)${__dep}(;|$)")
-      if(__resolved_deps MATCHES "(^|;)${__dep}(;|$)")
-        #all dependencies of this module are already resolved
-        list(APPEND __flattened_deps "${__dep}")
+    elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};")
+      if(";${__resolved_deps};" MATCHES ";${__dep};")
+        list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved
       else()
-        #put all required subdependencies before this dependency and mark it as resolved
+        # put all required subdependencies before this dependency and mark it as resolved
         list(APPEND __resolved_deps "${__dep}")
         list(INSERT __req_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${__dep})
       endif()
     elseif(__dep MATCHES "^opencv_")
-      #depends on missing module
-      __ocv_module_turn_off(${the_module})
+      __ocv_module_turn_off(${the_module}) # depends on missing module
+      message(WARNING "Unknown \"${__dep}\" module is listed in the dependencies of \"${the_module}\" module")
       break()
     else()
-      #skip non-modules
+      # skip non-modules
     endif()
   endwhile()

@@ -220,37 +215,33 @@ macro(__ocv_flatten_module_required_dependencies the_module)
     set(OPENCV_MODULE_${the_module}_DEPS "")
   endif()

-  unset(__resolved_deps)
-  unset(__flattened_deps)
-  unset(__req_depends)
-  unset(__dep)
+  ocv_clear_vars(__resolved_deps __flattened_deps __req_depends __dep)
 endmacro()

+# Internal macro for dependencies tracking
 macro(__ocv_flatten_module_optional_dependencies the_module)
-  set(__flattened_deps ${OPENCV_MODULE_${the_module}_DEPS})
-  set(__resolved_deps ${OPENCV_MODULE_${the_module}_DEPS})
-  set(__opt_depends ${OPENCV_MODULE_${the_module}_OPT_DEPS})
+  set(__flattened_deps "")
+  set(__resolved_deps "")
+  set(__opt_depends ${OPENCV_MODULE_${the_module}_REQ_DEPS} ${OPENCV_MODULE_${the_module}_OPT_DEPS})

   while(__opt_depends)
-    list(GET __opt_depends 0 __dep)
-    list(REMOVE_AT __opt_depends 0)
+    ocv_list_pop_front(__opt_depends __dep)
     if(__dep STREQUAL the_module)
-      #TODO: think how to deal with cyclic dependency
-      __ocv_module_turn_off(${the_module})
+      __ocv_module_turn_off(${the_module}) # TODO: think how to deal with cyclic dependency
       break()
-    elseif("${OPENCV_MODULES_BUILD}" MATCHES "(^|;)${__dep}(;|$)")
-      if(__resolved_deps MATCHES "(^|;)${__dep}(;|$)")
-        #all dependencies of this module are already resolved
-        list(APPEND __flattened_deps "${__dep}")
+    elseif(";${OPENCV_MODULES_BUILD};" MATCHES ";${__dep};")
+      if(";${__resolved_deps};" MATCHES ";${__dep};")
+        list(APPEND __flattened_deps "${__dep}") # all dependencies of this module are already resolved
       else()
-        #put all subdependencies before this dependency and mark it as resolved
+        # put all subdependencies before this dependency and mark it as resolved
         list(APPEND __resolved_deps "${__dep}")
         list(INSERT __opt_depends 0 ${OPENCV_MODULE_${__dep}_REQ_DEPS} ${OPENCV_MODULE_${__dep}_OPT_DEPS} ${__dep})
       endif()
     else()
-      #skip non-modules or missing modules
+      # skip non-modules or missing modules
     endif()
   endwhile()

+
   if(__flattened_deps)
     list(REMOVE_DUPLICATES __flattened_deps)
     set(OPENCV_MODULE_${the_module}_DEPS ${__flattened_deps})
@@ -258,10 +249,7 @@ macro(__ocv_flatten_module_optional_dependencies the_module)
     set(OPENCV_MODULE_${the_module}_DEPS "")
   endif()

-  unset(__resolved_deps)
-  unset(__flattened_deps)
-  unset(__opt_depends)
-  unset(__dep)
+  ocv_clear_vars(__resolved_deps __flattened_deps __opt_depends __dep)
 endmacro()
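Both flattening macros above are the same iterative depth-first expansion: pop the head of the work list; if it was already resolved once, emit it, otherwise push its own dependencies in front of it and revisit. A self-contained toy version of the loop (runnable with `cmake -P deps_demo.cmake`; the dependency data is made up, and plain `set()` is used in place of `list(INSERT ... 0 ...)` for the same effect):

    # deps_demo.cmake -- toy version of the dependency flattening loop
    macro(pop_front LST VAR) # same contract as ocv_list_pop_front
      list(GET ${LST} 0 ${VAR})
      list(REMOVE_AT ${LST} 0)
    endmacro()

    set(DEP_b "a")    # module b depends on a
    set(DEP_c "b;a")  # module c depends on b and a
    set(work "c")     # flatten the dependencies of c
    set(resolved "")
    set(flattened "")

    while(work)
      pop_front(work d)
      if(";${resolved};" MATCHES ";${d};")
        list(APPEND flattened "${d}") # d's own deps were already expanded
      else()
        list(APPEND resolved "${d}")
        set(work ${DEP_${d}} ${d} ${work}) # subdependencies first, then d again
      endif()
    endwhile()

    list(REMOVE_DUPLICATES flattened)
    message(STATUS "flattened: ${flattened}") # prints: flattened: a;b;c

This ordering guarantee (every dependency precedes its dependent) is what the build-order computation in __ocv_flatten_module_dependencies() below relies on.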
@@ -269,16 +257,16 @@ macro(__ocv_flatten_module_dependencies)
     set(HAVE_${m} OFF CACHE INTERNAL "Module ${m} will not be built in current configuration")
   endforeach()

   foreach(m ${OPENCV_MODULES_BUILD})
-    set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will not be built in current configuration")
+    set(HAVE_${m} ON CACHE INTERNAL "Module ${m} will be built in current configuration")

     __ocv_flatten_module_required_dependencies(${m})
   endforeach()

   foreach(m ${OPENCV_MODULES_BUILD})
     __ocv_flatten_module_optional_dependencies(${m})

-    #dependencies from other modules
+    # save dependencies from other modules
     set(OPENCV_MODULE_${m}_DEPS ${OPENCV_MODULE_${m}_DEPS} CACHE INTERNAL "Flattened dependencies of ${m} module")
-    #extra dependencies
+    # save extra dependencies
     set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_REQ_DEPS} ${OPENCV_MODULE_${m}_OPT_DEPS})
     if(OPENCV_MODULE_${m}_DEPS_EXT AND OPENCV_MODULE_${m}_DEPS)
       list(REMOVE_ITEM OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS})
@@ -287,8 +275,15 @@ macro(__ocv_flatten_module_dependencies)
     set(OPENCV_MODULE_${m}_DEPS_EXT ${OPENCV_MODULE_${m}_DEPS_EXT} CACHE INTERNAL "Extra dependencies of ${m} module")
   endforeach()

+  # order modules by dependencies
+  set(OPENCV_MODULES_BUILD_ "")
+  foreach(m ${OPENCV_MODULES_BUILD})
+    list(APPEND OPENCV_MODULES_BUILD_ ${OPENCV_MODULE_${m}_DEPS} ${m})
+  endforeach()
+  ocv_list_unique(OPENCV_MODULES_BUILD_)
+
   set(OPENCV_MODULES_PUBLIC ${OPENCV_MODULES_PUBLIC} CACHE INTERNAL "List of OpenCV modules marked for export")
-  set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD} CACHE INTERNAL "List of OpenCV modules included into the build")
+  set(OPENCV_MODULES_BUILD ${OPENCV_MODULES_BUILD_} CACHE INTERNAL "List of OpenCV modules included into the build")
   set(OPENCV_MODULES_DISABLED_AUTO ${OPENCV_MODULES_DISABLED_AUTO} CACHE INTERNAL "List of OpenCV modules implicitly disabled due to dependencies")
 endmacro()

@@ -300,10 +295,11 @@ macro(ocv_glob_modules)
   endif()
   set(__directories_observed "")

-  #collect modules
+  # collect modules
   set(OPENCV_INITIAL_PASS ON)
   foreach(__path ${ARGN})
     ocv_get_real_path(__path "${__path}")
+
     list(FIND __directories_observed "${__path}" __pathIdx)
     if(__pathIdx GREATER -1)
       message(FATAL_ERROR "The directory ${__path} is observed for OpenCV modules second time.")
@@ -316,37 +312,36 @@ macro(ocv_glob_modules)
       foreach(mod ${__ocvmodules})
         ocv_get_real_path(__modpath "${__path}/${mod}")
         if(EXISTS "${__modpath}/CMakeLists.txt")
+
           list(FIND __directories_observed "${__modpath}" __pathIdx)
           if(__pathIdx GREATER -1)
             message(FATAL_ERROR "The module from ${__modpath} is already loaded.")
           endif()
           list(APPEND __directories_observed "${__modpath}")

-          add_subdirectory("${__modpath}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+          if(OCV_MODULE_RELOCATE_ON_INITIAL_PASS)
+            file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+            file(COPY "${__modpath}/CMakeLists.txt" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+            add_subdirectory("${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+            if("${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+              set(OPENCV_MODULE_opencv_${mod}_LOCATION "${__modpath}" CACHE PATH "" FORCE)
+            endif()
+          else()
+            add_subdirectory("${__modpath}" "${CMAKE_CURRENT_BINARY_DIR}/${mod}/.${mod}")
+          endif()
         endif()
       endforeach()
     endif()
   endforeach()
-  unset(__ocvmodules)
-  unset(__directories_observed)
-  unset(__path)
-  unset(__modpath)
-  unset(__pathIdx)
+  ocv_clear_vars(__ocvmodules __directories_observed __path __modpath __pathIdx)

-  #resolve dependencies
+  # resolve dependencies
   __ocv_flatten_module_dependencies()

-  #order modules by
dependencies - set(OPENCV_MODULES_BUILD_ "") - foreach(m ${OPENCV_MODULES_BUILD}) - list(APPEND OPENCV_MODULES_BUILD_ ${OPENCV_MODULE_${m}_DEPS} ${m}) - endforeach() - ocv_list_unique(OPENCV_MODULES_BUILD_) - - #create modules + # create modules set(OPENCV_INITIAL_PASS OFF PARENT_SCOPE) set(OPENCV_INITIAL_PASS OFF) - foreach(m ${OPENCV_MODULES_BUILD_}) + foreach(m ${OPENCV_MODULES_BUILD}) if(m MATCHES "^opencv_") string(REGEX REPLACE "^opencv_" "" __shortname "${m}") add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}") @@ -389,7 +384,7 @@ endmacro() macro(ocv_module_include_directories) ocv_include_directories("${OPENCV_MODULE_${the_module}_LOCATION}/include" "${OPENCV_MODULE_${the_module}_LOCATION}/src" - "${CMAKE_CURRENT_BINARY_DIR}"#for precompiled headers + "${CMAKE_CURRENT_BINARY_DIR}" # for precompiled headers ) ocv_include_modules(${OPENCV_MODULE_${the_module}_DEPS} ${ARGN}) endmacro() @@ -417,7 +412,7 @@ macro(ocv_set_module_sources) endif() # use full paths for module to be independent from the module location - ocv_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) + ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") @@ -446,9 +441,11 @@ endmacro() # ocv_create_module(SKIP_LINK) macro(ocv_create_module) add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}) + if(NOT "${ARGN}" STREQUAL "SKIP_LINK") target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN}) endif() + add_dependencies(opencv_modules ${the_module}) if(ENABLE_SOLUTION_FOLDERS) @@ -485,7 +482,7 @@ macro(ocv_create_module) if(CMAKE_CROSSCOMPILING) set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:secchk") endif() - set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG") + set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG") endif() install(TARGETS ${the_module} @@ -495,7 +492,7 @@ macro(ocv_create_module) ) # only "public" headers need to be installed - if(OPENCV_MODULE_${the_module}_HEADERS AND OPENCV_MODULES_PUBLIC MATCHES "(^|;)${the_module}(;|$)") + if(OPENCV_MODULE_${the_module}_HEADERS AND ";${OPENCV_MODULES_PUBLIC};" MATCHES ";${the_module};") foreach(hdr ${OPENCV_MODULE_${the_module}_HEADERS}) string(REGEX REPLACE "^.*opencv2/" "opencv2/" hdr2 "${hdr}") if(hdr2 MATCHES "^(opencv2/.*)/[^/]+.h(..)?$") @@ -510,30 +507,17 @@ endmacro() # Usage: # ocv_add_precompiled_headers(${the_module}) macro(ocv_add_precompiled_headers the_target) - if("${the_target}" MATCHES "^opencv_test_.*$") - SET(pch_path "test/test_") + if("${the_target}" MATCHES "^opencv_test_.*$") + SET(pch_path "test/test_") elseif("${the_target}" MATCHES "opencv_perf_gpu_cpu") - SET(pch_path "perf_cpu/perf_cpu_") - elseif("${the_target}" MATCHES "^opencv_perf_.*$") - SET(pch_path "perf/perf_") - else() - SET(pch_path "src/") - endif() - set(pch_header "${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.hpp") - - if(PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS AND EXISTS "${pch_header}") - if(CMAKE_GENERATOR MATCHES Visual) - set(${the_target}_pch 
"${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.cpp") - add_native_precompiled_header(${the_target} ${pch_header}) - elseif(CMAKE_GENERATOR MATCHES Xcode) - add_native_precompiled_header(${the_target} ${pch_header}) - elseif(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GENERATOR MATCHES "Makefiles|Ninja") - add_precompiled_header(${the_target} ${pch_header}) - endif() - endif() - unset(pch_header) - unset(pch_path) - unset(${the_target}_pch) + SET(pch_path "perf_cpu/perf_cpu_") + elseif("${the_target}" MATCHES "^opencv_perf_.*$") + SET(pch_path "perf/perf_") + else() + SET(pch_path "src/") + endif() + ocv_add_precompiled_header_to_target(${the_target} "${CMAKE_CURRENT_SOURCE_DIR}/${pch_path}precomp.hpp") + unset(pch_path) endmacro() # short command for adding simple OpenCV module @@ -563,7 +547,7 @@ macro(ocv_check_dependencies) endforeach() endmacro() -#auxiliary macro to parse arguments of ocv_add_accuracy_tests and ocv_add_perf_tests commands +# auxiliary macro to parse arguments of ocv_add_accuracy_tests and ocv_add_perf_tests commands macro(__ocv_parse_test_sources tests_type) set(OPENCV_${tests_type}_${the_module}_SOURCES "") set(OPENCV_${tests_type}_${the_module}_DEPS "") @@ -593,7 +577,7 @@ endmacro() # this is a command for adding OpenCV performance tests to the module # ocv_add_perf_tests() -macro(ocv_add_perf_tests) +function(ocv_add_perf_tests) set(perf_path "${CMAKE_CURRENT_SOURCE_DIR}/perf") if(BUILD_PERF_TESTS AND EXISTS "${perf_path}") __ocv_parse_test_sources(PERF ${ARGN}) @@ -604,7 +588,7 @@ macro(ocv_add_perf_tests) if(OCV_DEPENDENCIES_FOUND) set(the_target "opencv_perf_${name}") - #project(${the_target}) + # project(${the_target}) ocv_module_include_directories(${perf_deps} "${perf_path}") @@ -636,14 +620,14 @@ macro(ocv_add_perf_tests) add_dependencies(perf ${the_target}) endif() else(OCV_DEPENDENCIES_FOUND) - #TODO: warn about unsatisfied dependencies + # TODO: warn about unsatisfied dependencies endif(OCV_DEPENDENCIES_FOUND) endif() -endmacro() +endfunction() # this is a command for adding OpenCV accuracy/regression tests to the module # ocv_add_accuracy_tests([FILES ] [DEPENDS_ON] ) -macro(ocv_add_accuracy_tests) +function(ocv_add_accuracy_tests) set(test_path "${CMAKE_CURRENT_SOURCE_DIR}/test") ocv_check_dependencies(${test_deps}) if(BUILD_TESTS AND EXISTS "${test_path}") @@ -655,7 +639,7 @@ macro(ocv_add_accuracy_tests) if(OCV_DEPENDENCIES_FOUND) set(the_target "opencv_test_${name}") - #project(${the_target}) + # project(${the_target}) ocv_module_include_directories(${test_deps} "${test_path}") @@ -687,12 +671,12 @@ macro(ocv_add_accuracy_tests) ocv_add_precompiled_headers(${the_target}) else(OCV_DEPENDENCIES_FOUND) - #TODO: warn about unsatisfied dependencies + # TODO: warn about unsatisfied dependencies endif(OCV_DEPENDENCIES_FOUND) endif() -endmacro() +endfunction() -# internal macro; finds all link dependencies of module +# internal macro; finds all link dependencies of the module # should be used at the end of CMake processing macro(__ocv_track_module_link_dependencies the_module optkind) set(${the_module}_MODULE_DEPS_${optkind} "") @@ -742,7 +726,7 @@ macro(__ocv_track_module_link_dependencies the_module optkind) #ocv_list_reverse(${the_module}_EXTRA_DEPS_${optkind}) if(__has_cycle) - #not sure if it can work + # not sure if it can work list(APPEND ${the_module}_MODULE_DEPS_${optkind} "${the_module}") endif() @@ -754,13 +738,13 @@ macro(__ocv_track_module_link_dependencies the_module optkind) endif()#STATIC_LIBRARY unset(__module_type) 
-#message("${the_module}_MODULE_DEPS_${optkind}") -#message(" ${${the_module}_MODULE_DEPS_${optkind}}") -#message(" ${OPENCV_MODULE_${the_module}_DEPS}") -#message("") -#message("${the_module}_EXTRA_DEPS_${optkind}") -#message(" ${${the_module}_EXTRA_DEPS_${optkind}}") -#message("") + #message("${the_module}_MODULE_DEPS_${optkind}") + #message(" ${${the_module}_MODULE_DEPS_${optkind}}") + #message(" ${OPENCV_MODULE_${the_module}_DEPS}") + #message("") + #message("${the_module}_EXTRA_DEPS_${optkind}") + #message(" ${${the_module}_EXTRA_DEPS_${optkind}}") + #message("") endmacro() # creates lists of build dependencies needed for external projects diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake index 55b712c..16dbb06 100644 --- a/cmake/OpenCVPCHSupport.cmake +++ b/cmake/OpenCVPCHSupport.cmake @@ -1,4 +1,4 @@ -# taken from http://www.vtk.org/Bug/view.php?id=1260 and slightly adjusted +# taken from http://public.kitware.com/Bug/view.php?id=1260 and slightly adjusted # - Try to find precompiled headers support for GCC 3.4 and 4.x # Once done this will define: @@ -26,8 +26,8 @@ IF(CMAKE_COMPILER_IS_GNUCXX) SET(_PCH_include_prefix "-I") SET(_PCH_isystem_prefix "-isystem") -ELSEIF(WIN32) - SET(PCHSupport_FOUND TRUE) # for experimental msvc support +ELSEIF(CMAKE_GENERATOR MATCHES "^Visual.*$") + SET(PCHSupport_FOUND TRUE) SET(_PCH_include_prefix "/I") SET(_PCH_isystem_prefix "/I") ELSE() @@ -324,3 +324,17 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) endif() ENDMACRO(ADD_NATIVE_PRECOMPILED_HEADER) + +macro(ocv_add_precompiled_header_to_target the_target pch_header) + if(PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS AND EXISTS "${pch_header}") + if(CMAKE_GENERATOR MATCHES Visual) + string(REGEX REPLACE "hpp$" "cpp" ${the_target}_pch "${pch_header}") + add_native_precompiled_header(${the_target} ${pch_header}) + unset(${the_target}_pch) + elseif(CMAKE_GENERATOR MATCHES Xcode) + add_native_precompiled_header(${the_target} ${pch_header}) + elseif(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GENERATOR MATCHES "Makefiles|Ninja") + add_precompiled_header(${the_target} ${pch_header}) + endif() + endif() +endmacro() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index aef7525..8c20906 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -384,6 +384,17 @@ macro(ocv_list_add_suffix LST SUFFIX) endmacro() +# gets and removes the first element from list +macro(ocv_list_pop_front LST VAR) + if(${LST}) + list(GET ${LST} 0 ${VAR}) + list(REMOVE_AT ${LST} 0) + else() + set(${VAR} "") + endif() +endmacro() + + # simple regex escaping routine (does not cover all cases!!!) macro(ocv_regex_escape var regex) string(REGEX REPLACE "([+.*^$])" "\\\\1" ${var} "${regex}") @@ -401,7 +412,7 @@ endmacro() # convert list of paths to full paths -macro(ocv_to_full_paths VAR) +macro(ocv_convert_to_full_paths VAR) if(${VAR}) set(__tmp "") foreach(path ${${VAR}}) diff --git a/doc/tutorials/introduction/android_binary_package/android_binary_package.rst b/doc/tutorials/introduction/android_binary_package/android_binary_package.rst index 65592ff..c25f32e 100644 --- a/doc/tutorials/introduction/android_binary_package/android_binary_package.rst +++ b/doc/tutorials/introduction/android_binary_package/android_binary_package.rst @@ -102,7 +102,7 @@ You need the following tools to be installed: #. **Eclipse IDE** Check the `Android SDK System Requirements `_ document for a list of Eclipse versions that are compatible with the Android SDK. 
- For OpenCV 2.4.0 we recommend Eclipse 3.7 (Indigo) or later versions. They work well for OpenCV under both Windows and Linux. + For OpenCV 2.4.x we recommend Eclipse 3.7 (Indigo) or later versions. They work well for OpenCV under both Windows and Linux. If you have no Eclipse installed, you can get it from the `download page <http://www.eclipse.org/downloads/>`_. @@ -154,12 +154,12 @@ Get the OpenCV package for Android development .. code-block:: bash - tar -jxvf ~/Downloads/OpenCV-2.4.0-android-bin.tar.bz2 + tar -jxvf ~/Downloads/OpenCV-2.4.1-android-bin2.tar.bz2 For this tutorial I have unpacked OpenCV to the :file:`C:\\Work\\android-opencv\\` directory. -.. |opencv_android_bin_pack| replace:: OpenCV-2.4.0-android-bin.tar.bz2 -.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.0/OpenCV-2.4.0-android-bin.tar.bz2/download +.. |opencv_android_bin_pack| replace:: OpenCV-2.4.1-android-bin2.tar.bz2 +.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.1/OpenCV-2.4.1-android-bin2.tar.bz2/download .. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack| .. |seven_zip| replace:: 7-Zip .. _seven_zip: http://www.7-zip.org/ @@ -214,7 +214,7 @@ Open OpenCV library and samples in Eclipse :align: center * Click :guilabel:`OK` to close preferences dialog. - + #. Import OpenCV and samples into workspace. OpenCV library is packed as a ready-for-use `Android Library Project diff --git a/modules/contrib/src/rgbdodometry.cpp b/modules/contrib/src/rgbdodometry.cpp index 81525f5..59f3254 100644 --- a/modules/contrib/src/rgbdodometry.cpp +++ b/modules/contrib/src/rgbdodometry.cpp @@ -48,10 +48,12 @@ #include #if defined(HAVE_EIGEN) && EIGEN_WORLD_VERSION == 3 -#include <Eigen/Core> -#include <unsupported/Eigen/MatrixFunctions> - -#include <Eigen/Dense> +# include <Eigen/Core> +# ifdef ANDROID + template <typename Scalar> Scalar log2(Scalar v) { using std::log; return log(v)/log(Scalar(2)); } +# endif +# include <unsupported/Eigen/MatrixFunctions> +# include <Eigen/Dense> #endif #include @@ -581,7 +583,7 @@ bool cv::RGBDOdometry( cv::Mat& Rt, const Mat& initRt, const double fy = levelCameraMatrix.at<double>(1,1); const double determinantThreshold = 1e-6; - Mat corresps( levelImage0.size(), levelImage0.type(), CV_32SC1 ); + Mat corresps( levelImage0.size(), levelImage0.type() ); // Run transformation search on current level iteratively. for( int iter = 0; iter < (*iterCountsPtr)[level]; iter ++ ) diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 469186a..f4b60fc 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -85,7 +85,6 @@ template<typename _Tp, int cn> class CV_EXPORTS Vec; template<typename _Tp, int m, int n> class CV_EXPORTS Matx; typedef std::string String; -typedef std::basic_string<wchar_t> WString; class Mat; class SparseMat; @@ -110,8 +109,12 @@ template<typename _Tp> class CV_EXPORTS MatIterator_; template<typename _Tp> class CV_EXPORTS MatConstIterator_; template<typename _Tp> class CV_EXPORTS MatCommaInitializer_; +#if !defined(ANDROID) || (defined(_GLIBCXX_USE_WCHAR_T) && _GLIBCXX_USE_WCHAR_T) +typedef std::basic_string<wchar_t> WString; + CV_EXPORTS string fromUtf16(const WString& str); CV_EXPORTS WString toUtf16(const string& str); +#endif CV_EXPORTS string format( const char* fmt, ...
); CV_EXPORTS string tempfile( const char* suffix CV_DEFAULT(0)); diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index d27fc07..edaadc8 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -151,7 +151,7 @@ cv::string cv::FileStorage::getDefaultObjectName(const string& _filename) namespace cv { -#if !defined(ANDROID) || defined(_GLIBCXX_USE_WCHAR_T) +#if !defined(ANDROID) || (defined(_GLIBCXX_USE_WCHAR_T) && _GLIBCXX_USE_WCHAR_T) string fromUtf16(const WString& str) { cv::AutoBuffer<char> _buf(str.size()*4 + 1); diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp new file mode 100644 index 0000000..f413432 --- /dev/null +++ b/modules/gpu/perf/perf_core.cpp @@ -0,0 +1,1883 @@ +#include "perf_precomp.hpp" + +#ifdef HAVE_CUDA + +////////////////////////////////////////////////////////////////////// +// Merge + +GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + std::vector<cv::gpu::GpuMat> src(channels); + for (int i = 0; i < channels; ++i) + src[i] = cv::gpu::GpuMat(size, depth, cv::Scalar::all(i)); + + cv::gpu::GpuMat dst; + + cv::gpu::merge(src, dst); + + TEST_CYCLE() + { + cv::gpu::merge(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Merge, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + testing::Values(2, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// Split + +GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + cv::gpu::GpuMat src(size, CV_MAKE_TYPE(depth, channels), cv::Scalar(1, 2, 3, 4)); + + std::vector<cv::gpu::GpuMat> dst; + + cv::gpu::split(src, dst); + + TEST_CYCLE() + { + cv::gpu::split(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Split, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + testing::Values(2, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// Add_Mat + +GPU_PERF_TEST(Add_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0.0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::add(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::add(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Add_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Add_Scalar + +GPU_PERF_TEST(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s(1, 2, 3, 4); + cv::gpu::GpuMat dst; + +
cv::gpu::add(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::add(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Add_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Subtract_Mat + +GPU_PERF_TEST(Subtract_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0.0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::subtract(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::subtract(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Subtract_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Subtract_Scalar + +GPU_PERF_TEST(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s(1, 2, 3, 4); + cv::gpu::GpuMat dst; + + cv::gpu::subtract(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::subtract(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Subtract_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Multiply_Mat + +GPU_PERF_TEST(Multiply_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0.0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::multiply(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::multiply(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Multiply_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Multiply_Scalar + +GPU_PERF_TEST(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s(1, 2, 3, 4); + cv::gpu::GpuMat dst; + + cv::gpu::multiply(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::multiply(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Multiply_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Divide_Mat + +GPU_PERF_TEST(Divide_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + 
cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0.0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::divide(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::divide(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Divide_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Divide_Scalar + +GPU_PERF_TEST(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s(1, 2, 3, 4); + cv::gpu::GpuMat dst; + + cv::gpu::divide(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::divide(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Divide_Scalar_Inv + +GPU_PERF_TEST(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + double scale = 100.0; + cv::gpu::GpuMat dst; + + cv::gpu::divide(scale, src, dst); + + TEST_CYCLE() + { + cv::gpu::divide(scale, src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar_Inv, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// AbsDiff_Mat + +GPU_PERF_TEST(AbsDiff_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0.0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::absdiff(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::absdiff(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// AbsDiff_Scalar + +GPU_PERF_TEST(AbsDiff_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s(1, 2, 3, 4); + cv::gpu::GpuMat dst; + + cv::gpu::absdiff(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::absdiff(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, 
CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// Abs + +GPU_PERF_TEST(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::abs(src, dst); + + TEST_CYCLE() + { + cv::gpu::abs(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Abs, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Sqr + +GPU_PERF_TEST(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::sqr(src, dst); + + TEST_CYCLE() + { + cv::gpu::sqr(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Sqr, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Sqrt + +GPU_PERF_TEST(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::sqrt(src, dst); + + TEST_CYCLE() + { + cv::gpu::sqrt(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Sqrt, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Log + +GPU_PERF_TEST(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 1.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::log(src, dst); + + TEST_CYCLE() + { + cv::gpu::log(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Log, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Exp + +GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 1.0, 10.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::exp(src, dst); + + TEST_CYCLE() + { + cv::gpu::exp(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Exp, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Pow + +GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat 
src_host(size, depth); + fill(src_host, 1.0, 10.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::pow(src, 2.3, dst); + + TEST_CYCLE() + { + cv::gpu::pow(src, 2.3, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Pow, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16S, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Compare_Mat + +CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) +#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE)) + +GPU_PERF_TEST(Compare_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int cmp_code = GET_PARAM(3); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::compare(src1, src2, dst, cmp_code); + + TEST_CYCLE() + { + cv::gpu::compare(src1, src2, dst, cmp_code); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Compare_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + ALL_CMP_CODES)); + +////////////////////////////////////////////////////////////////////// +// Compare_Scalar + +GPU_PERF_TEST(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int cmp_code = GET_PARAM(3); + + cv::Mat src_host(size, depth); + fill(src_host, 0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s = cv::Scalar::all(50); + cv::gpu::GpuMat dst; + + cv::gpu::compare(src, s, dst, cmp_code); + + TEST_CYCLE() + { + cv::gpu::compare(src, s, dst, cmp_code); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Compare_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + ALL_CMP_CODES)); + +////////////////////////////////////////////////////////////////////// +// Bitwise_Not + +GPU_PERF_TEST(Bitwise_Not, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_not(src, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_not(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_Not, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_And_Mat + +GPU_PERF_TEST(Bitwise_And_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_and(src1, src2, 
dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_and(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_And_Scalar + +GPU_PERF_TEST(Bitwise_And_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src_host(size, type); + fill(src_host, 0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s = cv::Scalar(50, 50, 50, 50); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_and(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_and(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S), + testing::Values(1, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_Or_Mat + +GPU_PERF_TEST(Bitwise_Or_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_or(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_or(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_Or_Scalar + +GPU_PERF_TEST(Bitwise_Or_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src_host(size, type); + fill(src_host, 0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s = cv::Scalar(50, 50, 50, 50); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_or(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_or(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S), + testing::Values(1, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_Xor_Mat + +GPU_PERF_TEST(Bitwise_Xor_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 100.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_xor(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_xor(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, 
CV_16U, CV_32S))); + +////////////////////////////////////////////////////////////////////// +// Bitwise_Xor_Scalar + +GPU_PERF_TEST(Bitwise_Xor_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src_host(size, type); + fill(src_host, 0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar s = cv::Scalar(50, 50, 50, 50); + cv::gpu::GpuMat dst; + + cv::gpu::bitwise_xor(src, s, dst); + + TEST_CYCLE() + { + cv::gpu::bitwise_xor(src, s, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S), + testing::Values(1, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// RShift + +GPU_PERF_TEST(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src_host(size, type); + fill(src_host, 0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar_<int> val = cv::Scalar_<int>::all(4); + cv::gpu::GpuMat dst; + + cv::gpu::rshift(src, val, dst); + + TEST_CYCLE() + { + cv::gpu::rshift(src, val, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, RShift, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S), + testing::Values(1, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// LShift + +GPU_PERF_TEST(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int channels = GET_PARAM(3); + + int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src_host(size, type); + fill(src_host, 0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar_<int> val = cv::Scalar_<int>::all(4); + cv::gpu::GpuMat dst; + + cv::gpu::lshift(src, val, dst); + + TEST_CYCLE() + { + cv::gpu::lshift(src, val, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, LShift, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S), + testing::Values(1, 3, 4))); + +////////////////////////////////////////////////////////////////////// +// Min_Mat + +GPU_PERF_TEST(Min_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 255.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 255.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::min(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::min(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Min_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Min_Scalar + +GPU_PERF_TEST(Min_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + +
cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0, 255.0); + + cv::gpu::GpuMat src(src_host); + double val = 50.0; + cv::gpu::GpuMat dst; + + cv::gpu::min(src, val, dst); + + TEST_CYCLE() + { + cv::gpu::min(src, val, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Min_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Max_Mat + +GPU_PERF_TEST(Max_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src1_host(size, depth); + fill(src1_host, 0, 255.0); + + cv::Mat src2_host(size, depth); + fill(src2_host, 0, 255.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::max(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::max(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Max_Mat, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// Max_Scalar + +GPU_PERF_TEST(Max_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0, 255.0); + + cv::gpu::GpuMat src(src_host); + double val = 50.0; + cv::gpu::GpuMat dst; + + cv::gpu::max(src, val, dst); + + TEST_CYCLE() + { + cv::gpu::max(src, val, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Max_Scalar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F))); + +////////////////////////////////////////////////////////////////////// +// AddWeighted + +GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth1 = GET_PARAM(2); + int depth2 = GET_PARAM(3); + int dst_depth = GET_PARAM(4); + + cv::Mat src1_host(size, depth1); + fill(src1_host, 0, 100.0); + + cv::Mat src2_host(size, depth2); + fill(src2_host, 0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth); + + TEST_CYCLE() + { + cv::gpu::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth); + } +} + +INSTANTIATE_TEST_CASE_P(Core, AddWeighted, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F), + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// GEMM +#ifdef HAVE_CUBLAS + +CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T) +#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T)) + +GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + 
cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + int flags = GET_PARAM(3); + + cv::Mat src1_host(size, type); + fill(src1_host, 0.0, 10.0); + + cv::Mat src2_host(size, type); + fill(src2_host, 0.0, 10.0); + + cv::Mat src3_host(size, type); + fill(src3_host, 0.0, 10.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat src3(src3_host); + cv::gpu::GpuMat dst; + + cv::gpu::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); + + declare.time(5.0); + + TEST_CYCLE() + { + cv::gpu::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); + } +} + +INSTANTIATE_TEST_CASE_P(Core, GEMM, testing::Combine( + ALL_DEVICES, + testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), + testing::Values(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2), + ALL_GEMM_FLAGS)); + +#endif +////////////////////////////////////////////////////////////////////// +// Transpose + +GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::transpose(src, dst); + + TEST_CYCLE() + { + cv::gpu::transpose(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Transpose, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1))); + +////////////////////////////////////////////////////////////////////// +// Flip + +enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) +#define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_BOTH), FlipCode(FLIP_X), FlipCode(FLIP_Y)) + +GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + int flipCode = GET_PARAM(3); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::flip(src, dst, flipCode); + + TEST_CYCLE() + { + cv::gpu::flip(src, dst, flipCode); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Flip, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4), + ALL_FLIP_CODES)); + +////////////////////////////////////////////////////////////////////// +// LUT_OneChannel + +GPU_PERF_TEST(LUT_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 100.0); + + cv::Mat lut(1, 256, CV_8UC1); + fill(lut, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::LUT(src, lut, dst); + + TEST_CYCLE() + { + cv::gpu::LUT(src, lut, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, LUT_OneChannel, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC3))); + +////////////////////////////////////////////////////////////////////// +// LUT_MultiChannel + +GPU_PERF_TEST(LUT_MultiChannel, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + 
+ cv::Mat src_host(size, type); + fill(src_host, 0.0, 100.0); + + cv::Mat lut(1, 256, CV_MAKE_TYPE(CV_8U, src_host.channels())); + fill(lut, 0.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::LUT(src, lut, dst); + + TEST_CYCLE() + { + cv::gpu::LUT(src, lut, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, LUT_MultiChannel, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC3))); + +////////////////////////////////////////////////////////////////////// +// Magnitude_Complex + +GPU_PERF_TEST(Magnitude_Complex, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + + cv::Mat src_host(size, CV_32FC2); + fill(src_host, -100.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::magnitude(src, dst); + + TEST_CYCLE() + { + cv::gpu::magnitude(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Magnitude_Complex, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES)); + +////////////////////////////////////////////////////////////////////// +// Magnitude_Sqr_Complex + +GPU_PERF_TEST(Magnitude_Sqr_Complex, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + + cv::Mat src_host(size, CV_32FC2); + fill(src_host, -100.0, 100.0); + + cv::gpu::GpuMat src(src_host); + cv::gpu::GpuMat dst; + + cv::gpu::magnitudeSqr(src, dst); + + TEST_CYCLE() + { + cv::gpu::magnitudeSqr(src, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Magnitude_Sqr_Complex, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES)); + +////////////////////////////////////////////////////////////////////// +// Magnitude + +GPU_PERF_TEST(Magnitude, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + + cv::Mat src1_host(size, CV_32FC1); + fill(src1_host, -100.0, 100.0); + + cv::Mat src2_host(size, CV_32FC1); + fill(src2_host, -100.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::magnitude(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::magnitude(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Magnitude, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES)); + +////////////////////////////////////////////////////////////////////// +// Magnitude_Sqr + +GPU_PERF_TEST(Magnitude_Sqr, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + + cv::Mat src1_host(size, CV_32FC1); + fill(src1_host, -100.0, 100.0); + + cv::Mat src2_host(size, CV_32FC1); + fill(src2_host, -100.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::magnitudeSqr(src1, src2, dst); + + TEST_CYCLE() + { + cv::gpu::magnitudeSqr(src1, src2, dst); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Magnitude_Sqr, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES)); + +////////////////////////////////////////////////////////////////////// +// Phase + +IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool) + +GPU_PERF_TEST(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + bool angleInDegrees = GET_PARAM(2); + + cv::Mat 
src1_host(size, CV_32FC1); + fill(src1_host, -100.0, 100.0); + + cv::Mat src2_host(size, CV_32FC1); + fill(src2_host, -100.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat dst; + + cv::gpu::phase(src1, src2, dst, angleInDegrees); + + TEST_CYCLE() + { + cv::gpu::phase(src1, src2, dst, angleInDegrees); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Phase, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(false, true))); + +////////////////////////////////////////////////////////////////////// +// CartToPolar + +GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + bool angleInDegrees = GET_PARAM(2); + + cv::Mat src1_host(size, CV_32FC1); + fill(src1_host, -100.0, 100.0); + + cv::Mat src2_host(size, CV_32FC1); + fill(src2_host, -100.0, 100.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + cv::gpu::GpuMat magnitude; + cv::gpu::GpuMat angle; + + cv::gpu::cartToPolar(src1, src2, magnitude, angle, angleInDegrees); + + TEST_CYCLE() + { + cv::gpu::cartToPolar(src1, src2, magnitude, angle, angleInDegrees); + } +} + +INSTANTIATE_TEST_CASE_P(Core, CartToPolar, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(false, true))); + +////////////////////////////////////////////////////////////////////// +// PolarToCart + +GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + bool angleInDegrees = GET_PARAM(2); + + cv::Mat magnitude_host(size, CV_32FC1); + fill(magnitude_host, 0.0, 100.0); + + cv::Mat angle_host(size, CV_32FC1); + fill(angle_host, 0.0, angleInDegrees ? 
360.0 : 2 * CV_PI); + + cv::gpu::GpuMat magnitude(magnitude_host); + cv::gpu::GpuMat angle(angle_host); + cv::gpu::GpuMat x; + cv::gpu::GpuMat y; + + cv::gpu::polarToCart(magnitude, angle, x, y, angleInDegrees); + + TEST_CYCLE() + { + cv::gpu::polarToCart(magnitude, angle, x, y, angleInDegrees); + } +} + +INSTANTIATE_TEST_CASE_P(Core, PolarToCart, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(false, true))); + +////////////////////////////////////////////////////////////////////// +// MeanStdDev + +GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + + cv::Mat src_host(size, CV_8UC1); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar mean; + cv::Scalar stddev; + cv::gpu::GpuMat buf; + + cv::gpu::meanStdDev(src, mean, stddev, buf); + + TEST_CYCLE() + { + cv::gpu::meanStdDev(src, mean, stddev, buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, MeanStdDev, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES)); + +////////////////////////////////////////////////////////////////////// +// Norm + +GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + int normType = GET_PARAM(3); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + double dst; + cv::gpu::GpuMat buf; + + dst = cv::gpu::norm(src, normType, buf); + + TEST_CYCLE() + { + dst = cv::gpu::norm(src, normType, buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Norm, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32S, CV_32F), + testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))); + +////////////////////////////////////////////////////////////////////// +// NormDiff + +GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int normType = GET_PARAM(2); + + cv::Mat src1_host(size, CV_8UC1); + fill(src1_host, 0.0, 255.0); + + cv::Mat src2_host(size, CV_8UC1); + fill(src2_host, 0.0, 255.0); + + cv::gpu::GpuMat src1(src1_host); + cv::gpu::GpuMat src2(src2_host); + double dst; + + dst = cv::gpu::norm(src1, src2, normType); + + TEST_CYCLE() + { + dst = cv::gpu::norm(src1, src2, normType); + } +} + +INSTANTIATE_TEST_CASE_P(Core, NormDiff, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))); + +////////////////////////////////////////////////////////////////////// +// Sum + +GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar dst; + cv::gpu::GpuMat buf; + + dst = cv::gpu::sum(src, buf); + + TEST_CYCLE() + { + dst = cv::gpu::sum(src, buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Sum, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4))); + 
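All of the Core benchmarks in this file share one shape: fill a host cv::Mat with random values, upload it to a cv::gpu::GpuMat, run the operation once as an untimed warm-up (so dst allocation and first-use CUDA costs stay out of the measurement), then time only the repeated calls inside TEST_CYCLE. A minimal sketch of that recurring pattern, for reference while reading the remaining tests (MyOp and cv::gpu::myOp are illustrative placeholders, not functions from this patch):

// Sketch of the recurring perf-test shape; MyOp / cv::gpu::myOp are
// placeholder names used only to illustrate the pattern.
GPU_PERF_TEST(MyOp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
{
    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
    cv::gpu::setDevice(devInfo.deviceID());   // bind the CUDA device under test

    cv::Size size = GET_PARAM(1);
    int depth = GET_PARAM(2);

    cv::Mat src_host(size, depth);
    fill(src_host, 0.0, 255.0);               // randomize the input on the host

    cv::gpu::GpuMat src(src_host);            // upload once, outside the timed loop
    cv::gpu::GpuMat dst;

    cv::gpu::myOp(src, dst);                  // warm-up call: dst allocation and
                                              // first-use overhead are not timed
    TEST_CYCLE()
    {
        cv::gpu::myOp(src, dst);              // only these iterations are measured
    }
}

INSTANTIATE_TEST_CASE_P(Core, MyOp, testing::Combine(
    ALL_DEVICES,                              // every CUDA device found at run time
    GPU_TYPICAL_MAT_SIZES,                    // the standard benchmark resolutions
    testing::Values(CV_8U, CV_32F)));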
+////////////////////////////////////////////////////////////////////// +// Sum_Abs + +GPU_PERF_TEST(Sum_Abs, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar dst; + cv::gpu::GpuMat buf; + + dst = cv::gpu::absSum(src, buf); + + TEST_CYCLE() + { + dst = cv::gpu::absSum(src, buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Sum_Abs, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4))); + +////////////////////////////////////////////////////////////////////// +// Sum_Sqr + +GPU_PERF_TEST(Sum_Sqr, cv::gpu::DeviceInfo, cv::Size, MatType) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int type = GET_PARAM(2); + + cv::Mat src_host(size, type); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + cv::Scalar dst; + cv::gpu::GpuMat buf; + + dst = cv::gpu::sqrSum(src, buf); + + TEST_CYCLE() + { + dst = cv::gpu::sqrSum(src, buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, Sum_Sqr, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4))); + +////////////////////////////////////////////////////////////////////// +// MinMax + +GPU_PERF_TEST(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + double minVal, maxVal; + cv::gpu::GpuMat buf; + + cv::gpu::minMax(src, &minVal, &maxVal, cv::gpu::GpuMat(), buf); + + TEST_CYCLE() + { + cv::gpu::minMax(src, &minVal, &maxVal, cv::gpu::GpuMat(), buf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, MinMax, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// MinMaxLoc + +GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 255.0); + + cv::gpu::GpuMat src(src_host); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + cv::gpu::GpuMat valbuf, locbuf; + + cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(), valbuf, locbuf); + + TEST_CYCLE() + { + cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(), valbuf, locbuf); + } +} + +INSTANTIATE_TEST_CASE_P(Core, MinMaxLoc, testing::Combine( + ALL_DEVICES, + GPU_TYPICAL_MAT_SIZES, + testing::Values(CV_8U, CV_16U, CV_32F, CV_64F))); + +////////////////////////////////////////////////////////////////////// +// CountNonZero + +GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth) +{ + cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Size size = GET_PARAM(1); + int depth = GET_PARAM(2); + + cv::Mat src_host(size, depth); + fill(src_host, 0.0, 1.5); + + cv::gpu::GpuMat 
src(src_host);
+    int dst;
+    cv::gpu::GpuMat buf;
+
+    dst = cv::gpu::countNonZero(src, buf);
+
+    TEST_CYCLE()
+    {
+        dst = cv::gpu::countNonZero(src, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, CountNonZero, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Reduce
+
+CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+#define ALL_REDUCE_CODES testing::Values(CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+enum {Rows = 0, Cols = 1};
+CV_ENUM(ReduceDim, Rows, Cols)
+
+GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, ReduceDim)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+    int reduceOp = GET_PARAM(4);
+    int dim = GET_PARAM(5);
+
+    int type = CV_MAKE_TYPE(depth, channels);
+
+    cv::Mat src_host(size, type);
+    fill(src_host, 0.0, 10.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::reduce(src, dst, dim, reduceOp);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::reduce(src, dst, dim, reduceOp);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Reduce, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8U, CV_16U, CV_16S, CV_32F),
+    testing::Values(1, 2, 3, 4),
+    ALL_REDUCE_CODES,
+    testing::Values(ReduceDim(Rows), ReduceDim(Cols))));
+
+#endif
diff --git a/modules/gpu/src/brute_force_matcher.cpp b/modules/gpu/src/brute_force_matcher.cpp
index 8270dfe..17fa6b1 100644
--- a/modules/gpu/src/brute_force_matcher.cpp
+++ b/modules/gpu/src/brute_force_matcher.cpp
@@ -1005,11 +1005,11 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchConvert(const Mat& trainIdx
         for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
         {
-            int trainIdx = *trainIdx_ptr;
-            int imgIdx = *imgIdx_ptr;
-            float distance = *distance_ptr;
+            int _trainIdx = *trainIdx_ptr;
+            int _imgIdx = *imgIdx_ptr;
+            float _distance = *distance_ptr;

-            DMatch m(queryIdx, trainIdx, imgIdx, distance);
+            DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);

             curMatches.push_back(m);
         }
diff --git a/modules/gpu/src/cuda/bf_knnmatch.cu b/modules/gpu/src/cuda/bf_knnmatch.cu
index f59cef0..19dc9df 100644
--- a/modules/gpu/src/cuda/bf_knnmatch.cu
+++ b/modules/gpu/src/cuda/bf_knnmatch.cu
@@ -45,19 +45,19 @@
 #include "opencv2/gpu/device/vec_distance.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"

-namespace cv { namespace gpu { namespace device
+namespace cv { namespace gpu { namespace device
 {
-    namespace bf_knnmatch
+    namespace bf_knnmatch
     {
         ///////////////////////////////////////////////////////////////////////////////
         // Reduction

-        template <int BLOCK_SIZE>
-        __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
-                                      int& bestTrainIdx1, int& bestTrainIdx2,
+        template <int BLOCK_SIZE>
+        __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
+                                      int& bestTrainIdx1, int& bestTrainIdx2,
                                       float* s_distance, int* s_trainIdx)
         {
-            float myBestDistance1 = numeric_limits<float>::max();
+            float myBestDistance1 = numeric_limits<float>::max();
             float myBestDistance2 = numeric_limits<float>::max();
             int myBestTrainIdx1 = -1;
             int myBestTrainIdx2 = -1;
@@ -122,13 +122,13 @@ namespace cv { namespace gpu { namespace device
             bestTrainIdx2 = myBestTrainIdx2;
         }

-        template <int BLOCK_SIZE>
-        __device__ void findBestMatch(float& bestDistance1, float& bestDistance2,
-                                      int& bestTrainIdx1, int& bestTrainIdx2,
-
int& bestImgIdx1, int& bestImgIdx2, + template + __device__ void findBestMatch(float& bestDistance1, float& bestDistance2, + int& bestTrainIdx1, int& bestTrainIdx2, + int& bestImgIdx1, int& bestImgIdx2, float* s_distance, int* s_trainIdx, int* s_imgIdx) { - float myBestDistance1 = numeric_limits::max(); + float myBestDistance1 = numeric_limits::max(); float myBestDistance2 = numeric_limits::max(); int myBestTrainIdx1 = -1; int myBestTrainIdx2 = -1; @@ -208,7 +208,7 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match Unrolled Cached - template + template __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_& query, U* s_query) { #pragma unroll @@ -219,11 +219,11 @@ namespace cv { namespace gpu { namespace device } } - template - __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, - float& bestDistance1, float& bestDistance2, - int& bestTrainIdx1, int& bestTrainIdx2, + template + __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, + float& bestDistance1, float& bestDistance2, + int& bestTrainIdx1, int& bestTrainIdx2, int& bestImgIdx1, int& bestImgIdx2) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -280,7 +280,7 @@ namespace cv { namespace gpu { namespace device } } - template + template __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -313,9 +313,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + template + void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -330,7 +330,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template + template __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, + template + void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -394,11 +394,11 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match Unrolled - template - __device__ void loopUnrolled(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, - float& bestDistance1, float& bestDistance2, - int& 
bestTrainIdx1, int& bestTrainIdx2, + template + __device__ void loopUnrolled(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, + float& bestDistance1, float& bestDistance2, + int& bestTrainIdx1, int& bestTrainIdx2, int& bestImgIdx1, int& bestImgIdx2) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -459,7 +459,7 @@ namespace cv { namespace gpu { namespace device } } - template + template __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -490,9 +490,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -507,7 +507,7 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template + template __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -549,9 +549,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -569,11 +569,11 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match - template - __device__ void loop(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, - float& bestDistance1, float& bestDistance2, - int& bestTrainIdx1, int& bestTrainIdx2, + template + __device__ void loop(int queryIdx, const DevMem2D_& query, int imgIdx, const DevMem2D_& train, const Mask& mask, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, + float& bestDistance1, float& bestDistance2, + int& bestTrainIdx1, int& bestTrainIdx2, int& bestImgIdx1, int& bestImgIdx2) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -633,7 +633,7 @@ namespace cv { namespace gpu { namespace device } } - template + template __global__ void match(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int2* bestTrainIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -664,9 +664,9 @@ namespace cv { namespace gpu { namespace device } } - template - void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& distance, + template + void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -681,7 +681,7 @@ 
namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template + template __global__ void match(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance) { extern __shared__ int smem[]; @@ -723,9 +723,9 @@ namespace cv { namespace gpu { namespace device } } - template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, + template + void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2D_& trainIdx, const DevMem2D_& imgIdx, const DevMem2D_& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -743,9 +743,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // knnMatch 2 dispatcher - template - void match2Dispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, + template + void match2Dispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -761,11 +761,11 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); }*/ else @@ -774,9 +774,9 @@ namespace cv { namespace gpu { namespace device } } - template - void match2Dispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template + void match2Dispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -792,11 +792,11 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ >(imgIdx), static_cast< DevMem2D_ > (distance), stream); }*/ else @@ -832,7 +832,7 @@ namespace cv { namespace gpu { namespace device s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX]; } else - { + { s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; } @@ -857,7 +857,7 @@ namespace cv { namespace gpu { namespace device } } - template + template void calcDistanceUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& 
mask, const DevMem2Df& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -895,7 +895,7 @@ namespace cv { namespace gpu { namespace device s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX]; } else - { + { s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; } @@ -920,7 +920,7 @@ namespace cv { namespace gpu { namespace device } } - template + template void calcDistance(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -938,9 +938,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Calc Distance dispatcher - template - void calcDistanceDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Df& allDist, + template + void calcDistanceDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Df& allDist, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -956,11 +956,11 @@ namespace cv { namespace gpu { namespace device calcDistanceUnrolled<16, 256, Dist>(query, train, mask, allDist, stream); } else if (query.cols <= 512) - { + { calcDistanceUnrolled<16, 512, Dist>(query, train, mask, allDist, stream); } else if (query.cols <= 1024) - { + { calcDistanceUnrolled<16, 1024, Dist>(query, train, mask, allDist, stream); }*/ else @@ -972,7 +972,7 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // find knn match kernel - template + template __global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance) { const int SMEM_SIZE = BLOCK_SIZE > 64 ? 
BLOCK_SIZE : 64; @@ -985,7 +985,7 @@ namespace cv { namespace gpu { namespace device float dist = numeric_limits::max(); int bestIdx = -1; - + for (int i = threadIdx.x; i < allDist.cols; i += BLOCK_SIZE) { float reg = allDistRow[i]; @@ -1013,7 +1013,7 @@ namespace cv { namespace gpu { namespace device } } - template + template void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, 1, 1); @@ -1038,8 +1038,8 @@ namespace cv { namespace gpu { namespace device // knn match Dispatcher template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, int k, const Mask& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, int k, const Mask& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream) { if (k == 2) @@ -1051,13 +1051,13 @@ namespace cv { namespace gpu { namespace device calcDistanceDispatcher(query, train, mask, allDist, cc, stream); findKnnMatchDispatcher(k, trainIdx, distance, allDist, cc, stream); } - } + } /////////////////////////////////////////////////////////////////////////////// // knn match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream) { if (mask.data) @@ -1073,7 +1073,7 @@ namespace cv { namespace gpu { namespace device template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream) { @@ -1091,7 +1091,7 @@ namespace cv { namespace gpu { namespace device template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, - const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, + const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream) { if (mask.data) @@ -1106,8 +1106,8 @@ namespace cv { namespace gpu { namespace device //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); template void matchHamming_gpu(const DevMem2Db& queryDescs, const 
DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream); - template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream) { if (masks.data) @@ -1123,8 +1123,8 @@ namespace cv { namespace gpu { namespace device template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); template void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream) { if (masks.data) @@ -1140,8 +1140,8 @@ namespace cv { namespace gpu { namespace device //template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); template void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream); - template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, + template void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream) { if (masks.data) diff --git a/modules/gpu/src/cuda/bf_match.cu b/modules/gpu/src/cuda/bf_match.cu index ad8cc88..0248dbf 100644 --- a/modules/gpu/src/cuda/bf_match.cu +++ b/modules/gpu/src/cuda/bf_match.cu @@ -45,14 +45,14 @@ #include "opencv2/gpu/device/vec_distance.hpp" #include "opencv2/gpu/device/datamov_utils.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace bf_match + namespace bf_match { /////////////////////////////////////////////////////////////////////////////// // Reduction - template + template __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, float* s_distance, int* s_trainIdx) { s_distance += threadIdx.y * BLOCK_SIZE; @@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device reducePredVal(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less()); } - template + template __device__ void findBestMatch(float& bestDistance, int& bestTrainIdx, int& bestImgIdx, float* s_distance, int* s_trainIdx, int* s_imgIdx) { s_distance += threadIdx.y * BLOCK_SIZE; @@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device 
/////////////////////////////////////////////////////////////////////////////// // Match Unrolled Cached - template + template __device__ void loadQueryToSmem(int queryIdx, const DevMem2D_& query, U* s_query) { #pragma unroll @@ -96,9 +96,9 @@ namespace cv { namespace gpu { namespace device } } - template + template __device__ void loopUnrolledCached(int queryIdx, const DevMem2D_& query,volatile int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device } } - template + template __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_ train, const Mask mask, int* bestTrainIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -173,9 +173,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template + void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -190,8 +190,8 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template - __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + template + __global__ void matchUnrolledCached(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -232,9 +232,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template + void matchUnrolledCached(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -252,9 +252,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match Unrolled - template + template __device__ void loopUnrolled(int queryIdx, const DevMem2D_& query,volatile int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -314,7 +314,7 @@ namespace cv { namespace gpu { namespace device typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); - + loopUnrolled(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx); __syncthreads(); @@ -331,9 +331,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& 
train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + __global__ void matchUnrolled(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -364,7 +364,7 @@ namespace cv { namespace gpu { namespace device typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); Mask m = mask; - + for (int imgIdx = 0; imgIdx < n; ++imgIdx) { const DevMem2D_ train = trains[imgIdx]; @@ -388,9 +388,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -408,9 +408,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match - template + template __device__ void loop(int queryIdx, const DevMem2D_& query, volatile int imgIdx, const DevMem2D_& train, const Mask& mask, - typename Dist::value_type* s_query, typename Dist::value_type* s_train, + typename Dist::value_type* s_query, typename Dist::value_type* s_train, float& bestDistance, int& bestTrainIdx, int& bestImgIdx) { for (int t = 0, endt = (train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; ++t) @@ -469,7 +469,7 @@ namespace cv { namespace gpu { namespace device typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE); - + loop(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance, myBestTrainIdx, myBestTrainIdx); __syncthreads(); @@ -486,9 +486,9 @@ namespace cv { namespace gpu { namespace device } } - template - void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template + void match(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -504,7 +504,7 @@ namespace cv { namespace gpu { namespace device } template - __global__ void match(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, + __global__ void match(const DevMem2D_ query, const DevMem2D_* trains, int n, const Mask mask, int* bestTrainIdx, int* bestImgIdx, float* bestDistance) { extern __shared__ int smem[]; @@ -542,9 +542,9 @@ namespace cv { namespace gpu { namespace device } } - template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template + void match(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + 
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -562,9 +562,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match dispatcher - template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template + void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -580,11 +580,11 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream); }*/ else @@ -593,9 +593,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template + void matchDispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -611,11 +611,11 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); }*/ else @@ -627,20 +627,20 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), - trainIdx, distance, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + trainIdx, distance, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), - trainIdx, distance, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + trainIdx, distance, cc, stream); } } @@ -652,20 +652,20 @@ namespace cv { namespace gpu { namespace device template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& 
train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), - trainIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + trainIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), - trainIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + trainIdx, distance, cc, stream); } } @@ -677,20 +677,20 @@ namespace cv { namespace gpu { namespace device //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), - trainIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), SingleMask(mask), + trainIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), - trainIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), WithOutMask(), + trainIdx, distance, cc, stream); } } @@ -701,20 +701,20 @@ namespace cv { namespace gpu { namespace device //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), - trainIdx, imgIdx, distance, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), - trainIdx, imgIdx, distance, + 
matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), + trainIdx, imgIdx, distance, cc, stream); } } @@ -726,20 +726,20 @@ namespace cv { namespace gpu { namespace device template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), - trainIdx, imgIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), - trainIdx, imgIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), + trainIdx, imgIdx, distance, cc, stream); } } @@ -751,20 +751,20 @@ namespace cv { namespace gpu { namespace device //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_& masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream) { if (masks.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), - trainIdx, imgIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, MaskCollection(masks.data), + trainIdx, imgIdx, distance, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), - trainIdx, imgIdx, distance, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains.ptr(), trains.cols, WithOutMask(), + trainIdx, imgIdx, distance, cc, stream); } } diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu index 39b721a..015ac0b 100644 --- a/modules/gpu/src/cuda/bf_radius_match.cu +++ b/modules/gpu/src/cuda/bf_radius_match.cu @@ -45,9 +45,9 @@ #include "opencv2/gpu/device/vec_distance.hpp" #include 
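The runs of near-identical "template void matchL1_gpu(...)" lines above are explicit instantiations whose distinguishing type arguments were also lost; upstream each line names a different element type (uchar, ushort, short, int, float, with some variants left commented out, as the leading // markers show). One line restored for illustration; the DevMem2D_<PtrStepb> mask-collection type is an assumption:

    template void matchL1_gpu<uchar>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                                     const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
                                     int cc, cudaStream_t stream);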
"opencv2/gpu/device/datamov_utils.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace bf_radius_match + namespace bf_radius_match { /////////////////////////////////////////////////////////////////////////////// // Match Unrolled @@ -112,8 +112,8 @@ namespace cv { namespace gpu { namespace device #endif } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -121,17 +121,17 @@ namespace cv { namespace gpu { namespace device const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - matchUnrolled<<>>(query, 0, train, maxDistance, mask, + matchUnrolled<<>>(query, 0, train, maxDistance, mask, trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); - } + } - template - void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template + void matchUnrolled(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -146,12 +146,12 @@ namespace cv { namespace gpu { namespace device if (masks != 0 && masks[i].data) { - matchUnrolled<<>>(query, i, train, maxDistance, SingleMask(masks[i]), + matchUnrolled<<>>(query, i, train, maxDistance, SingleMask(masks[i]), trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); } else { - matchUnrolled<<>>(query, i, train, maxDistance, WithOutMask(), + matchUnrolled<<>>(query, i, train, maxDistance, WithOutMask(), trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); } cudaSafeCall( cudaGetLastError() ); @@ -223,9 +223,9 @@ namespace cv { namespace gpu { namespace device #endif } - template - void match(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template + void match(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -233,7 +233,7 @@ namespace cv { namespace gpu { namespace device const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - match<<>>(query, 0, train, maxDistance, mask, + match<<>>(query, 0, train, maxDistance, mask, trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols); cudaSafeCall( cudaGetLastError() ); @@ -241,9 +241,9 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaDeviceSynchronize() ); } - template - void match(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template + void match(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, 
const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, cudaStream_t stream) { const dim3 block(BLOCK_SIZE, BLOCK_SIZE); @@ -258,12 +258,12 @@ namespace cv { namespace gpu { namespace device if (masks != 0 && masks[i].data) { - match<<>>(query, i, train, maxDistance, SingleMask(masks[i]), + match<<>>(query, i, train, maxDistance, SingleMask(masks[i]), trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); } else { - match<<>>(query, i, train, maxDistance, WithOutMask(), + match<<>>(query, i, train, maxDistance, WithOutMask(), trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols); } cudaSafeCall( cudaGetLastError() ); @@ -276,9 +276,9 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////////////////////// // Match dispatcher - template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template + void matchDispatcher(const DevMem2D_& query, const DevMem2D_& train, float maxDistance, const Mask& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -294,11 +294,11 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, train, maxDistance, mask, trainIdx, distance, nMatches, stream); }*/ else @@ -307,9 +307,9 @@ namespace cv { namespace gpu { namespace device } } - template - void matchDispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template + void matchDispatcher(const DevMem2D_& query, const DevMem2D_* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { if (query.cols <= 64) @@ -325,36 +325,36 @@ namespace cv { namespace gpu { namespace device matchUnrolled<16, 256, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); }*/ else { match<16, Dist>(query, trains, n, maxDistance, masks, trainIdx, imgIdx, distance, nMatches, stream); } - } + } /////////////////////////////////////////////////////////////////////////////// // Radius Match caller - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { if 
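The multi-image variants loop over the train descriptors and choose a masked or unmasked launch per image; with the stripped template arguments restored, the loop body reads roughly as below (grid, block and smemSize as in the single-train wrapper; the launch's argument list mirrors the fragments visible above):

    for (int i = 0; i < n; ++i)
    {
        const DevMem2D_<T> train = trains[i];

        if (masks != 0 && masks[i].data)
        {
            match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(
                query, i, train, maxDistance, SingleMask(masks[i]),
                trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
        }
        else
        {
            match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(
                query, i, train, maxDistance, WithOutMask(),
                trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
        }
        cudaSafeCall( cudaGetLastError() ); // check each launch before the next image
    }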
(mask.data) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), - trainIdx, distance, nMatches, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), + trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), - trainIdx, distance, nMatches, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + trainIdx, distance, nMatches, cc, stream); } } @@ -366,20 +366,20 @@ namespace cv { namespace gpu { namespace device template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); template void matchL1_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), - trainIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), + trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), - trainIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + trainIdx, distance, nMatches, cc, stream); } } @@ -391,20 +391,20 @@ namespace cv { namespace gpu { namespace device //template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); template void matchL2_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, - const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, + const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { if (mask.data) { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, SingleMask(mask), - trainIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, 
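Throughout the radius-match callers the nMatches parameter appears as a bare DevMem2D_&; it is the per-query counter of neighbours found within maxDistance, and its lost element type is almost certainly unsigned int, matching the unsigned int counts read back elsewhere in this commit. One caller signature restored under that assumption:

    template <typename T>
    void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                     const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                     int cc, cudaStream_t stream);

The body then wraps query and train into DevMem2D_<T> and branches on mask.data exactly as the k = 1 matchers do.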
SingleMask(mask), + trainIdx, distance, nMatches, cc, stream); } else { - matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), - trainIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), static_cast< DevMem2D_ >(train), maxDistance, WithOutMask(), + trainIdx, distance, nMatches, cc, stream); } } @@ -415,12 +415,12 @@ namespace cv { namespace gpu { namespace device //template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); template void matchHamming_gpu(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { - matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, - trainIdx, imgIdx, distance, nMatches, + matchDispatcher< L1Dist >(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + trainIdx, imgIdx, distance, nMatches, cc, stream); } @@ -431,12 +431,12 @@ namespace cv { namespace gpu { namespace device template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); template void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, - trainIdx, imgIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + trainIdx, imgIdx, distance, nMatches, cc, stream); } @@ -447,12 +447,12 @@ namespace cv { namespace gpu { namespace device //template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); template void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, 
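The multi-train radius callers forward straight to matchDispatcher with no mask branch, since the per-image masks travel as a plain pointer. The L1 caller with its eaten <T> arguments restored; every other token survives above, so only the angle-bracketed parts are reconstructed:

    template <typename T>
    void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                     const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
                     int cc, cudaStream_t stream)
    {
        matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
                                     trainIdx, imgIdx, distance, nMatches,
                                     cc, stream);
    }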
float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream); - template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, - const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, + template void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, + const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_& nMatches, int cc, cudaStream_t stream) { - matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, - trainIdx, imgIdx, distance, nMatches, + matchDispatcher(static_cast< DevMem2D_ >(query), (const DevMem2D_*)trains, n, maxDistance, masks, + trainIdx, imgIdx, distance, nMatches, cc, stream); } diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu index b6d13e1..d09268a 100644 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ b/modules/gpu/src/cuda/bilateral_filter.cu @@ -43,9 +43,9 @@ #include "internal_shared.hpp" #include "opencv2/gpu/device/limits.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace bilateral_filter + namespace bilateral_filter { __constant__ float* ctable_color; __constant__ float* ctable_space; @@ -108,7 +108,7 @@ namespace cv { namespace gpu { namespace device dp[3] = *(disp + (y+1) * disp_step + x + 0); dp[4] = *(disp + (y ) * disp_step + x + 1); - if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc) + if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc) { const int ymin = ::max(0, y - cradius); const int xmin = ::max(0, x - cradius); @@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device } } - template + template void bilateral_filter_caller(DevMem2D_ disp, DevMem2Db img, int channels, int iters, cudaStream_t stream) { dim3 threads(32, 8, 1); diff --git a/modules/gpu/src/cuda/blend.cu b/modules/gpu/src/cuda/blend.cu index 02e9649..e716edc 100644 --- a/modules/gpu/src/cuda/blend.cu +++ b/modules/gpu/src/cuda/blend.cu @@ -42,9 +42,9 @@ #include "internal_shared.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace blend + namespace blend { template __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep img1, const PtrStep img2, @@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device T p2 = img2.ptr(y)[x]; result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f); } - } + } template void blendLinearCaller(int rows, int cols, int cn, PtrStep img1, PtrStep img2, PtrStepf weights1, PtrStepf weights2, PtrStep result, cudaStream_t stream) { dim3 threads(16, 16); dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y)); - + blendLinearKernel<<>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result); cudaSafeCall( cudaGetLastError() ); @@ -105,12 +105,12 @@ namespace cv { namespace gpu { namespace device { dim3 threads(16, 16); dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - + blendLinearKernel8UC4<<>>(rows, cols, img1, img2, weights1, weights2, result); 
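The blend.cu hunk keeps the arithmetic intact: each output element is the weight-normalised sum (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f), where the 1e-5f guards against division by zero where both weight maps vanish. The kernel with its <T> restored; the x / cn step for indexing the per-pixel (not per-channel) weight maps is an assumption consistent with the launch passing cols * cn as the column count:

    template <typename T>
    __global__ void blendLinearKernel(int rows, int cols, int cn,
                                      const PtrStep<T> img1, const PtrStep<T> img2,
                                      const PtrStepf weights1, const PtrStepf weights2,
                                      PtrStep<T> result)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x; // channel-interleaved column
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (y < rows && x < cols)
        {
            const int x_ = x / cn;                  // weights are stored per pixel
            const float w1 = weights1.ptr(y)[x_];
            const float w2 = weights2.ptr(y)[x_];
            const T p1 = img1.ptr(y)[x];
            const T p2 = img2.ptr(y)[x];
            result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
        }
    }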
cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall(cudaDeviceSynchronize()); } - } // namespace blend + } // namespace blend }}} // namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu index e296aeb..4776c55 100644 --- a/modules/gpu/src/cuda/calib3d.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -44,7 +44,7 @@ #include "opencv2/gpu/device/transform.hpp" #include "opencv2/gpu/device/functional.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200 diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu index 4c7c5c7..f9dd490 100644 --- a/modules/gpu/src/cuda/canny.cu +++ b/modules/gpu/src/cuda/canny.cu @@ -44,9 +44,9 @@ #include #include "internal_shared.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace canny + namespace canny { __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols) { @@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device } }; - template __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf, + template __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols) { __shared__ int sdx[18][16]; @@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device } ////////////////////////////////////////////////////////////////////////////////////////// - + #define CANNY_SHIFT 15 #define TG22 (int)(0.4142135623730950488016887242097*(1< high_thresh); } } - + map.ptr(i + 1)[j + 1] = edge_type; } } @@ -270,7 +270,7 @@ namespace cv { namespace gpu { namespace device const int tid = threadIdx.y * 16 + threadIdx.x; const int lx = tid % 18; - const int ly = tid / 18; + const int ly = tid / 18; if (ly < 14) smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx]; @@ -294,10 +294,10 @@ namespace cv { namespace gpu { namespace device n += smem[threadIdx.y ][threadIdx.x ] == 2; n += smem[threadIdx.y ][threadIdx.x + 1] == 2; n += smem[threadIdx.y ][threadIdx.x + 2] == 2; - + n += smem[threadIdx.y + 1][threadIdx.x ] == 2; n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2; - + n += smem[threadIdx.y + 2][threadIdx.x ] == 2; n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2; n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2; @@ -318,10 +318,10 @@ namespace cv { namespace gpu { namespace device n += smem[threadIdx.y ][threadIdx.x ] == 1; n += smem[threadIdx.y ][threadIdx.x + 1] == 1; n += smem[threadIdx.y ][threadIdx.x + 2] == 1; - + n += smem[threadIdx.y + 1][threadIdx.x ] == 1; n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1; - + n += smem[threadIdx.y + 2][threadIdx.x ] == 1; n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1; n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1; @@ -361,7 +361,7 @@ namespace cv { namespace gpu { namespace device #if __CUDA_ARCH__ >= 120 const int stack_size = 512; - + __shared__ unsigned int s_counter; __shared__ unsigned int s_ind; __shared__ ushort2 s_st[stack_size]; @@ -404,11 +404,11 @@ namespace cv { namespace gpu { namespace device if (subTaskIdx < portion) pos = s_st[s_counter - 1 - subTaskIdx]; __syncthreads(); - + if (threadIdx.x == 0) s_counter -= portion; __syncthreads(); - + if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows) { pos.x += c_dx[threadIdx.x & 7]; @@ -452,7 +452,7 @@ namespace cv { namespace gpu { 
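The TG22 definition in the canny.cu hunk above is truncated at "(1<". The constant 0.4142135623730950488016887242097 is tan(22.5 deg), and the macro scales it into fixed point with CANNY_SHIFT = 15 fractional bits so the gradient-direction sector tests run in integer arithmetic, avoiding an atan2 per pixel. Restored form (the trailing + 0.5 rounding term is an assumption):

    #define CANNY_SHIFT 15
    #define TG22 (int)(0.4142135623730950488016887242097 * (1 << CANNY_SHIFT) + 0.5)

A comparison such as (dy << CANNY_SHIFT) < dx * TG22 then asks whether the gradient angle lies below 22.5 degrees, i.e. whether non-maximum suppression for that pixel should compare along the horizontal neighbours.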
namespace device { void* counter_ptr; cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) ); - + unsigned int count; cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) ); diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu index 5184222..562f3ff 100644 --- a/modules/gpu/src/cuda/color.cu +++ b/modules/gpu/src/cuda/color.cu @@ -45,7 +45,7 @@ #include #include -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits::functor_type) { @@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device { enum { smart_block_dim_y = 8 }; enum { smart_shift = 4 }; - }; + }; OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits::functor_type) { diff --git a/modules/gpu/src/cuda/column_filter.cu b/modules/gpu/src/cuda/column_filter.cu index d00bec8..624bd3f 100644 --- a/modules/gpu/src/cuda/column_filter.cu +++ b/modules/gpu/src/cuda/column_filter.cu @@ -48,9 +48,9 @@ #include "opencv2/gpu/device/border_interpolate.hpp" #include "opencv2/gpu/device/static_check.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace column_filter + namespace column_filter { #define MAX_KERNEL_SIZE 32 @@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y); const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK)); - + B brd(src.rows); linearColumnFilter<<>>(src, dst, anchor, brd); @@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace device { typedef void (*caller_t)(DevMem2D_ src, DevMem2D_ dst, int anchor, int cc, cudaStream_t stream); - static const caller_t callers[5][33] = + static const caller_t callers[5][33] = { { 0, @@ -338,9 +338,9 @@ namespace cv { namespace gpu { namespace device linearColumnFilter_caller<30, T, D, BrdColWrap>, linearColumnFilter_caller<31, T, D, BrdColWrap>, linearColumnFilter_caller<32, T, D, BrdColWrap> - } + } }; - + loadKernel(kernel, ksize); callers[brd_type][ksize]((DevMem2D_)src, (DevMem2D_)dst, anchor, cc, stream); diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu index 3397672..a54a9b7 100644 --- a/modules/gpu/src/cuda/copy_make_border.cu +++ b/modules/gpu/src/cuda/copy_make_border.cu @@ -43,9 +43,9 @@ #include "internal_shared.hpp" #include "opencv2/gpu/device/border_interpolate.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { - namespace imgproc + namespace imgproc { template __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_ dst, int top, int left) { @@ -58,9 +58,9 @@ namespace cv { namespace gpu { namespace device template
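The column_filter.cu hunk above shows the tail of a 5 x 33 function-pointer table that turns two runtime values, border mode and kernel size, into a call to one fully specialized template instance. A sketch of the surrounding dispatch function, assuming the upstream name linearColumnFilter_gpu and that loadKernel copies the filter taps into __constant__ memory:

    template <typename T, typename D>
    void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float* kernel, int ksize,
                                int anchor, int brd_type, int cc, cudaStream_t stream)
    {
        typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);

        // 5 border modes (Reflect101, Replicate, Constant, Reflect, Wrap) by
        // kernel sizes 1..32; index 0 of each row is the null sentinel seen above
        static const caller_t callers[5][33] =
        {
            /* rows elided; each entry is linearColumnFilter_caller<KSIZE, T, D, Brd> */
        };

        loadKernel(kernel, ksize);
        callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
    }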