CMakeLists.txt text whitespace=tabwidth=2
*.png binary
-*.jepg binary
+*.jpeg binary
*.jpg binary
*.exr binary
*.ico binary
add_definitions(-DHAVE_CVCONFIG_H)
ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR})
+# ----------------------------------------------------------------------------
+# Path for additional modules
+# ----------------------------------------------------------------------------
+set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules")
# ----------------------------------------------------------------------------
# Autodetect if we are in a GIT repository
# Warn when the detected Android SDK tools revision is 13 or older (or unset):
# tests and samples cannot be built with those revisions.
if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13)
message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will not be compiled.")
endif()
-elseif(ANT_EXECUTABLE)
+else()
find_package(JNI)
endif()
endif()
if(ANDROID)
- add_subdirectory(android/service)
+ add_subdirectory(platforms/android/service)
endif()
if(BUILD_ANDROID_PACKAGE)
- add_subdirectory(android/package)
+ add_subdirectory(platforms/android/package)
endif()
if (ANDROID)
- add_subdirectory(android/libinfo)
+ add_subdirectory(platforms/android/libinfo)
endif()
# ----------------------------------------------------------------------------
if(NOT ANDROID)
status(" JNI:" JNI_INCLUDE_DIRS THEN "${JNI_INCLUDE_DIRS}" ELSE NO)
endif()
-status(" Java tests:" BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS) THEN YES ELSE NO)
+status(" Java tests:" BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java) THEN YES ELSE NO)
# ========================== documentation ==========================
if(BUILD_DOCS)
+++ /dev/null
-We greatly appreciate your support and contributions and they are always welcomed!
-
-Github pull requests are the convenient way to contribute to OpenCV project. Good pull requests have all of these attributes:
-
-* Are scoped to one specific issue
-* Include a test to demonstrate the correctness
-* Update the docs if relevant
-* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide)
-* Don't messed by "oops" commits
-
-You can find more detailes about contributing process on http://opencv.org/contribute.html
\ No newline at end of file
Online docs: http://docs.opencv.org
Q&A forum: http://answers.opencv.org
Dev zone: http://code.opencv.org
+
+Please read before starting work on a pull request:
+ http://code.opencv.org/projects/opencv/wiki/How_to_contribute
+
+Summary of guidelines:
+
+* One pull request per issue;
+* Choose the right base branch;
+* Include tests and documentation;
+* Clean up "oops" commits before submitting;
+* Follow the coding style guide.
+# Deprecation notice printed at configure time: the toolchain file now lives
+# under platforms/android and this copy is kept only for compatibility.
+message(STATUS "Android toolchain was moved to platforms/android!")
+message(STATUS "This file is deprecated and will be removed!")
+
# Copyright (c) 2010-2011, Ethan Rublee
# Copyright (c) 2011-2013, Andrey Kamaev
# All rights reserved.
# - March 2013
# [+] updated for NDK r8e (x86 version)
# [+] support x86_64 version of NDK
+# - April 2013
+# [+] support non-release NDK layouts (from Linaro git and Android git)
+# [~] automatically detect if explicit link to crtbegin_*.o is needed
# ------------------------------------------------------------------------------
cmake_minimum_required( VERSION 2.6.3 )
endif( ANDROID_NDK )
endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
endif( NOT ANDROID_NDK )
+
# remember found paths
if( ANDROID_NDK )
get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
- # try to detect change
- if( CMAKE_AR )
- string( LENGTH "${ANDROID_NDK}" __length )
- string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
- if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK )
- message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
- " )
- endif()
- unset( __androidNdkPreviousPath )
- unset( __length )
- endif()
set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
set( BUILD_WITH_ANDROID_NDK True )
- file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
- string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" )
+ file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
+ string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ else()
+ set( ANDROID_NDK_RELEASE "r1x" )
+ set( ANDROID_NDK_RELEASE_FULL "unreleased" )
+ endif()
elseif( ANDROID_STANDALONE_TOOLCHAIN )
get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
# try to detect change
sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
endif()
+# android NDK layout
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT DEFINED ANDROID_NDK_LAYOUT )
+ # try to automatically detect the layout
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT")
+ set( ANDROID_NDK_LAYOUT "RELEASE" )
+ elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" )
+ set( ANDROID_NDK_LAYOUT "LINARO" )
+ elseif( EXISTS "${ANDROID_NDK}/../../gcc/" )
+ set( ANDROID_NDK_LAYOUT "ANDROID" )
+ endif()
+ endif()
+ set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" )
+ mark_as_advanced( ANDROID_NDK_LAYOUT )
+ if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE"
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" )
+ endif()
+ get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE )
+
+ # try to detect change of NDK
+ if( CMAKE_AR )
+ string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length )
+ string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
+ if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH )
+ message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
+ " )
+ endif()
+ unset( __androidNdkPreviousPath )
+ unset( __length )
+ endif()
+endif()
+
+
# get all the details about standalone toolchain
if( BUILD_WITH_STANDALONE_TOOLCHAIN )
__DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
endif()
endif()
-macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name )
+macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
foreach( __toolchain ${${__availableToolchainsLst}} )
- if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" )
+ if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
else()
set( __gcc_toolchain "${__toolchain}" )
endif()
- __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" )
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" )
if( __machine )
- string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" )
- string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" )
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" )
+ if( __machine MATCHES i686 )
+ set( __arch "x86" )
+ elseif( __machine MATCHES arm )
+ set( __arch "arm" )
+ elseif( __machine MATCHES mipsel )
+ set( __arch "mipsel" )
+ endif()
list( APPEND __availableToolchainMachines "${__machine}" )
list( APPEND __availableToolchainArchs "${__arch}" )
list( APPEND __availableToolchainCompilerVersions "${__version}" )
set( __availableToolchainMachines "" )
set( __availableToolchainArchs "" )
set( __availableToolchainCompilerVersions "" )
- if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" )
+ if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" )
# do not go through all toolchains if we know the name
set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
- __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
- if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
- __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
if( __availableToolchains )
- set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
endif()
endif()
endif()
if( NOT __availableToolchains )
- file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" )
+ file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" )
# Sort the freshly globbed toolchain directory names. The guard must test the
# globbed list itself: __availableToolchains is known to be empty in this
# branch, so testing it would skip the sort unconditionally. list(SORT) is
# only invoked on a non-empty list.
if( __availableToolchainsLst )
list(SORT __availableToolchainsLst) # we need clang to go after gcc
endif()
__LIST_FILTER( __availableToolchainsLst "^[.]" )
__LIST_FILTER( __availableToolchainsLst "llvm" )
- __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} )
- if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 )
- __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
if( __availableToolchains )
- set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
endif()
endif()
endif()
list( GET __availableToolchainArchs ${__idx} __toolchainArch )
if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
+ string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}")
if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
set( __toolchainMaxVersion "${__toolchainVersion}" )
set( __toolchainIdx ${__idx} )
elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
- if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" )
+ if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" )
message( FATAL_ERROR "Could not find the Clang compiler driver" )
endif()
set( ANDROID_COMPILER_IS_CLANG 1 )
- set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
else()
set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
unset( ANDROID_COMPILER_IS_CLANG CACHE )
# setup paths and STL for NDK
if( BUILD_WITH_ANDROID_NDK )
- set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
if( ANDROID_STL STREQUAL "none" )
endif()
# find libsupc++.a - rtti & exceptions
if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
- if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b
- set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
- elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b")
- set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" )
- else( ANDROID_NDK_RELEASE STRLESS "r7" )
+ set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer
+ if( NOT EXISTS "${__libsupcxx}" )
+ set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8
+ endif()
+ if( NOT EXISTS "${__libsupcxx}" ) # before r7
if( ARMEABI_V7A )
if( ANDROID_FORCE_ARM_BUILD )
set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
# setup the cross-compiler
if( NOT CMAKE_C_COMPILER )
- if( NDK_CCACHE )
+ if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
if( ANDROID_COMPILER_IS_CLANG )
remove_definitions( -DANDROID )
add_definitions( -DANDROID )
-if(ANDROID_SYSROOT MATCHES "[ ;\"]")
- set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+if( ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ if( CMAKE_HOST_WIN32 )
+ # try to convert path to 8.3 form
+ file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" )
+ execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}"
+ OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE
+ RESULT_VARIABLE __result ERROR_QUIET )
+ if( __result EQUAL 0 )
+ file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT )
+ set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+ else()
+ set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+ endif()
+ else()
+ set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" )
+ endif()
if( NOT _CMAKE_IN_TRY_COMPILE )
- # quotes will break try_compile and compiler identification
- message(WARNING "Your Android system root has non-alphanumeric symbols. It can break compiler features detection and the whole build.")
+ # quotes can break try_compile and compiler identification
+ message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n")
endif()
else()
set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
endif()
+if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+else()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+endif()
+
# STL
if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
- if( ANDROID_STL MATCHES "gnustl" )
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
- else()
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
- set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
- endif()
- if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
- # workaround "undefined reference to `__dso_handle'" problem
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
- endif()
if( EXISTS "${__libstl}" )
set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
endif()
if( ANDROID_STL MATCHES "gnustl" )
- set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" )
- set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" )
- set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" )
+ if( NOT EXISTS "${ANDROID_LIBM_PATH}" )
+ set( ANDROID_LIBM_PATH -lm )
+ endif()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" )
endif()
endif()
endif()
if( ANDROID_NO_UNDEFINED )
- set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ if( MIPS )
+ # there is some sysroot-related problem in mips linker...
+ if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" )
+ endif()
+ else()
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ endif()
endif()
if( ANDROID_SO_UNDEFINED )
set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
- set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
- set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
- set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
+ set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
+ set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
+ set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
endif()
# configure rtti
include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
+# detect whether crtbegin_so.o needs to be linked explicitly
+if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK )
+ set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" )
+ string( REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_C_COMPILER>" "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CXX_FLAGS>" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<LANGUAGE_COMPILE_FLAGS>" "" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_FLAGS>" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS>" "-shared" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET_SONAME>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET>" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" )
+ string( REPLACE "<OBJECTS>" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_LIBRARIES>" "" __cmd "${__cmd}" )
+ separate_arguments( __cmd )
+ foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN )
+ if( ${__var} )
+ set( __tmp "${${__var}}" )
+ separate_arguments( __tmp )
+ string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}")
+ endif()
+ endforeach()
+ string( REPLACE "'" "" __cmd "${__cmd}" )
+ string( REPLACE "\"" "" __cmd "${__cmd}" )
+ execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET )
+ if( __cmd_result EQUAL 0 )
+ set( ANDROID_EXPLICIT_CRT_LINK ON )
+ else()
+ set( ANDROID_EXPLICIT_CRT_LINK OFF )
+ endif()
+endif()
+
+if( ANDROID_EXPLICIT_CRT_LINK )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+endif()
+
# setup output directories
set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES
ANDROID_NDK_HOST_X64
ANDROID_NDK
+ ANDROID_NDK_LAYOUT
ANDROID_STANDALONE_TOOLCHAIN
ANDROID_TOOLCHAIN_NAME
ANDROID_ABI
ANDROID_GOLD_LINKER
ANDROID_NOEXECSTACK
ANDROID_RELRO
+ ANDROID_LIBM_PATH
+ ANDROID_EXPLICIT_CRT_LINK
)
if( DEFINED ${__var} )
if( "${__var}" MATCHES " ")
# ANDROID_STANDALONE_TOOLCHAIN
# ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
# ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
+# ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID)
# LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
# NDK_CCACHE : <path to your ccache executable>
# Obsolete:
# ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
# ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
# ANDROID_CLANG_VERSION : version of clang compiler if clang is used
+# ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product/<product_name>/obj/lib/libm.so) to workaround unresolved `sincos`
#
# Defaults:
# ANDROID_DEFAULT_NDK_API_LEVEL
--- /dev/null
+All Android specific sources are moved to platforms/android.
\ No newline at end of file
+++ /dev/null
-@ECHO OFF
-
-:: enable command extensions
-VERIFY BADVALUE 2>NUL
-SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT \B)
-
-:: build environment
-SET SOURCE_DIR=%cd%
-IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0)
-IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0)
-
-:: load configuration
-PUSHD %~dp0
-SET SCRIPTS_DIR=%cd%
-IF EXIST .\wincfg.cmd CALL .\wincfg.cmd
-POPD
-
-:: inherit old names
-IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE%
-IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE%
-
-:: defaults
-IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build
-IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a
-SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR%
-
-:: check that all required variables defined
-PUSHD .
-IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end)
-(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end)
-
-IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end)
-IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked
-
-IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end)
-(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end)
-
-IF NOT DEFINED ANT_DIR (ECHO. & ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end)
-(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end)
-
-IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end)
-(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end)
-
-:required_variables_checked
-POPD
-
-:: check for ninja
-echo "%MAKE%"|findstr /i ninja >nul:
-IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1)
-IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles)
-
-:: create build dir
-IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL
-MKDIR "%BUILD_DIR%" 2>NUL
-PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end)
-
-:: run cmake
-ECHO. & ECHO Runnning cmake...
-ECHO ANDROID_ABI=%ANDROID_ABI%
-ECHO.
-IF NOT %BUILD_OPENCV%==1 GOTO other-cmake
-:opencv-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin
-ECHO. & ECHO cmake failed & GOTO end
-:other-cmake
-("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin
-ECHO. & ECHO cmake failed & GOTO end
-:cmakefin
-
-:: run make
-ECHO. & ECHO Building native libs...
-IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end)
-IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end)
-
-IF NOT %BUILD_JAVA_PART%==1 GOTO end
-POPD && PUSHD %SOURCE_DIR%
-
-:: configure java part
-ECHO. & ECHO Updating Android project...
-(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end)
-
-:: compile java part
-ECHO. & ECHO Compiling Android project...
-(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end)
-
-:end
-POPD
-ENDLOCAL
+++ /dev/null
-@ECHO OFF
-
-PUSHD %~dp0..
-CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
-POPD
\ No newline at end of file
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_armeabi
-cd build_armeabi
-
-cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_mips
-cd build_mips
-
-cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_neon
-cd build_neon
-
-cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_service
-cd build_service
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
+++ /dev/null
-#!/bin/sh
-
-cd `dirname $0`/..
-
-mkdir -p build_x86
-cd build_x86
-
-cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
-
+++ /dev/null
-:: variables required for OpenCV build ::
-:: Note: all pathes should be specified without tailing slashes!
-SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b
-SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe
-SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe
-
-:: variables required for android-opencv build ::
-SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows
-SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2
-SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25
-
-:: configuration options ::
-:::: general ARM-V7 settings
-SET ANDROID_ABI=armeabi-v7a
-SET BUILD_DIR=build
-
-:::: uncomment following lines to compile for old emulator or old device
-::SET ANDROID_ABI=armeabi
-::SET BUILD_DIR=build_armeabi
-
-:::: uncomment following lines to compile for ARM-V7 with NEON support
-::SET ANDROID_ABI=armeabi-v7a with NEON
-::SET BUILD_DIR=build_neon
-
-:::: uncomment following lines to compile for x86
-::SET ANDROID_ABI=x86
-::SET BUILD_DIR=build_x86
-
-:::: other options
-::SET ANDROID_NATIVE_API_LEVEL=8 &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API
+++ /dev/null
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
-
-help:
- @echo "Please use \`make <target>' where <target> is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- -rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc"
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
- "run these through (pdf)latex."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
}
-struct FeatureIdxOnlyPrecalc
+struct FeatureIdxOnlyPrecalc : ParallelLoopBody
{
FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
{
idst = _buf->data.i;
is_buf_16u = _is_buf_16u;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
cv::AutoBuffer<float> valCache(sample_count);
float* valCachePtr = (float*)valCache;
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
{
for( int si = 0; si < sample_count; si++ )
{
bool is_buf_16u;
};
-struct FeatureValAndIdxPrecalc
+struct FeatureValAndIdxPrecalc : ParallelLoopBody
{
FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
{
idst = _buf->data.i;
is_buf_16u = _is_buf_16u;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
{
for( int si = 0; si < sample_count; si++ )
{
bool is_buf_16u;
};
-struct FeatureValOnlyPrecalc
+struct FeatureValOnlyPrecalc : ParallelLoopBody
{
FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
{
valCache = _valCache;
sample_count = _sample_count;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- for ( int fi = range.begin(); fi < range.end(); fi++)
+ for ( int fi = range.start; fi < range.end; fi++)
for( int si = 0; si < sample_count; si++ )
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
}
int minNum = MIN( numPrecalcVal, numPrecalcIdx);
double proctime = -TIME( 0 );
- parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
- FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
- parallel_for( BlockedRange(0, minNum),
- FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
- parallel_for( BlockedRange(minNum, numPrecalcVal),
- FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
+ parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
+ FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
+ parallel_for_( Range(0, minNum),
+ FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
+ parallel_for_( Range(minNum, numPrecalcVal),
+ FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
endif()
endmacro()
+# OpenCV fails some tests when 'char' is 'unsigned' by default
+add_extra_compiler_option(-fsigned-char)
+
if(MINGW)
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838
# here we are trying to workaround the problem
set(HAVE_CUBLAS 1)
endif()
+ if(${CUDA_VERSION} VERSION_LESS "5.5")
+ find_cuda_helper_libs(npp)
+ else()
+ find_cuda_helper_libs(nppc)
+ find_cuda_helper_libs(nppi)
+ find_cuda_helper_libs(npps)
+ set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+ endif()
+
if(WITH_NVCUVID)
find_cuda_helper_libs(nvcuvid)
set(HAVE_NVCUVID 1)
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
- find_cuda_helper_libs(npp)
-
macro(ocv_cuda_compile VAR)
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
set(${var}_backup_in_cuda_compile_ "${${var}}")
endif()
if(NOT HAVE_QT)
- find_package(Qt4)
+ find_package(Qt4 REQUIRED QtCore QtGui QtTest)
if(QT4_FOUND)
set(HAVE_QT TRUE)
add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
#
# Created: 5 Aug 2011 by Marian Zajko (marian.zajko@ximea.com)
# Updated: 25 June 2012 by Igor Kuzmin (parafin@ximea.com)
+# Updated: 22 October 2012 by Marian Zajko (marian.zajko@ximea.com)
#
set(XIMEA_FOUND)
if(WIN32)
# Try to find the XIMEA API path in registry.
GET_FILENAME_COMPONENT(XIMEA_PATH "[HKEY_CURRENT_USER\\Software\\XIMEA\\CamSupport\\API;Path]" ABSOLUTE)
-
- if(EXISTS XIMEA_PATH)
+
+ if(EXISTS ${XIMEA_PATH})
set(XIMEA_FOUND 1)
# set LIB folders
- set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86")
+ if(CMAKE_CL_64)
+ set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x64")
+ else()
+ set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86")
+ endif()
else()
set(XIMEA_FOUND 0)
endif()
mark_as_advanced(FORCE XIMEA_FOUND)
mark_as_advanced(FORCE XIMEA_PATH)
-mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
-
+mark_as_advanced(FORCE XIMEA_LIBRARY_DIR)
\ No newline at end of file
endif()
if(ANDROID)
- install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
+ install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
endif()
# --------------------------------------------------------------------------------------------
# collect modules
set(OPENCV_INITIAL_PASS ON)
foreach(__path ${ARGN})
- ocv_get_real_path(__path "${__path}")
+ get_filename_component(__path "${__path}" ABSOLUTE)
list(FIND __directories_observed "${__path}" __pathIdx)
if(__pathIdx GREATER -1)
if(__ocvmodules)
list(SORT __ocvmodules)
foreach(mod ${__ocvmodules})
- ocv_get_real_path(__modpath "${__path}/${mod}")
+ get_filename_component(__modpath "${__path}/${mod}" ABSOLUTE)
if(EXISTS "${__modpath}/CMakeLists.txt")
list(FIND __directories_observed "${__modpath}" __pathIdx)
# ocv_create_module(<extra link dependencies>)
# ocv_create_module(SKIP_LINK)
macro(ocv_create_module)
- add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES})
+ add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}
+ "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp")
if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
endmacro()
-# get absolute path with symlinks resolved
-macro(ocv_get_real_path VAR PATHSTR)
- if(CMAKE_VERSION VERSION_LESS 2.8)
- get_filename_component(${VAR} "${PATHSTR}" ABSOLUTE)
- else()
- get_filename_component(${VAR} "${PATHSTR}" REALPATH)
- endif()
-endmacro()
-
-
# convert list of paths to full paths
macro(ocv_convert_to_full_paths VAR)
if(${VAR})
endif()
endforeach()
- file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst")
- file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg")
+ file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst")
+ file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg")
list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
u'', 'manual'),
('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials',
u'', 'manual'),
- ('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
+ ('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
u'', 'manual'),
]
std::vector< DMatch > good_matches;
for( int i = 0; i < descriptors_1.rows; i++ )
- { if( matches[i].distance < 2*min_dist )
+ { if( matches[i].distance <= 2*min_dist )
{ good_matches.push_back( matches[i]); }
}
.. image:: images/Feature_FlannMatcher_Keypoints_Result.jpg
:align: center
:height: 250pt
-
-
-
:maxdepth: 2
modules/refman.rst
- android/refman.rst
+ platforms/android/refman.rst
doc/user_guide/user_guide.rst
doc/tutorials/tutorials.rst
set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
-ocv_glob_modules(${OPENCV_MODULES_PATH})
+ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH})
set(OPENCV_MODULE_TYPE STATIC)
ocv_define_module(androidcamera INTERNAL opencv_core log dl)
-ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include")
+ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include")
# Android source tree for native camera
SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH
transform(points, modif_points, transformation);
}
- class Mutex
- {
- public:
- Mutex() {
- }
- void lock()
- {
-#ifdef HAVE_TBB
- resultsMutex.lock();
-#endif
- }
-
- void unlock()
- {
-#ifdef HAVE_TBB
- resultsMutex.unlock();
-#endif
- }
-
- private:
-#ifdef HAVE_TBB
- tbb::mutex resultsMutex;
-#endif
- };
-
struct CameraParameters
{
void init(Mat _intrinsics, Mat _distCoeffs)
};
-struct FindStereoCorrespInvoker
+struct FindStereoCorrespInvoker : ParallelLoopBody
{
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
Mat& _disp, CvStereoBMState* _state,
validDisparityRect = _validDisparityRect;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
int cols = left->cols, rows = left->rows;
- int _row0 = min(cvRound(range.begin() * rows / nstripes), rows);
- int _row1 = min(cvRound(range.end() * rows / nstripes), rows);
- uchar *ptr = state->slidingSumBuf->data.ptr + range.begin() * stripeBufSize;
+ int _row0 = min(cvRound(range.start * rows / nstripes), rows);
+ int _row1 = min(cvRound(range.end * rows / nstripes), rows);
+ uchar *ptr = state->slidingSumBuf->data.ptr + range.start * stripeBufSize;
int FILTERED = (state->minDisparity - 1)*16;
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
const bool useShorts = false;
#endif
-#ifdef HAVE_TBB
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
int nstripes = cvCeil(height / maxStripeSize);
-#else
- const int nstripes = 1;
-#endif
int bufSize = max(bufSize0 * nstripes, max(bufSize1 * 2, bufSize2));
state->minDisparity, state->numberOfDisparities,
state->SADWindowSize);
- parallel_for(BlockedRange(0, nstripes),
- FindStereoCorrespInvoker(left, right, disp, state, nstripes,
- bufSize0, useShorts, validDisparityRect));
+ parallel_for_(Range(0, nstripes),
+ FindStereoCorrespInvoker(left, right, disp, state, nstripes,
+ bufSize0, useShorts, validDisparityRect));
if( state->speckleRange >= 0 && state->speckleWindowSize > 0 )
{
.. ocv:function:: int Mat::depth() const
-The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed 3-channel array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values:
+The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for an array of 16-bit signed elements, the method returns ``CV_16S`` . A complete list of matrix types contains the following values:
* ``CV_8U`` - 8-bit unsigned integers ( ``0..255`` )
~AutoLock() { mutex->unlock(); }
protected:
Mutex* mutex;
+private:
+ AutoLock(const AutoLock&);
+ AutoLock& operator = (const AutoLock&);
};
}
:param compactResult: Parameter used when the mask (or masks) is not empty. If ``compactResult`` is false, the ``matches`` vector has the same size as ``queryDescriptors`` rows. If ``compactResult`` is true, the ``matches`` vector does not contain matches for fully masked-out query descriptors.
- :param maxDistance: Threshold for the distance between matched descriptors.
+ :param maxDistance: Threshold for the distance between matched descriptors. Here, distance means the metric distance between descriptors (e.g. Hamming distance), not the distance between point coordinates (which is measured in pixels).
For each query descriptor, the methods find such training descriptors that the distance between the query descriptor and the training descriptor is equal or smaller than ``maxDistance``. Found matches are returned in the distance increasing order.
}
namespace {
-class GridAdaptedFeatureDetectorInvoker
+class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody
{
private:
int gridRows_, gridCols_;
const Mat& image_;
const Mat& mask_;
const Ptr<FeatureDetector>& detector_;
-#ifdef HAVE_TBB
- tbb::mutex* kptLock_;
-#endif
+ Mutex* kptLock_;
GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC
public:
- GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask, vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols
-#ifdef HAVE_TBB
- , tbb::mutex* kptLock
-#endif
- ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
- keypoints_(keypoints), image_(image), mask_(mask), detector_(detector)
-#ifdef HAVE_TBB
- , kptLock_(kptLock)
-#endif
+ GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask,
+ vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols,
+ cv::Mutex* kptLock)
+ : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
+ keypoints_(keypoints), image_(image), mask_(mask), detector_(detector),
+ kptLock_(kptLock)
{
}
- void operator() (const BlockedRange& range) const
+ void operator() (const Range& range) const
{
- for (int i = range.begin(); i < range.end(); ++i)
+ for (int i = range.start; i < range.end; ++i)
{
int celly = i / gridCols_;
int cellx = i - celly * gridCols_;
it->pt.x += col_range.start;
it->pt.y += row_range.start;
}
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock join_keypoints(*kptLock_);
-#endif
+
+ cv::AutoLock join_keypoints(*kptLock_);
keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() );
}
}
keypoints.reserve(maxTotalKeypoints);
int maxPerCell = maxTotalKeypoints / (gridRows * gridCols);
-#ifdef HAVE_TBB
- tbb::mutex kptLock;
- cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols),
+ cv::Mutex kptLock;
+ cv::parallel_for_(cv::Range(0, gridRows * gridCols),
GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock));
-#else
- GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols));
-#endif
}
/*
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
if(WITH_NVCUVID)
- set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
+ set(cuda_link_libs ${cuda_link_libs} ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY})
endif()
if(WIN32)
return dst;
}
- __device__ __forceinline__ RGB2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
-
- __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_)
- :unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2RGB() {}
+ __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
};
template <> struct RGB2RGB<uchar, 4, 4, 2> : unary_function<uint, uint>
return dst;
}
- __device__ __forceinline__ RGB2RGB():unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2RGB() {}
+ __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
};
}
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
}
- __device__ __forceinline__ RGB2RGB5x5():unary_function<uchar3, ushort>(){}
- __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uchar3, ushort>(){}
+ __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+ __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
};
template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
}
- __device__ __forceinline__ RGB2RGB5x5():unary_function<uint, ushort>(){}
- __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uint, ushort>(){}
+ __host__ __device__ __forceinline__ RGB2RGB5x5() {}
+ __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
};
}
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
return dst;
}
- __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uchar3>(){}
- __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uchar3>(){}
+ __host__ __device__ __forceinline__ RGB5x52RGB() {}
+ __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
};
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
return dst;
}
- __device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uint>(){}
- __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uint>(){}
+ __host__ __device__ __forceinline__ RGB5x52RGB() {}
+ __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ Gray2RGB():unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_)
- : unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ Gray2RGB() {}
+ __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
};
template <> struct Gray2RGB<uchar, 4> : unary_function<uchar, uint>
return dst;
}
- __device__ __forceinline__ Gray2RGB():unary_function<uchar, uint>(){}
- __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function<uchar, uint>(){}
+ __host__ __device__ __forceinline__ Gray2RGB() {}
+ __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
};
}
return Gray2RGB5x5Converter<green_bits>::cvt(src);
}
- __device__ __forceinline__ Gray2RGB5x5():unary_function<uchar, ushort>(){}
- __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function<uchar, ushort>(){}
+ __host__ __device__ __forceinline__ Gray2RGB5x5() {}
+ __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {}
};
}
{
return RGB5x52GrayConverter<green_bits>::cvt(src);
}
- __device__ __forceinline__ RGB5x52Gray() : unary_function<ushort, uchar>(){}
- __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function<ushort, uchar>(){}
+ __host__ __device__ __forceinline__ RGB5x52Gray() {}
+ __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {}
};
}
{
return RGB2GrayConvert<bidx>(&src.x);
}
- __device__ __forceinline__ RGB2Gray() : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
- __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
+ __host__ __device__ __forceinline__ RGB2Gray() {}
+ __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
};
template <int bidx> struct RGB2Gray<uchar, 4, bidx> : unary_function<uint, uchar>
{
return RGB2GrayConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2Gray() : unary_function<uint, uchar>(){}
- __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function<uint, uchar>(){}
+ __host__ __device__ __forceinline__ RGB2Gray() {}
+ __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
};
}
RGB2YUVConvert<bidx>(&src.x, dst);
return dst;
}
- __device__ __forceinline__ RGB2YUV()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2YUV(const RGB2YUV& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2YUV() {}
+ __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {}
};
}
return dst;
}
- __device__ __forceinline__ YUV2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ YUV2RGB() {}
+ __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
};
template <int bidx> struct YUV2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return YUV2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ YUV2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ YUV2RGB() {}
+ __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
};
}
RGB2YCrCbConvert<bidx>(&src.x, dst);
return dst;
}
- __device__ __forceinline__ RGB2YCrCb()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2YCrCb() {}
+ __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
};
template <int bidx> struct RGB2YCrCb<uchar, 4, 4, bidx> : unary_function<uint, uint>
return RGB2YCrCbConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2YCrCb() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2YCrCb() {}
+ __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
};
}
return dst;
}
- __device__ __forceinline__ YCrCb2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ YCrCb2RGB() {}
+ __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
};
template <int bidx> struct YCrCb2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return YCrCb2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ YCrCb2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ YCrCb2RGB() {}
+ __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2XYZ()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2XYZ() {}
+ __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
};
template <int bidx> struct RGB2XYZ<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return RGB2XYZConvert<bidx>(src);
}
- __device__ __forceinline__ RGB2XYZ() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2XYZ() {}
+ __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
};
}
return dst;
}
- __device__ __forceinline__ XYZ2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ XYZ2RGB() {}
+ __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
};
template <int bidx> struct XYZ2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
{
return XYZ2RGBConvert<bidx>(src);
}
- __device__ __forceinline__ XYZ2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ XYZ2RGB() {}
+ __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2HSV()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2HSV() {}
+ __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
};
template <int bidx, int hr> struct RGB2HSV<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return RGB2HSVConvert<bidx, hr>(src);
}
- __device__ __forceinline__ RGB2HSV():unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2HSV() {}
+ __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
};
}
return dst;
}
- __device__ __forceinline__ HSV2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ HSV2RGB() {}
+ __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
};
template <int bidx, int hr> struct HSV2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return HSV2RGBConvert<bidx, hr>(src);
}
- __device__ __forceinline__ HSV2RGB():unary_function<uint, uint>(){}
- __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ HSV2RGB() {}
+ __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2HLS()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ RGB2HLS() {}
+ __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
};
template <int bidx, int hr> struct RGB2HLS<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return RGB2HLSConvert<bidx, hr>(src);
}
- __device__ __forceinline__ RGB2HLS() : unary_function<uint, uint>(){}
- __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ RGB2HLS() {}
+ __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
};
}
return dst;
}
- __device__ __forceinline__ HLS2RGB()
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
- __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_)
- : unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
+ __host__ __device__ __forceinline__ HLS2RGB() {}
+ __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
};
template <int bidx, int hr> struct HLS2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
{
return HLS2RGBConvert<bidx, hr>(src);
}
- __device__ __forceinline__ HLS2RGB() : unary_function<uint, uint>(){}
- __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function<uint, uint>(){}
+ __host__ __device__ __forceinline__ HLS2RGB() {}
+ __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2Lab() {}
- __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+ __host__ __device__ __forceinline__ RGB2Lab() {}
+ __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ RGB2Lab() {}
- __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
+ __host__ __device__ __forceinline__ RGB2Lab() {}
+ __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
};
}
return dst;
}
- __device__ __forceinline__ Lab2RGB() {}
- __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+ __host__ __device__ __forceinline__ Lab2RGB() {}
+ __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ Lab2RGB() {}
- __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
+ __host__ __device__ __forceinline__ Lab2RGB() {}
+ __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
};
}
return dst;
}
- __device__ __forceinline__ RGB2Luv() {}
- __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+ __host__ __device__ __forceinline__ RGB2Luv() {}
+ __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ RGB2Luv() {}
- __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
+ __host__ __device__ __forceinline__ RGB2Luv() {}
+ __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
};
}
return dst;
}
- __device__ __forceinline__ Luv2RGB() {}
- __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+ __host__ __device__ __forceinline__ Luv2RGB() {}
+ __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
};
template <int scn, int dcn, bool srgb, int blueIdx>
struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
return dst;
}
- __device__ __forceinline__ Luv2RGB() {}
- __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
+ __host__ __device__ __forceinline__ Luv2RGB() {}
+ __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
};
}
{
return a + b;
}
- __device__ __forceinline__ plus(const plus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ plus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ plus() {}
+ __host__ __device__ __forceinline__ plus(const plus&) {}
};
template <typename T> struct minus : binary_function<T, T, T>
{
return a - b;
}
- __device__ __forceinline__ minus(const minus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ minus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ minus() {}
+ __host__ __device__ __forceinline__ minus(const minus&) {}
};
template <typename T> struct multiplies : binary_function<T, T, T>
{
return a * b;
}
- __device__ __forceinline__ multiplies(const multiplies& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ multiplies():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ multiplies() {}
+ __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
};
template <typename T> struct divides : binary_function<T, T, T>
{
return a / b;
}
- __device__ __forceinline__ divides(const divides& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ divides():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ divides() {}
+ __host__ __device__ __forceinline__ divides(const divides&) {}
};
template <typename T> struct modulus : binary_function<T, T, T>
{
return a % b;
}
- __device__ __forceinline__ modulus(const modulus& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ modulus():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ modulus() {}
+ __host__ __device__ __forceinline__ modulus(const modulus&) {}
};
template <typename T> struct negate : unary_function<T, T>
{
return -a;
}
- __device__ __forceinline__ negate(const negate& other):unary_function<T,T>(){}
- __device__ __forceinline__ negate():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ negate() {}
+ __host__ __device__ __forceinline__ negate(const negate&) {}
};
// Comparison Operations
{
return a == b;
}
- __device__ __forceinline__ equal_to(const equal_to& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ equal_to():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ equal_to() {}
+ __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
};
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
return a != b;
}
- __device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ not_equal_to():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ not_equal_to() {}
+ __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
};
template <typename T> struct greater : binary_function<T, T, bool>
{
return a > b;
}
- __device__ __forceinline__ greater(const greater& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ greater():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ greater() {}
+ __host__ __device__ __forceinline__ greater(const greater&) {}
};
template <typename T> struct less : binary_function<T, T, bool>
{
return a < b;
}
- __device__ __forceinline__ less(const less& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ less():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ less() {}
+ __host__ __device__ __forceinline__ less(const less&) {}
};
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
return a >= b;
}
- __device__ __forceinline__ greater_equal(const greater_equal& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ greater_equal():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ greater_equal() {}
+ __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
};
template <typename T> struct less_equal : binary_function<T, T, bool>
{
return a <= b;
}
- __device__ __forceinline__ less_equal(const less_equal& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ less_equal():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ less_equal() {}
+ __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
};
// Logical Operations
{
return a && b;
}
- __device__ __forceinline__ logical_and(const logical_and& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ logical_and():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ logical_and() {}
+ __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
};
template <typename T> struct logical_or : binary_function<T, T, bool>
{
return a || b;
}
- __device__ __forceinline__ logical_or(const logical_or& other):binary_function<T,T,bool>(){}
- __device__ __forceinline__ logical_or():binary_function<T,T,bool>(){}
+ __host__ __device__ __forceinline__ logical_or() {}
+ __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
};
template <typename T> struct logical_not : unary_function<T, bool>
{
return !a;
}
- __device__ __forceinline__ logical_not(const logical_not& other):unary_function<T,bool>(){}
- __device__ __forceinline__ logical_not():unary_function<T,bool>(){}
+ __host__ __device__ __forceinline__ logical_not() {}
+ __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
};
// Bitwise Operations
{
return a & b;
}
- __device__ __forceinline__ bit_and(const bit_and& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_and():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_and() {}
+ __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
};
template <typename T> struct bit_or : binary_function<T, T, T>
{
return a | b;
}
- __device__ __forceinline__ bit_or(const bit_or& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_or():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_or() {}
+ __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
};
template <typename T> struct bit_xor : binary_function<T, T, T>
{
return a ^ b;
}
- __device__ __forceinline__ bit_xor(const bit_xor& other):binary_function<T,T,T>(){}
- __device__ __forceinline__ bit_xor():binary_function<T,T,T>(){}
+ __host__ __device__ __forceinline__ bit_xor() {}
+ __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
};
template <typename T> struct bit_not : unary_function<T, T>
{
return ~v;
}
- __device__ __forceinline__ bit_not(const bit_not& other):unary_function<T,T>(){}
- __device__ __forceinline__ bit_not():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ bit_not() {}
+ __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
};
// Generalized Identity Operations
{
return x;
}
- __device__ __forceinline__ identity(const identity& other):unary_function<T,T>(){}
- __device__ __forceinline__ identity():unary_function<T,T>(){}
+ __host__ __device__ __forceinline__ identity() {}
+ __host__ __device__ __forceinline__ identity(const identity&) {}
};
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
return lhs;
}
- __device__ __forceinline__ project1st(const project1st& other):binary_function<T1,T2,T1>(){}
- __device__ __forceinline__ project1st():binary_function<T1,T2,T1>(){}
+ __host__ __device__ __forceinline__ project1st() {}
+ __host__ __device__ __forceinline__ project1st(const project1st&) {}
};
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
return rhs;
}
- __device__ __forceinline__ project2nd(const project2nd& other):binary_function<T1,T2,T2>(){}
- __device__ __forceinline__ project2nd():binary_function<T1,T2,T2>(){}
+ __host__ __device__ __forceinline__ project2nd() {}
+ __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
};
// Min/Max Operations
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
- __device__ __forceinline__ name() {}\
- __device__ __forceinline__ name(const name&) {}\
+ __host__ __device__ __forceinline__ name() {}\
+ __host__ __device__ __forceinline__ name(const name&) {}\
};
template <typename T> struct maximum : binary_function<T, T, T>
{
return max(lhs, rhs);
}
- __device__ __forceinline__ maximum() {}
- __device__ __forceinline__ maximum(const maximum&) {}
+ __host__ __device__ __forceinline__ maximum() {}
+ __host__ __device__ __forceinline__ maximum(const maximum&) {}
};
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
{
return min(lhs, rhs);
}
- __device__ __forceinline__ minimum() {}
- __device__ __forceinline__ minimum(const minimum&) {}
+ __host__ __device__ __forceinline__ minimum() {}
+ __host__ __device__ __forceinline__ minimum(const minimum&) {}
};
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
#undef OPENCV_GPU_IMPLEMENT_MINMAX
// Math functions
-///bound=========================================
template <typename T> struct abs_func : unary_function<T, T>
{
return abs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<char> : unary_function<char, char>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<short> : unary_function<short, short>
{
return ::abs((int)x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
{
return x;
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<int> : unary_function<int, int>
{
return ::abs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<float> : unary_function<float, float>
{
return ::fabsf(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<double> : unary_function<double, double>
{
return ::fabs(x);
}
- __device__ __forceinline__ abs_func() {}
- __device__ __forceinline__ abs_func(const abs_func&) {}
+ __host__ __device__ __forceinline__ abs_func() {}
+ __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
{ \
return func ## f(v); \
} \
- __device__ __forceinline__ name ## _func() {} \
- __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
{ \
return func(v); \
} \
- __device__ __forceinline__ name ## _func() {} \
- __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
{ \
return func ## f(v1, v2); \
} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
{ \
return func(v1, v2); \
} \
+ __host__ __device__ __forceinline__ name ## _func() {} \
+ __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
{
return src1 * src1 + src2 * src2;
}
- __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
- __device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
+ __host__ __device__ __forceinline__ hypot_sqr_func() {}
+ __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
};
// Saturate Cast Functor
{
return saturate_cast<D>(v);
}
- __device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function<T, D>(){}
- __device__ __forceinline__ saturate_cast_func():unary_function<T, D>(){}
+ __host__ __device__ __forceinline__ saturate_cast_func() {}
+ __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
};
// Threshold Functors
return (src > thresh) * maxVal;
}
- __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
- : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
- __device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_binary_func() {}
+ __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
+ : thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
return (src <= thresh) * maxVal;
}
- __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
- : unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
-
- __device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
+ __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
+ : thresh(other.thresh), maxVal(other.maxVal) {}
const T thresh;
const T maxVal;
return minimum<T>()(src, thresh);
}
- __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
-
- __device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_trunc_func() {}
+ __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
{
return (src > thresh) * src;
}
- __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
- __device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_to_zero_func() {}
+ __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
{
return (src <= thresh) * src;
}
- __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
- : unary_function<T, T>(), thresh(other.thresh){}
- __device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
+ __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
+ __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
+ : thresh(other.thresh) {}
const T thresh;
};
-//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
+
// Function Object Adaptors
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
return !pred(x);
}
- __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
- __device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
+ __host__ __device__ __forceinline__ unary_negate() {}
+ __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
const Predicate pred;
};
{
return !pred(x,y);
}
- __device__ __forceinline__ binary_negate(const binary_negate& other)
- : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
- __device__ __forceinline__ binary_negate() :
- binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
+ __host__ __device__ __forceinline__ binary_negate() {}
+ __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
const Predicate pred;
};
return op(arg1, a);
}
- __device__ __forceinline__ binder1st(const binder1st& other) :
- unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
+ __host__ __device__ __forceinline__ binder1st() {}
+ __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
const Op op;
const typename Op::first_argument_type arg1;
return op(a, arg2);
}
- __device__ __forceinline__ binder2nd(const binder2nd& other) :
- unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
+ __host__ __device__ __forceinline__ binder2nd() {}
+ __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
const Op op;
const typename Op::second_argument_type arg2;
struct WithOutMask
{
- __device__ __forceinline__ WithOutMask(){}
- __device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
+ __host__ __device__ __forceinline__ WithOutMask(){}
+ __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
__device__ __forceinline__ void next() const
{
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
-#include "saturate_cast.hpp"
#include "vec_traits.hpp"
-#include "functional.hpp"
+#include "saturate_cast.hpp"
namespace cv { namespace gpu { namespace device
{
- namespace vec_math_detail
+
+// saturate_cast overloads for 1- to 4-component vector types (saturate_cast is applied to each component)
+
+namespace vec_math_detail
+{
+ template <int cn, typename VecD> struct SatCastHelper;
+ template <typename VecD> struct SatCastHelper<1, VecD>
{
- template <int cn, typename VecD> struct SatCastHelper;
- template <typename VecD> struct SatCastHelper<1, VecD>
+ template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
- template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
- {
- typedef typename VecTraits<VecD>::elem_type D;
- return VecTraits<VecD>::make(saturate_cast<D>(v.x));
- }
- };
- template <typename VecD> struct SatCastHelper<2, VecD>
- {
- template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
- {
- typedef typename VecTraits<VecD>::elem_type D;
- return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
- }
- };
- template <typename VecD> struct SatCastHelper<3, VecD>
+ typedef typename VecTraits<VecD>::elem_type D;
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x));
+ }
+ };
+ template <typename VecD> struct SatCastHelper<2, VecD>
+ {
+ template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
- template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
- {
- typedef typename VecTraits<VecD>::elem_type D;
- return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
- }
- };
- template <typename VecD> struct SatCastHelper<4, VecD>
+ typedef typename VecTraits<VecD>::elem_type D;
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
+ }
+ };
+ template <typename VecD> struct SatCastHelper<3, VecD>
+ {
+ template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
- template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
- {
- typedef typename VecTraits<VecD>::elem_type D;
- return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
- }
- };
-
- template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v)
+ typedef typename VecTraits<VecD>::elem_type D;
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
+ }
+ };
+ template <typename VecD> struct SatCastHelper<4, VecD>
+ {
+ template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
- return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
+ typedef typename VecTraits<VecD>::elem_type D;
+ return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
+ };
+
+ template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
+ {
+ return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
+}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
- template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);}
-
-#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
- { \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
+// unary operators (-, !, ~) applied component-wise to 1- to 4-component vector types
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
+ __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
+ { \
+ return VecTraits<output_type ## 1>::make(op (a.x)); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
+ __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
+ return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
+ __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
+ return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
+ __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
+ return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
}
- namespace vec_math_detail
- {
- template <typename T1, typename T2> struct BinOpTraits
- {
- typedef int argument_type;
- };
- template <typename T> struct BinOpTraits<T, T>
- {
- typedef T argument_type;
- };
- template <typename T> struct BinOpTraits<T, double>
- {
- typedef double argument_type;
- };
- template <typename T> struct BinOpTraits<double, T>
- {
- typedef double argument_type;
- };
- template <> struct BinOpTraits<double, double>
- {
- typedef double argument_type;
- };
- template <typename T> struct BinOpTraits<T, float>
- {
- typedef float argument_type;
- };
- template <typename T> struct BinOpTraits<float, T>
- {
- typedef float argument_type;
- };
- template <> struct BinOpTraits<float, float>
- {
- typedef float argument_type;
- };
- template <> struct BinOpTraits<double, float>
- {
- typedef double argument_type;
- };
- template <> struct BinOpTraits<float, double>
- {
- typedef double argument_type;
- };
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) // unary minus: char, short, int, float and double vectors only (no uchar/ushort/uint overloads here); result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) // logical not: instantiated for all eight element types; the result is always a uchar vector
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) // bitwise not: integer element types only (no float/double); result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP // the helper macro is only needed for the instantiations above
+
+// unary functions
+
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) /* defines four device overloads of func_name, one per vector width 1..4: each applies func to every component of an input_type vector and packs the results with VecTraits<output_type N>::make; func may be empty, in which case the components are passed through as-is */ \
+ __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
+ { \
+ return VecTraits<output_type ## 1>::make(func (a.x)); \
+ } \
+ __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
+ { \
+ return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
+ } \
+ __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
+ { \
+ return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
+ } \
+ __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
+ { \
+ return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
}
-#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar) // abs: for uchar, ushort and uint the func argument is deliberately empty, so each component is copied unchanged
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char) // char, short and int call ::abs; float calls ::fabsf and double calls ::fabs
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) // sqrt and every group below share one pattern: integer and float vectors call the f-suffixed function and yield float vectors; double vectors call the unsuffixed function and yield double vectors
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC // the helper macro is only needed for the instantiations above
+
+// binary operators (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) /* defines four device overloads of operator op, one per vector width 1..4: both operands are input_type vectors of the same width, op is applied component by component, and the results are packed with VecTraits<output_type N>::make */ \
+ __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
+ return VecTraits<output_type ## 1>::make(a.x op b.x); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
+ __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
+ return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
+ __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
+ return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
+ __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
+ return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+ }
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) // arithmetic (+, -, *, /): uchar, char, ushort and short vectors produce int vectors; int, uint, float and double vectors keep their element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) // comparisons (==, !=, >, <, >=, <=): instantiated for all eight element types; the result is always a uchar vector
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar) // logical && and ||: instantiated for all eight element types; the result is always a uchar vector
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) // bitwise &, | and ^: integer element types only (no float/double); result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP // the helper macro is only needed for the instantiations above
+
+// binary operators (vec & scalar)
+
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) /* defines eight device overloads of operator op: for each vector width 1..4 there is a (vector, scalar) and a (scalar, vector) form; the scalar_type value is combined with every component in the written operand order and the results are packed with VecTraits<output_type N>::make */ \
+ __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+ { \
+ return VecTraits<output_type ## 1>::make(a.x op s); \
+ } \
+ __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+ { \
+ return VecTraits<output_type ## 1>::make(s op b.x); \
+ } \
+ __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+ { \
+ return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+ } \
+ __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+ { \
+ return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+ } \
+ __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+ { \
+ return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+ } \
+ __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+ { \
+ return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+ } \
+ __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+ { \
+ return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+ } \
+ __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+ { \
+ return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+ }
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int) // arithmetic (+, -, *, /): in every instantiation of these four groups the result element type equals the scalar type
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) // comparisons (==, !=, >, <, >=, <=): the scalar has the vector's element type; the result is always a uchar vector
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) // logical && and ||: the scalar has the vector's element type; the result is always a uchar vector
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) // bitwise &, | and ^: integer element types only; scalar and result both use the vector's element type
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP // the helper macro is only needed for the instantiations above
+
+// binary functions (vec & vec)
+
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) /* defines four device overloads of func_name, one per vector width 1..4: func is called on each pair of matching components of two input_type vectors and the results are packed with VecTraits<output_type N>::make */ \
+ __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
+ { \
+ return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
+ } \
+ __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
+ { \
+ return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
+ __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
+ return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
+ __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
+ return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
+ }
+// max: integer element types call ::max, float calls ::fmaxf, double calls ::fmax; output type equals input type.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+// min: integer element types call ::min, float calls ::fminf, double calls ::fmin; output type equals input type.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+// hypot: integer and float vectors go through ::hypotf and yield float vectors; double vectors use ::hypot.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+// atan2: integer and float vectors go through ::atan2f and yield float vectors; double vectors use ::atan2.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+// The generator macro is only needed for the instantiations above.
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar): generates func_name(vector, scalar) and func_name(scalar, vector) overloads for 1 to 4 components.
+// Every component and the scalar are cast to output_type before func is applied; results are packed with VecTraits<output_type N>::make.
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+ __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
+ { \
+ return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
+ } \
+ __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
+ { \
+ return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
+ __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
+ return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
+ __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
+ return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
+ __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
+ return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
} \
- __device__ __forceinline__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
+ __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
{ \
- func<type> f; \
- return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
+ return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
+ __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
{ \
- func<typename vec_math_detail::BinOpTraits<type, T>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
+ return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
} \
- template <typename T> \
- __device__ __forceinline__ typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
+ __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
{ \
- func<typename vec_math_detail::BinOpTraits<T, type>::argument_type> f; \
- return VecTraits<typename TypeVec<typename func<typename vec_math_detail::BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
+ return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
}
-#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)
-
-#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
- OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
- OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
- OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)
-
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
- OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
- OPENCV_GPU_IMPLEMENT_VEC_OP(float)
- OPENCV_GPU_IMPLEMENT_VEC_OP(double)
-
- #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
- #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
- #undef OPENCV_GPU_IMPLEMENT_VEC_OP
- #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
+// min: same layout as max above - a scalar of the vector's own integer type uses ::min, a float scalar ::fminf (float result), a double scalar ::fmin (double result).
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
+// hypot: float scalars use ::hypotf and yield float vectors; double scalars use ::hypot and yield double vectors.
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
+// atan2: float scalars use ::atan2f and yield float vectors; double scalars use ::atan2 and yield double vectors.
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
+// The generator macro is only needed for the instantiations above.
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
+
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_VECMATH_HPP__
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
- GPU_SANITY_CHECK(dst);
+ GPU_SANITY_CHECK(dst, 1);
}
else
{
TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
- GPU_SANITY_CHECK(newFrame);
+ GPU_SANITY_CHECK(newFrame, 1e-4);
}
else
{
TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
- GPU_SANITY_CHECK(vertex);
+ GPU_SANITY_CHECK(vertex, 1e-6);
GPU_SANITY_CHECK(colors);
}
else
TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
- GPU_SANITY_CHECK(u);
- GPU_SANITY_CHECK(v);
+ GPU_SANITY_CHECK(u, 1e-1);
+ GPU_SANITY_CHECK(v, 1e-1);
}
else
{
}
// Computes rotation, translation pair for small subsets of the input data
- class TransformHypothesesGenerator
+ class TransformHypothesesGenerator : public ParallelLoopBody
{
public:
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
transl_vectors(transl_vectors_) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
// Input data for generation of the current hypothesis
vector<int> subset_indices(subset_size);
Mat rot_mat(3, 3, CV_64F);
Mat transl_vec(1, 3, CV_64F);
- for (int iter = range.begin(); iter < range.end(); ++iter)
+ for (int iter = range.start; iter < range.end; ++iter)
{
selectRandom(subset_size, num_points, subset_indices);
for (int i = 0; i < subset_size; ++i)
// Generate set of hypotheses using small subsets of the input data
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
num_points, subset_size, rot_matrices, transl_vectors);
- parallel_for(BlockedRange(0, num_iters), body);
+ parallel_for_(Range(0, num_iters), body);
// Compute scores (i.e. number of inliers) for each hypothesis
GpuMat d_object(object);
GpuMat dclassified(1, 1, CV_32S);
cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
- PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);
+ PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
while (level.isFeasible(maxObjectSize))
{
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
}
- __device__ __forceinline__ TransformOp() {}
- __device__ __forceinline__ TransformOp(const TransformOp&) {}
+ __host__ __device__ __forceinline__ TransformOp() {}
+ __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
(cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
(cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
}
- __device__ __forceinline__ ProjectOp() {}
- __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
+ __host__ __device__ __forceinline__ ProjectOp() {}
+ __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
return ::abs(x) + ::abs(y);
}
- __device__ __forceinline__ L1() {}
- __device__ __forceinline__ L1(const L1&) {}
+ __host__ __device__ __forceinline__ L1() {}
+ __host__ __device__ __forceinline__ L1(const L1&) {}
};
struct L2 : binary_function<int, int, float>
{
return ::sqrtf(x * x + y * y);
}
- __device__ __forceinline__ L2() {}
- __device__ __forceinline__ L2(const L2&) {}
+ __host__ __device__ __forceinline__ L2() {}
+ __host__ __device__ __forceinline__ L2(const L2&) {}
};
}
return (uchar)(-(e >> 1));
}
- __device__ __forceinline__ GetEdges() {}
- __device__ __forceinline__ GetEdges(const GetEdges&) {}
+ __host__ __device__ __forceinline__ GetEdges() {}
+ __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
};
}
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
- I d = a - b;
+ I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z;
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
- I d = a - b;
+ I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z &&
return vadd4(a, b);
}
- __device__ __forceinline__ VAdd4() {}
- __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+ __host__ __device__ __forceinline__ VAdd4() {}
+ __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};
////////////////////////////////////
return vadd2(a, b);
}
- __device__ __forceinline__ VAdd2() {}
- __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+ __host__ __device__ __forceinline__ VAdd2() {}
+ __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};
////////////////////////////////////
return saturate_cast<D>(a + b);
}
- __device__ __forceinline__ AddMat() {}
- __device__ __forceinline__ AddMat(const AddMat& other) {}
+ __host__ __device__ __forceinline__ AddMat() {}
+ __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
}
return vsub4(a, b);
}
- __device__ __forceinline__ VSub4() {}
- __device__ __forceinline__ VSub4(const VSub4& other) {}
+ __host__ __device__ __forceinline__ VSub4() {}
+ __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
////////////////////////////////////
return vsub2(a, b);
}
- __device__ __forceinline__ VSub2() {}
- __device__ __forceinline__ VSub2(const VSub2& other) {}
+ __host__ __device__ __forceinline__ VSub2() {}
+ __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
////////////////////////////////////
return saturate_cast<D>(a - b);
}
- __device__ __forceinline__ SubMat() {}
- __device__ __forceinline__ SubMat(const SubMat& other) {}
+ __host__ __device__ __forceinline__ SubMat() {}
+ __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
}
return res;
}
- __device__ __forceinline__ Mul_8uc4_32f() {}
- __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
struct Mul_16sc4_32f : binary_function<short4, float, short4>
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}
- __device__ __forceinline__ Mul_16sc4_32f() {}
- __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
};
template <typename T, typename D> struct Mul : binary_function<T, T, D>
return saturate_cast<D>(a * b);
}
- __device__ __forceinline__ Mul() {}
- __device__ __forceinline__ Mul(const Mul& other) {}
+ __host__ __device__ __forceinline__ Mul() {}
+ __host__ __device__ __forceinline__ Mul(const Mul&) {}
};
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
return b != 0 ? saturate_cast<D>(a / b) : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{
return b != 0 ? static_cast<float>(a) / b : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{
return b != 0 ? static_cast<double>(a) / b : 0;
}
- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
return vabsdiff4(a, b);
}
- __device__ __forceinline__ VAbsDiff4() {}
- __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff4() {}
+ __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};
////////////////////////////////////
return vabsdiff2(a, b);
}
- __device__ __forceinline__ VAbsDiff2() {}
- __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff2() {}
+ __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};
////////////////////////////////////
return saturate_cast<T>(_abs(a - b));
}
- __device__ __forceinline__ AbsDiffMat() {}
- __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+ __host__ __device__ __forceinline__ AbsDiffMat() {}
+ __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}
return saturate_cast<T>(x * x);
}
- __device__ __forceinline__ Sqr() {}
- __device__ __forceinline__ Sqr(const Sqr& other) {}
+ __host__ __device__ __forceinline__ Sqr() {}
+ __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
};
}
return saturate_cast<T>(f(x));
}
- __device__ __forceinline__ Exp() {}
- __device__ __forceinline__ Exp(const Exp& other) {}
+ __host__ __device__ __forceinline__ Exp() {}
+ __host__ __device__ __forceinline__ Exp(const Exp&) {}
};
}
return vcmpeq4(a, b);
}
- __device__ __forceinline__ VCmpEq4() {}
- __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+ __host__ __device__ __forceinline__ VCmpEq4() {}
+ __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
return vcmpne4(a, b);
}
- __device__ __forceinline__ VCmpNe4() {}
- __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+ __host__ __device__ __forceinline__ VCmpNe4() {}
+ __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
return vcmplt4(a, b);
}
- __device__ __forceinline__ VCmpLt4() {}
- __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+ __host__ __device__ __forceinline__ VCmpLt4() {}
+ __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
return vcmple4(a, b);
}
- __device__ __forceinline__ VCmpLe4() {}
- __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+ __host__ __device__ __forceinline__ VCmpLe4() {}
+ __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};
////////////////////////////////////
return vmin4(a, b);
}
- __device__ __forceinline__ VMin4() {}
- __device__ __forceinline__ VMin4(const VMin4& other) {}
+ __host__ __device__ __forceinline__ VMin4() {}
+ __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};
////////////////////////////////////
return vmin2(a, b);
}
- __device__ __forceinline__ VMin2() {}
- __device__ __forceinline__ VMin2(const VMin2& other) {}
+ __host__ __device__ __forceinline__ VMin2() {}
+ __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
}
return vmax4(a, b);
}
- __device__ __forceinline__ VMax4() {}
- __device__ __forceinline__ VMax4(const VMax4& other) {}
+ __host__ __device__ __forceinline__ VMax4() {}
+ __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};
////////////////////////////////////
return vmax2(a, b);
}
- __device__ __forceinline__ VMax2() {}
- __device__ __forceinline__ VMax2(const VMax2& other) {}
+ __host__ __device__ __forceinline__ VMax2() {}
+ __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
}
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/emulation.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/dynamic_smem.hpp"
const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize)
- r_table(n, ind) = p - templCenter;
+ r_table(n, ind) = saturate_cast<short2>(p - templCenter);
}
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
for (int j = 0; j < r_row_size; ++j)
{
- short2 c = p - r_row[j];
+ int2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp);
const ErrorEntry npp_errors [] =
{
- error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
- error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
- error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
-
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
+#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ),
- error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
- error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
- error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
- error_entry( NPP_NOT_EVEN_STEP_ERROR ),
- error_entry( NPP_INTERPOLATION_ERROR ),
- error_entry( NPP_RESIZE_FACTOR_ERROR ),
- error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
- error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
+ error_entry( NPP_POINTER_ERROR ),
+ error_entry( NPP_WARNING ),
+ error_entry( NPP_ODD_ROI_WARNING ),
+#else
+ error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
+ error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
+ error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
+ error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
+ error_entry( NPP_MEMFREE_ERROR ),
+ error_entry( NPP_MEMSET_ERROR ),
+ error_entry( NPP_QUALITY_INDEX_ERROR ),
+ error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
+ error_entry( NPP_CHANNEL_ORDER_ERROR ),
+ error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
+ error_entry( NPP_QUADRANGLE_ERROR ),
+ error_entry( NPP_RECTANGLE_ERROR ),
+ error_entry( NPP_COEFFICIENT_ERROR ),
+ error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
+ error_entry( NPP_COI_ERROR ),
+ error_entry( NPP_DIVISOR_ERROR ),
+ error_entry( NPP_CHANNEL_ERROR ),
+ error_entry( NPP_STRIDE_ERROR ),
+ error_entry( NPP_ANCHOR_ERROR ),
+ error_entry( NPP_MASK_SIZE_ERROR ),
+ error_entry( NPP_MIRROR_FLIP_ERROR ),
+ error_entry( NPP_MOMENT_00_ZERO_ERROR ),
+ error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
+ error_entry( NPP_THRESHOLD_ERROR ),
+ error_entry( NPP_CONTEXT_MATCH_ERROR ),
+ error_entry( NPP_FFT_FLAG_ERROR ),
+ error_entry( NPP_FFT_ORDER_ERROR ),
+ error_entry( NPP_SCALE_RANGE_ERROR ),
+ error_entry( NPP_DATA_TYPE_ERROR ),
+ error_entry( NPP_OUT_OFF_RANGE_ERROR ),
+ error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
+ error_entry( NPP_MEMORY_ALLOCATION_ERR ),
+ error_entry( NPP_RANGE_ERROR ),
+ error_entry( NPP_BAD_ARGUMENT_ERROR ),
+ error_entry( NPP_NO_MEMORY_ERROR ),
+ error_entry( NPP_ERROR_RESERVED ),
+ error_entry( NPP_NO_OPERATION_WARNING ),
+ error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
+ error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
+#endif
+
+ error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
+ error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
+ error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
+ error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
+ error_entry( NPP_TEXTURE_BIND_ERROR ),
+ error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
+ error_entry( NPP_NOT_EVEN_STEP_ERROR ),
+ error_entry( NPP_INTERPOLATION_ERROR ),
+ error_entry( NPP_RESIZE_FACTOR_ERROR ),
+ error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
+ error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
- error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
- error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
- error_entry( NPP_DOUBLE_SIZE_WARNING ),
- error_entry( NPP_ODD_ROI_WARNING )
+ error_entry( NPP_DOUBLE_SIZE_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
- typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
- NppiSize oSizeROI, Npp64f* pRetVal);
+#if CUDA_VERSION < 5050
+ typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
- static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+ static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else
+ typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+ NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+
+ typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+ static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+ static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
+#endif
NppiSize sz;
sz.width = src1.cols;
DeviceBuffer dbuf;
- nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+ nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+ int bufSize;
+ buf_size_funcs[funcIdx](sz, &bufSize);
+
+ GpuMat buf(1, bufSize, CV_8UC1);
+
+ nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif
cudaSafeCall( cudaDeviceSynchronize() );
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
#define NPP_MINIMUM_REQUIRED_VERSION 4100
+ #define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
+
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."
#endif
- #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
+ #if (NPP_VERSION < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it."
#endif
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, mask, depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, mask, depth.second);
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
cv::Mat dst_gold = cv::min(src, val);
- EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
}
}
cv::Mat dst_gold = cv::max(src, val);
- EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+ EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
}
}
for (int i = 0; i < v_gold.rows; ++i)
f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));
- EXPECT_MAT_NEAR(u_gold, u, 0);
- EXPECT_MAT_NEAR(v_gold, v, 0);
+ EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
+ EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
#else
std::ofstream f(fname.c_str(), std::ios_base::binary);
endif()
include(${QT_USE_FILE})
- if(QT_INCLUDE_DIR)
- ocv_include_directories(${QT_INCLUDE_DIR})
- endif()
-
QT4_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
QT4_WRAP_CPP(_MOC_OUTFILES src/window_QT.h)
- list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES} ${QT_QTTEST_LIBRARY})
+ list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
list(APPEND highgui_srcs src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
if(${_have_flag})
if(XIMEA_LIBRARY_DIR)
link_directories(${XIMEA_LIBRARY_DIR})
endif()
- list(APPEND HIGHGUI_LIBRARIES m3api)
+ if(CMAKE_CL_64)
+ list(APPEND HIGHGUI_LIBRARIES m3apiX64)
+ else()
+ list(APPEND HIGHGUI_LIBRARIES m3api)
+ endif()
endif(HAVE_XIMEA)
if(HAVE_FFMPEG)
frame = cvCreateImage( cvSize(w,h), 8, 3 );
}
- VI.getPixels( index, (uchar*)frame->imageData, false, true );
- return frame;
+ if (VI.getPixels( index, (uchar*)frame->imageData, false, true ))
+ return frame;
+ else
+ return NULL;
}
double CvCaptureCAM_DShow::getProperty( int property_id )
virtual IplImage* retrieveFrame(int);
virtual int getCaptureDomain() { return CV_CAP_XIAPI; } // Return the type of the capture object: CV_CAP_VFW, etc...
-protected:
+private:
void init();
void errMsg(const char* msg, int errNum);
+ void resetCvImage();
+ int getBpp();
IplImage* frame;
HANDLE hmv;
DWORD numDevices;
- XI_IMG image;
- int width;
- int height;
- int format;
int timeout;
+ XI_IMG image;
};
/**********************************************************************************/
CvCapture* cvCreateCameraCapture_XIMEA( int index )
{
- CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA;
+ CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA;
if( capture->open( index ))
return capture;
// always use auto white ballance
mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1);
if(mvret != XI_OK) goto error;
+
+ // default image format RGB24
+ mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24);
+ if(mvret != XI_OK) goto error;
+ int width = 0;
mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width);
if(mvret != XI_OK) goto error;
+ int height = 0;
mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height);
if(mvret != XI_OK) goto error;
- // default image format RGB24
- format = XI_RGB24;
- mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, format);
- if(mvret != XI_OK) goto error;
-
// allocate frame buffer for RGB24 image
frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3);
errMsg("StartAcquisition XI_DEVICE failed", mvret);
goto error;
}
-
return true;
error:
+ errMsg("Open XI_DEVICE failed", mvret);
xiCloseDevice(hmv);
hmv = NULL;
return false;
void CvCaptureCAM_XIMEA::close()
{
- if(hmv)
- {
- xiStopAcquisition(hmv);
- xiCloseDevice(hmv);
- hmv = NULL;
- }
+    // Release the frame buffer owned by this capture object, if any.
+    if(frame)
+        cvReleaseImage(&frame);
+
+    // hmv is NULL when open() failed (its error path resets the handle) or
+    // when close() has already run; never hand a NULL handle to the XIMEA API.
+    if(hmv)
+    {
+        xiStopAcquisition(hmv);
+        xiCloseDevice(hmv);
+        hmv = NULL;
+    }
}
/**********************************************************************************/
bool CvCaptureCAM_XIMEA::grabFrame()
{
+ memset(&image, 0, sizeof(XI_IMG));
image.size = sizeof(XI_IMG);
int mvret = xiGetImage( hmv, timeout, &image);
IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int)
{
// update cvImage after format has changed
- if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format)
- {
- cvReleaseImage(&frame);
- switch( image.frm)
- {
- case XI_MONO8 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break;
- case XI_MONO16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break;
- case XI_RGB24 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break;
- case XI_RGB32 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break;
- default :
- return frame;
- }
- // update global image format
- format = image.frm;
- width = image.width;
- height = image.height;
- }
-
+ resetCvImage();
+
// copy pixel data
switch( image.frm)
{
- case XI_MONO8 : memcpy( frame->imageData, image.bp, image.width*image.height); break;
- case XI_MONO16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break;
- case XI_RGB24 : memcpy( frame->imageData, image.bp, image.width*image.height*3); break;
- case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(DWORD)); break;
+ case XI_MONO8 :
+ case XI_RAW8 : memcpy( frame->imageData, image.bp, image.width*image.height); break;
+ case XI_MONO16 :
+ case XI_RAW16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break;
+ case XI_RGB24 :
+ case XI_RGB_PLANAR : memcpy( frame->imageData, image.bp, image.width*image.height*3); break;
+ case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*4); break;
default: break;
}
return frame;
/**********************************************************************************/
+// Makes sure `frame` can hold the most recently grabbed XI_IMG (`image`) and
+// clears it. The buffer is re-created whenever it is missing or its size,
+// depth or channel count does not match the grabbed image. For a pixel format
+// this backend does not handle, the buffer is released and left NULL.
+void CvCaptureCAM_XIMEA::resetCvImage()
+{
+    // IplImage layout required by the pixel format of the grabbed image
+    int depth = 0, channels = 0;
+    switch( image.frm)
+    {
+    case XI_MONO8 :
+    case XI_RAW8 : depth = IPL_DEPTH_8U; channels = 1; break;
+    case XI_MONO16 :
+    case XI_RAW16 : depth = IPL_DEPTH_16U; channels = 1; break;
+    case XI_RGB24 :
+    case XI_RGB_PLANAR : depth = IPL_DEPTH_8U; channels = 3; break;
+    case XI_RGB32 : depth = IPL_DEPTH_8U; channels = 4; break;
+    default :
+        // unsupported format: drop the buffer instead of exposing stale data
+        if(frame) cvReleaseImage(&frame);
+        frame = NULL;
+        return;
+    }
+
+    // Compare against the buffer that will actually receive the pixels.
+    // Comparing against the current camera parameters (as before) misses the
+    // case where the parameters were changed after the buffer was allocated,
+    // which lets the copy in retrieveFrame() run past the end of the buffer.
+    const bool needRealloc = frame == NULL
+        || frame->width != (int)image.width
+        || frame->height != (int)image.height
+        || frame->depth != depth
+        || frame->nChannels != channels;
+
+    if( needRealloc)
+    {
+        if(frame) cvReleaseImage(&frame);
+        frame = cvCreateImage(cvSize( image.width, image.height), depth, channels);
+    }
+    cvZero(frame);
+}
+/**********************************************************************************/
+
double CvCaptureCAM_XIMEA::getProperty( int property_id )
{
if(hmv == NULL)
switch(property_id)
{
// OCV parameters
- case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival);
- if(mvret == XI_OK) width = ival;
- break;
- case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival);
- if(mvret == XI_OK) height = ival;
- break;
+ case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); break;
+ case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); break;
case CV_CAP_PROP_FPS : mvret = xiSetParamFloat( hmv, XI_PRM_FRAMERATE, fval); break;
case CV_CAP_PROP_GAIN : mvret = xiSetParamFloat( hmv, XI_PRM_GAIN, fval); break;
case CV_CAP_PROP_EXPOSURE : mvret = xiSetParamInt( hmv, XI_PRM_EXPOSURE, ival); break;
// XIMEA camera properties
case CV_CAP_PROP_XI_DOWNSAMPLING : mvret = xiSetParamInt( hmv, XI_PRM_DOWNSAMPLING, ival); break;
- case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival);
- if(mvret == XI_OK) format = ival;
- break;
+ case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); break;
case CV_CAP_PROP_XI_OFFSET_X : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_X, ival); break;
case CV_CAP_PROP_XI_OFFSET_Y : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_Y, ival); break;
case CV_CAP_PROP_XI_TRG_SOURCE : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOURCE, ival); break;
void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum)
{
#if defined WIN32 || defined _WIN32
- char buf[512];
+ char buf[512]="";
sprintf( buf, "%s : %d\n", msg, errNum);
OutputDebugString(buf);
#else
#endif
}
+/**********************************************************************************/
+
+// Returns the number of bytes per pixel implied by the format of the last
+// grabbed image (image.frm), or 0 for a format not listed here.
+int CvCaptureCAM_XIMEA::getBpp()
+{
+    int bytesPerPixel = 0;
+
+    switch( image.frm)
+    {
+    case XI_MONO8 :
+    case XI_RAW8 :
+        bytesPerPixel = 1;
+        break;
+    case XI_MONO16 :
+    case XI_RAW16 :
+        bytesPerPixel = 2;
+        break;
+    case XI_RGB24 :
+    case XI_RGB_PLANAR :
+        bytesPerPixel = 3;
+        break;
+    case XI_RGB32 :
+        bytesPerPixel = 4;
+        break;
+    default :
+        break;
+    }
+
+    return bytesPerPixel;
+}
+
/**********************************************************************************/
\ No newline at end of file
void cv::imshow( const string& winname, InputArray _img )
{
+ const Size size = _img.size();
#ifndef HAVE_OPENGL
- Mat img = _img.getMat();
- CvMat c_img = img;
- cvShowImage(winname.c_str(), &c_img);
+ CV_Assert(size.width>0 && size.height>0);
+ {
+ Mat img = _img.getMat();
+ CvMat c_img = img;
+ cvShowImage(winname.c_str(), &c_img);
+ }
#else
const double useGl = getWindowProperty(winname, WND_PROP_OPENGL);
+ CV_Assert(size.width>0 && size.height>0);
if (useGl <= 0)
{
if (autoSize > 0)
{
- Size size = _img.size();
resizeWindow(winname, size.width, size.height);
}
The function can do the following transformations:
*
+ RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` )
Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
.. math::
* **GC_PR_BGD** defines a possible background pixel.
- * **GC_PR_BGD** defines a possible foreground pixel.
+ * **GC_PR_FGD** defines a possible foreground pixel.
:param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` .
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+// ----------------------------------------------------------------------
+// CLAHE
+
+namespace
+{
+    // Parallel loop body that computes the per-tile lookup tables for CLAHE.
+    // Each index of the processed range is one tile; its table is written to
+    // the matching row of the shared LUT matrix.
+    class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
+    {
+    public:
+        CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize,
+                           int tilesX, int tilesY, int clipLimit, float lutScale)
+            : src_(src),
+              lut_(lut),
+              tileSize_(tileSize),
+              tilesX_(tilesX),
+              tilesY_(tilesY),
+              clipLimit_(clipLimit),
+              lutScale_(lutScale)
+        {}
+
+        void operator ()(const cv::Range& range) const;
+
+    private:
+        cv::Mat src_;           // image the tiles are cut from
+        mutable cv::Mat lut_;   // written from the const operator(), hence mutable
+
+        cv::Size tileSize_;     // width/height of one tile in pixels
+        int tilesX_;            // number of tiles along x
+        int tilesY_;            // number of tiles along y
+        int clipLimit_;         // per-bin histogram cap; values <= 0 disable clipping
+        float lutScale_;        // factor turning a cumulative count into a LUT value
+    };
+
+    // Builds the lookup table of every tile whose linear index lies in `range`.
+    // For each tile: histogram the pixels, optionally clip the histogram and
+    // spread the clipped excess over all bins, then store the scaled cumulative
+    // histogram in the tile's LUT row.
+    void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
+    {
+        const int histSize = 256;
+
+        for (int tileIdx = range.start; tileIdx < range.end; ++tileIdx)
+        {
+            // one LUT row per tile, tiles numbered row-major over the grid
+            uchar* const tileLut = lut_.ptr(tileIdx);
+
+            const cv::Rect tileROI((tileIdx % tilesX_) * tileSize_.width,
+                                   (tileIdx / tilesX_) * tileSize_.height,
+                                   tileSize_.width,
+                                   tileSize_.height);
+            const cv::Mat tile = src_(tileROI);
+
+            // histogram of the tile
+            int tileHist[histSize] = {0, };
+            for (int row = 0; row < tile.rows; ++row)
+            {
+                const uchar* pixels = tile.ptr<uchar>(row);
+                for (int col = 0; col < tile.cols; ++col)
+                    ++tileHist[pixels[col]];
+            }
+
+            if (clipLimit_ > 0)
+            {
+                // cap every bin and count how much was cut off in total
+                int excess = 0;
+                for (int bin = 0; bin < histSize; ++bin)
+                {
+                    const int over = tileHist[bin] - clipLimit_;
+                    if (over > 0)
+                    {
+                        excess += over;
+                        tileHist[bin] = clipLimit_;
+                    }
+                }
+
+                // give every bin an equal share; the remainder goes to the first bins
+                const int perBin = excess / histSize;
+                const int leftover = excess % histSize;
+                for (int bin = 0; bin < histSize; ++bin)
+                    tileHist[bin] += perBin + (bin < leftover ? 1 : 0);
+            }
+
+            // scaled cumulative histogram -> lookup table
+            int cumulative = 0;
+            for (int bin = 0; bin < histSize; ++bin)
+            {
+                cumulative += tileHist[bin];
+                tileLut[bin] = cv::saturate_cast<uchar>(cumulative * lutScale_);
+            }
+        }
+    }
+
+    // Parallel loop body that produces the output image: every index of the
+    // processed range is one image row, whose pixels are mapped through the
+    // per-tile lookup tables.
+    class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
+    {
+    public:
+        CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut,
+                                 cv::Size tileSize, int tilesX, int tilesY)
+            : src_(src),
+              dst_(dst),
+              lut_(lut),
+              tileSize_(tileSize),
+              tilesX_(tilesX),
+              tilesY_(tilesY)
+        {}
+
+        void operator ()(const cv::Range& range) const;
+
+    private:
+        cv::Mat src_;           // input image
+        mutable cv::Mat dst_;   // written from the const operator(), hence mutable
+        cv::Mat lut_;           // one 256-entry row per tile
+
+        cv::Size tileSize_;     // width/height of one tile in pixels
+        int tilesX_;            // number of tiles along x
+        int tilesY_;            // number of tiles along y
+    };
+
+ // Computes the output rows in `range`. Each pixel is looked up in the LUTs of
+ // the (up to) four tiles surrounding it and the four results are blended
+ // bilinearly.
+ void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
+ {
+ const size_t lut_step = lut_.step;
+
+ for (int y = range.start; y < range.end; ++y)
+ {
+ const uchar* srcRow = src_.ptr<uchar>(y);
+ uchar* dstRow = dst_.ptr<uchar>(y);
+
+ // vertical position in tile units, shifted by half a tile
+ const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
+
+ int ty1 = cvFloor(tyf);
+ int ty2 = ty1 + 1;
+
+ // blend weight is taken from the unclamped index ...
+ const float ya = tyf - ty1;
+
+ // ... then both indices are clamped to the grid; at the image border they
+ // coincide, so the blend reduces to the border tile's LUT
+ ty1 = std::max(ty1, 0);
+ ty2 = std::min(ty2, tilesY_ - 1);
+
+ // LUT row of the first tile in tile-row ty1 / ty2
+ const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
+ const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
+
+ for (int x = 0; x < src_.cols; ++x)
+ {
+ // same scheme horizontally
+ const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
+
+ int tx1 = cvFloor(txf);
+ int tx2 = tx1 + 1;
+
+ const float xa = txf - tx1;
+
+ tx1 = std::max(tx1, 0);
+ tx2 = std::min(tx2, tilesX_ - 1);
+
+ const int srcVal = srcRow[x];
+
+ // offset of entry srcVal in the LUT row of tile column tx1 / tx2,
+ // relative to the start of the tile-row
+ const size_t ind1 = tx1 * lut_step + srcVal;
+ const size_t ind2 = tx2 * lut_step + srcVal;
+
+ float res = 0;
+
+ // bilinear blend of the four tile LUT values
+ res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
+ res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
+ res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
+ res += lutPlane2[ind2] * ((xa) * (ya));
+
+ dstRow[x] = cv::saturate_cast<uchar>(res);
+ }
+ }
+ }
+
+    // Concrete cv::CLAHE implementation returned by cv::createCLAHE().
+    // Holds the clip limit, the tile grid dimensions and two work buffers that
+    // are kept between calls to apply().
+    class CLAHE_Impl : public cv::CLAHE
+    {
+    public:
+        CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8)
+            : clipLimit_(clipLimit),
+              tilesX_(tilesX),
+              tilesY_(tilesY)
+        {}
+
+        cv::AlgorithmInfo* info() const;
+
+        void apply(cv::InputArray src, cv::OutputArray dst);
+
+        void setClipLimit(double clipLimit);
+        double getClipLimit() const;
+
+        void setTilesGridSize(cv::Size tileGridSize);
+        cv::Size getTilesGridSize() const;
+
+        // releases the work buffers below
+        void collectGarbage();
+
+    private:
+        double clipLimit_;  // contrast limit; values <= 0 disable clipping in apply()
+        int tilesX_;        // number of tiles along x
+        int tilesY_;        // number of tiles along y
+
+        cv::Mat srcExt_;    // padded copy of the input, used when it is not divisible by the grid
+        cv::Mat lut_;       // per-tile lookup tables, one row per tile
+    };
+
+ // Algorithm registration for CLAHE_Impl under the name "CLAHE", exposing
+ // clipLimit_, tilesX_ and tilesY_ as the parameters "clipLimit", "tilesX"
+ // and "tilesY".
+ // NOTE(review): presumably this macro also provides the definition of
+ // CLAHE_Impl::info() declared in the class - confirm against the macro.
+ CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
+ obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
+ obj.info()->addParam(obj, "tilesX", obj.tilesX_);
+ obj.info()->addParam(obj, "tilesY", obj.tilesY_))
+
+    // Applies CLAHE to an 8-bit single-channel image.
+    //
+    // _src : input image, must be CV_8UC1.
+    // _dst : output image, created with the size and type of the input.
+    //
+    // Raises a CV_Assert failure for any other input type or for a tile grid
+    // with a non-positive dimension.
+    void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
+    {
+        cv::Mat src = _src.getMat();
+
+        CV_Assert( src.type() == CV_8UC1 );
+        // The grid dimensions are used as divisors below; reject a zero or
+        // negative grid with a regular OpenCV error instead of dividing by zero.
+        CV_Assert( tilesX_ > 0 && tilesY_ > 0 );
+
+        _dst.create( src.size(), src.type() );
+        cv::Mat dst = _dst.getMat();
+
+        const int histSize = 256;
+
+        // one 256-entry lookup table per tile
+        lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
+
+        cv::Size tileSize;
+        cv::Mat srcForLut;
+
+        if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
+        {
+            tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
+            srcForLut = src;
+        }
+        else
+        {
+            // pad at the bottom/right so that the image splits into equal tiles;
+            // only the LUT computation sees the padded copy
+            cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
+
+            tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+            srcForLut = srcExt_;
+        }
+
+        const int tileSizeTotal = tileSize.area();
+        const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
+
+        // convert the relative clip limit into an absolute per-bin count (at least 1);
+        // 0 means "no clipping"
+        int clipLimit = 0;
+        if (clipLimit_ > 0.0)
+        {
+            clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
+            clipLimit = std::max(clipLimit, 1);
+        }
+
+        // pass 1: per-tile lookup tables, parallel over tiles
+        CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
+        cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
+
+        // pass 2: interpolate between tile LUTs, parallel over image rows
+        CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
+        cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
+    }
+
+    // Stores the contrast limit used by subsequent apply() calls.
+    void CLAHE_Impl::setClipLimit(double clipLimit)
+    {
+        this->clipLimit_ = clipLimit;
+    }
+
+    // Returns the currently configured contrast limit.
+    double CLAHE_Impl::getClipLimit() const
+    {
+        return this->clipLimit_;
+    }
+
+    // Sets the tile grid: width = tiles along x, height = tiles along y.
+    void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
+    {
+        this->tilesX_ = tileGridSize.width;
+        this->tilesY_ = tileGridSize.height;
+    }
+
+    // Returns the tile grid as (tiles along x, tiles along y).
+    cv::Size CLAHE_Impl::getTilesGridSize() const
+    {
+        const cv::Size grid(tilesX_, tilesY_);
+        return grid;
+    }
+
+ void CLAHE_Impl::collectGarbage()
+ {
+ srcExt_.release();
+ lut_.release();
+ }
+}
+
+// Factory for the CLAHE implementation: tileGridSize.width / .height become
+// the number of tiles along x / y.
+cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
+{
+    const int tilesX = tileGridSize.width;
+    const int tilesY = tileGridSize.height;
+    return new CLAHE_Impl(clipLimit, tilesX, tilesY);
+}
const int ITUR_BT_601_CBV = -74448;
template<int bIdx, int uIdx>
-struct YUV420sp2RGB888Invoker
+struct YUV420sp2RGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *muv;
YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
//R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
};
template<int bIdx, int uIdx>
-struct YUV420sp2RGBA8888Invoker
+struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *muv;
YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
//R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
};
template<int bIdx>
-struct YUV420p2RGB888Invoker
+struct YUV420p2RGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *mu, *mv;
YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- const int rangeBegin = range.begin() * 2;
- const int rangeEnd = range.end() * 2;
+ const int rangeBegin = range.start * 2;
+ const int rangeEnd = range.end * 2;
size_t uvsteps[2] = {width/2, stride - width/2};
int usIdx = ustepIdx, vsIdx = vstepIdx;
const uchar* y1 = my1 + rangeBegin * stride;
- const uchar* u1 = mu + (range.begin() / 2) * stride;
- const uchar* v1 = mv + (range.begin() / 2) * stride;
+ const uchar* u1 = mu + (range.start / 2) * stride;
+ const uchar* v1 = mv + (range.start / 2) * stride;
- if(range.begin() % 2 == 1)
+ if(range.start % 2 == 1)
{
u1 += uvsteps[(usIdx++) & 1];
v1 += uvsteps[(vsIdx++) & 1];
};
template<int bIdx>
-struct YUV420p2RGBA8888Invoker
+struct YUV420p2RGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* my1, *mu, *mv;
YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin() * 2;
- int rangeEnd = range.end() * 2;
+ int rangeBegin = range.start * 2;
+ int rangeEnd = range.end * 2;
size_t uvsteps[2] = {width/2, stride - width/2};
int usIdx = ustepIdx, vsIdx = vstepIdx;
const uchar* y1 = my1 + rangeBegin * stride;
- const uchar* u1 = mu + (range.begin() / 2) * stride;
- const uchar* v1 = mv + (range.begin() / 2) * stride;
+ const uchar* u1 = mu + (range.start / 2) * stride;
+ const uchar* v1 = mv + (range.start / 2) * stride;
- if(range.begin() % 2 == 1)
+ if(range.start % 2 == 1)
{
u1 += uvsteps[(usIdx++) & 1];
v1 += uvsteps[(vsIdx++) & 1];
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx, int uIdx>
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx>
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
template<int bIdx>
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows/2), converter);
+ parallel_for_(Range(0, _dst.rows/2), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows/2));
+ converter(Range(0, _dst.rows/2));
}
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGB888Invoker
+struct YUV422toRGB888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* src;
YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin();
- int rangeEnd = range.end();
+ int rangeBegin = range.start;
+ int rangeEnd = range.end;
const int uidx = 1 - yIdx + uIdx * 2;
const int vidx = (2 + uidx) % 4;
};
template<int bIdx, int uIdx, int yIdx>
-struct YUV422toRGBA8888Invoker
+struct YUV422toRGBA8888Invoker : ParallelLoopBody
{
Mat* dst;
const uchar* src;
YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int rangeBegin = range.begin();
- int rangeEnd = range.end();
+ int rangeBegin = range.start;
+ int rangeEnd = range.end;
const int uidx = 1 - yIdx + uIdx * 2;
const int vidx = (2 + uidx) % 4;
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
{
YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows), converter);
+ parallel_for_(Range(0, _dst.rows), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows));
+ converter(Range(0, _dst.rows));
}
template<int bIdx, int uIdx, int yIdx>
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
{
YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
-#ifdef HAVE_TBB
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
- parallel_for(BlockedRange(0, _dst.rows), converter);
+ parallel_for_(Range(0, _dst.rows), converter);
else
-#endif
- converter(BlockedRange(0, _dst.rows));
+ converter(Range(0, _dst.rows));
}
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
namespace cv
{
-struct DTColumnInvoker
+struct DTColumnInvoker : ParallelLoopBody
{
DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab)
{
sqr_tab = _sqr_tab;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
- int i, i1 = range.begin(), i2 = range.end();
+ int i, i1 = range.start, i2 = range.end;
int m = src->rows;
size_t sstep = src->step, dstep = dst->step/sizeof(float);
AutoBuffer<int> _d(m);
};
-struct DTRowInvoker
+struct DTRowInvoker : ParallelLoopBody
{
DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab )
{
inv_tab = _inv_tab;
}
- void operator()( const BlockedRange& range ) const
+ void operator()( const Range& range ) const
{
const float inf = 1e15f;
- int i, i1 = range.begin(), i2 = range.end();
+ int i, i1 = range.start, i2 = range.end;
int n = dst->cols;
AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
float* f = (float*)(uchar*)_buf;
for( ; i <= m*3; i++ )
sat_tab[i] = i - shift;
- cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
+ cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
// stage 2: compute modified distance transform for each row
float* inv_tab = sqr_tab + n;
sqr_tab[i] = (float)(i*i);
}
- cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
+ cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
}
}
}
-class EqualizeHistCalcHist_Invoker
+class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
{
public:
enum {HIST_SZ = 256};
-#ifdef HAVE_TBB
- typedef tbb::mutex* MutextPtr;
-#else
- typedef void* MutextPtr;
-#endif
-
- EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock)
+ EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
: src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
{ }
- void operator()( const cv::BlockedRange& rowRange ) const
+ void operator()( const cv::Range& rowRange ) const
{
int localHistogram[HIST_SZ] = {0, };
const size_t sstep = src_.step;
int width = src_.cols;
- int height = rowRange.end() - rowRange.begin();
+ int height = rowRange.end - rowRange.start;
if (src_.isContinuous())
{
height = 1;
}
- for (const uchar* ptr = src_.ptr<uchar>(rowRange.begin()); height--; ptr += sstep)
+ for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
{
int x = 0;
for (; x <= width - 4; x += 4)
localHistogram[ptr[x]]++;
}
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock lock(*histogramLock_);
-#endif
+ cv::AutoLock lock(*histogramLock_);
for( int i = 0; i < HIST_SZ; i++ )
globalHistogram_[i] += localHistogram[i];
static bool isWorthParallel( const cv::Mat& src )
{
-#ifdef HAVE_TBB
return ( src.total() >= 640*480 );
-#else
- (void)src;
- return false;
-#endif
}
private:
cv::Mat& src_;
int* globalHistogram_;
- MutextPtr histogramLock_;
+ cv::Mutex* histogramLock_;
};
-class EqualizeHistLut_Invoker
+class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
{
public:
EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
lut_(lut)
{ }
- void operator()( const cv::BlockedRange& rowRange ) const
+ void operator()( const cv::Range& rowRange ) const
{
const size_t sstep = src_.step;
const size_t dstep = dst_.step;
int width = src_.cols;
- int height = rowRange.end() - rowRange.begin();
+ int height = rowRange.end - rowRange.start;
int* lut = lut_;
if (src_.isContinuous() && dst_.isContinuous())
height = 1;
}
- const uchar* sptr = src_.ptr<uchar>(rowRange.begin());
- uchar* dptr = dst_.ptr<uchar>(rowRange.begin());
+ const uchar* sptr = src_.ptr<uchar>(rowRange.start);
+ uchar* dptr = dst_.ptr<uchar>(rowRange.start);
for (; height--; sptr += sstep, dptr += dstep)
{
static bool isWorthParallel( const cv::Mat& src )
{
-#ifdef HAVE_TBB
return ( src.total() >= 640*480 );
-#else
- (void)src;
- return false;
-#endif
}
private:
if(src.empty())
return;
-#ifdef HAVE_TBB
- tbb::mutex histogramLockInstance;
- EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance;
-#else
- EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0;
-#endif
+ Mutex histogramLockInstance;
const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
int hist[hist_sz] = {0,};
int lut[hist_sz];
- EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock);
+ EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
EqualizeHistLut_Invoker lutBody(src, dst, lut);
- cv::BlockedRange heightRange(0, src.rows);
+ cv::Range heightRange(0, src.rows);
if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
- parallel_for(heightRange, calcBody);
+ parallel_for_(heightRange, calcBody);
else
calcBody(heightRange);
}
if(EqualizeHistLut_Invoker::isWorthParallel(src))
- parallel_for(heightRange, lutBody);
+ parallel_for_(heightRange, lutBody);
else
lutBody(heightRange);
}
// ----------------------------------------------------------------------
-// CLAHE
-
-namespace
-{
- class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
- {
- public:
- CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
- src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
- {
- }
-
- void operator ()(const cv::Range& range) const;
-
- private:
- cv::Mat src_;
- mutable cv::Mat lut_;
-
- cv::Size tileSize_;
- int tilesX_;
- int tilesY_;
- int clipLimit_;
- float lutScale_;
- };
-
- void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
- {
- const int histSize = 256;
-
- uchar* tileLut = lut_.ptr(range.start);
- const size_t lut_step = lut_.step;
-
- for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
- {
- const int ty = k / tilesX_;
- const int tx = k % tilesX_;
-
- // retrieve tile submatrix
-
- cv::Rect tileROI;
- tileROI.x = tx * tileSize_.width;
- tileROI.y = ty * tileSize_.height;
- tileROI.width = tileSize_.width;
- tileROI.height = tileSize_.height;
-
- const cv::Mat tile = src_(tileROI);
-
- // calc histogram
-
- int tileHist[histSize] = {0, };
-
- int height = tileROI.height;
- const size_t sstep = tile.step;
- for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
- {
- int x = 0;
- for (; x <= tileROI.width - 4; x += 4)
- {
- int t0 = ptr[x], t1 = ptr[x+1];
- tileHist[t0]++; tileHist[t1]++;
- t0 = ptr[x+2]; t1 = ptr[x+3];
- tileHist[t0]++; tileHist[t1]++;
- }
-
- for (; x < tileROI.width; ++x)
- tileHist[ptr[x]]++;
- }
-
- // clip histogram
-
- if (clipLimit_ > 0)
- {
- // how many pixels were clipped
- int clipped = 0;
- for (int i = 0; i < histSize; ++i)
- {
- if (tileHist[i] > clipLimit_)
- {
- clipped += tileHist[i] - clipLimit_;
- tileHist[i] = clipLimit_;
- }
- }
-
- // redistribute clipped pixels
- int redistBatch = clipped / histSize;
- int residual = clipped - redistBatch * histSize;
-
- for (int i = 0; i < histSize; ++i)
- tileHist[i] += redistBatch;
-
- for (int i = 0; i < residual; ++i)
- tileHist[i]++;
- }
-
- // calc Lut
-
- int sum = 0;
- for (int i = 0; i < histSize; ++i)
- {
- sum += tileHist[i];
- tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
- }
- }
- }
-
- class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
- {
- public:
- CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
- src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
- {
- }
-
- void operator ()(const cv::Range& range) const;
-
- private:
- cv::Mat src_;
- mutable cv::Mat dst_;
- cv::Mat lut_;
-
- cv::Size tileSize_;
- int tilesX_;
- int tilesY_;
- };
-
- void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
- {
- const size_t lut_step = lut_.step;
-
- for (int y = range.start; y < range.end; ++y)
- {
- const uchar* srcRow = src_.ptr<uchar>(y);
- uchar* dstRow = dst_.ptr<uchar>(y);
-
- const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
-
- int ty1 = cvFloor(tyf);
- int ty2 = ty1 + 1;
-
- const float ya = tyf - ty1;
-
- ty1 = std::max(ty1, 0);
- ty2 = std::min(ty2, tilesY_ - 1);
-
- const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
- const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
-
- for (int x = 0; x < src_.cols; ++x)
- {
- const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
-
- int tx1 = cvFloor(txf);
- int tx2 = tx1 + 1;
-
- const float xa = txf - tx1;
-
- tx1 = std::max(tx1, 0);
- tx2 = std::min(tx2, tilesX_ - 1);
-
- const int srcVal = srcRow[x];
-
- const size_t ind1 = tx1 * lut_step + srcVal;
- const size_t ind2 = tx2 * lut_step + srcVal;
-
- float res = 0;
-
- res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
- res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
- res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
- res += lutPlane2[ind2] * ((xa) * (ya));
-
- dstRow[x] = cv::saturate_cast<uchar>(res);
- }
- }
- }
-
- class CLAHE_Impl : public cv::CLAHE
- {
- public:
- CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
-
- cv::AlgorithmInfo* info() const;
-
- void apply(cv::InputArray src, cv::OutputArray dst);
-
- void setClipLimit(double clipLimit);
- double getClipLimit() const;
-
- void setTilesGridSize(cv::Size tileGridSize);
- cv::Size getTilesGridSize() const;
-
- void collectGarbage();
-
- private:
- double clipLimit_;
- int tilesX_;
- int tilesY_;
-
- cv::Mat srcExt_;
- cv::Mat lut_;
- };
-
- CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
- clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
- {
- }
-
- CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
- obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
- obj.info()->addParam(obj, "tilesX", obj.tilesX_);
- obj.info()->addParam(obj, "tilesY", obj.tilesY_))
-
- void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
- {
- cv::Mat src = _src.getMat();
-
- CV_Assert( src.type() == CV_8UC1 );
-
- _dst.create( src.size(), src.type() );
- cv::Mat dst = _dst.getMat();
-
- const int histSize = 256;
-
- lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
-
- cv::Size tileSize;
- cv::Mat srcForLut;
-
- if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
- {
- tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
- srcForLut = src;
- }
- else
- {
- cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
-
- tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
- srcForLut = srcExt_;
- }
-
- const int tileSizeTotal = tileSize.area();
- const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
-
- int clipLimit = 0;
- if (clipLimit_ > 0.0)
- {
- clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
- clipLimit = std::max(clipLimit, 1);
- }
-
- CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
- cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
-
- CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
- cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
- }
-
- void CLAHE_Impl::setClipLimit(double clipLimit)
- {
- clipLimit_ = clipLimit;
- }
-
- double CLAHE_Impl::getClipLimit() const
- {
- return clipLimit_;
- }
-
- void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
- {
- tilesX_ = tileGridSize.width;
- tilesY_ = tileGridSize.height;
- }
-
- cv::Size CLAHE_Impl::getTilesGridSize() const
- {
- return cv::Size(tilesX_, tilesY_);
- }
-
- void CLAHE_Impl::collectGarbage()
- {
- srcExt_.release();
- lut_.release();
- }
-}
-
-cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
-{
- return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
-}
-
-// ----------------------------------------------------------------------
/* Implementation of RTTI and Generic Functions for CvHistogram */
#define CV_TYPE_NAME_HIST "opencv-hist"
namespace cv
{
-class MorphologyRunner
+class MorphologyRunner : public ParallelLoopBody
{
public:
MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
columnBorderType = _columnBorderType;
}
- void operator () ( const BlockedRange& range ) const
+ void operator () ( const Range& range ) const
{
- int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
- int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
+ int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows);
+ int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows);
/*if(0)
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
- src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
+ src.rows, src.cols, range.start, range.end, row0, row1);*/
Mat srcStripe = src.rowRange(row0, row1);
Mat dstStripe = dst.rowRange(row0, row1);
}
int nStripes = 1;
-#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
+#if defined HAVE_TEGRA_OPTIMIZATION
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
(borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
src.rows >= 64 ) //NOTE: just heuristics
nStripes = 4;
#endif
- parallel_for(BlockedRange(0, nStripes),
- MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
+ parallel_for_(Range(0, nStripes),
+ MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
//Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
// kernel, anchor, borderType, borderType, borderValue );
styledAttrs.recycle();
}
+ /**
+ * Sets the camera index
+ * @param cameraIndex new camera index
+ */
+ public void setCameraIndex(int cameraIndex) {
+ this.mCameraIndex = cameraIndex;
+ }
+
public interface CvCameraViewListener {
/**
* This method is invoked when camera preview has started. After this method is invoked
protected MatOfByte(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfByte(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfDouble(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfDouble(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfFloat(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfFloat(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfFloat4(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfFloat4(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfFloat6(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfFloat6(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfInt(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfInt(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfInt4(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfInt4(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfKeyPoint(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfKeyPoint(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfPoint(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfPoint(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfPoint2f(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfPoint2f(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfPoint3(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfPoint3(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfPoint3f(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfPoint3f(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
protected MatOfRect(long addr) {
super(addr);
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
public MatOfRect(Mat m) {
super(m, Range.all());
- if(checkVector(_channels, _depth) < 0 )
+ if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incomatible Mat");
//FIXME: do we need release() here?
}
#include "precomp.hpp"
-#ifdef HAVE_TBB
-#include <tbb/tbb.h>
-#endif
-
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
{
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
return iter;
}
-struct rprop_loop {
+struct rprop_loop : cv::ParallelLoopBody {
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
int buf_sz;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
double* buf_ptr;
double** x = 0;
buf_ptr += (df[i] - x[i])*2;
}
- for(int si = range.begin(); si < range.end(); si++ )
+ for(int si = range.start; si < range.end; si++ )
{
if (si % dcount0 != 0) continue;
int n1, n2, k;
}
// backward pass, update dEdw
- #ifdef HAVE_TBB
- static tbb::spin_mutex mutex;
- tbb::spin_mutex::scoped_lock lock;
- #endif
+ static cv::Mutex mutex;
+
for(int i = l_count-1; i > 0; i-- )
{
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
cvMul( grad1, &_df, grad1 );
- #ifdef HAVE_TBB
- lock.acquire(mutex);
- #endif
- cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
- cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
- cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
-
- // update bias part of dEdw
- for( k = 0; k < dcount; k++ )
- {
- double* dst = _dEdw.data.db + n1*n2;
- const double* src = grad1->data.db + k*n2;
- for(int j = 0; j < n2; j++ )
- dst[j] += src[j];
+
+ {
+ cv::AutoLock lock(mutex);
+ cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
+ cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
+ cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
+
+ // update bias part of dEdw
+ for( k = 0; k < dcount; k++ )
+ {
+ double* dst = _dEdw.data.db + n1*n2;
+ const double* src = grad1->data.db + k*n2;
+ for(int j = 0; j < n2; j++ )
+ dst[j] += src[j];
+ }
+
+ if (i > 1)
+ cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
}
- if (i > 1)
- cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
- #ifdef HAVE_TBB
- lock.release();
- #endif
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
if( i > 1 )
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
double E = 0;
// first, iterate through all the samples and compute dEdw
- cv::parallel_for(cv::BlockedRange(0, count),
+ cv::parallel_for_(cv::Range(0, count),
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
);
}
-class Tree_predictor
+class Tree_predictor : public cv::ParallelLoopBody
{
private:
pCvSeq* weak;
const CvMat* missing;
const float shrinkage;
-#ifdef HAVE_TBB
- static tbb::spin_mutex SumMutex;
-#endif
+ static cv::Mutex SumMutex;
public:
Tree_predictor& operator=( const Tree_predictor& )
{ return *this; }
- virtual void operator()(const cv::BlockedRange& range) const
+ virtual void operator()(const cv::Range& range) const
{
-#ifdef HAVE_TBB
- tbb::spin_mutex::scoped_lock lock;
-#endif
CvSeqReader reader;
- int begin = range.begin();
- int end = range.end();
+ int begin = range.start;
+ int end = range.end;
int weak_count = end - begin;
CvDTree* tree;
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
}
}
-#ifdef HAVE_TBB
- lock.acquire(SumMutex);
- sum[i] += tmp_sum;
- lock.release();
-#else
- sum[i] += tmp_sum;
-#endif
+
+ {
+ cv::AutoLock lock(SumMutex);
+ sum[i] += tmp_sum;
+ }
}
} // Tree_predictor::operator()
}; // class Tree_predictor
-
-#ifdef HAVE_TBB
-tbb::spin_mutex Tree_predictor::SumMutex;
-#endif
-
+cv::Mutex Tree_predictor::SumMutex;
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
params.shrinkage, _sample, _missing, sum);
-//#ifdef HAVE_TBB
-// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
-// tbb::auto_partitioner());
-//#else
- cv::parallel_for(cv::BlockedRange(begin, end), predictor);
-//#endif
+ cv::parallel_for_(cv::Range(begin, end), predictor);
for (int i=0; i<class_count; ++i)
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
//===========================================================================
-class Sample_predictor
+class Sample_predictor : public cv::ParallelLoopBody
{
private:
const CvGBTrees* gbt;
{}
- virtual void operator()(const cv::BlockedRange& range) const
+ virtual void operator()(const cv::Range& range) const
{
- int begin = range.begin();
- int end = range.end();
+ int begin = range.start;
+ int end = range.end;
CvMat x;
CvMat miss;
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
_data->get_missing(), _sample_idx);
-//#ifdef HAVE_TBB
-// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
-//#else
- cv::parallel_for(cv::BlockedRange(0,n), predictor);
-//#endif
+ cv::parallel_for_(cv::Range(0,n), predictor);
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
int r_step = CV_IS_MAT_CONT(response->type) ?
return result;
}
-struct P1 {
+struct P1 : cv::ParallelLoopBody {
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
{
float* result;
int buf_sz;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
cv::AutoBuffer<float> buf(buf_sz);
- for(int i = range.begin(); i < range.end(); i += 1 )
+ for(int i = range.start; i < range.end; i += 1 )
{
float* neighbor_responses = &buf[0];
float* dist = neighbor_responses + 1*k;
int k1 = get_sample_count();
k1 = MIN( k1, k );
- cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
- _results, _neighbor_responses, _dist, &result)
+ cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
+ _results, _neighbor_responses, _dist, &result)
);
return result;
return result;
}
-struct predict_body {
+struct predict_body : cv::ParallelLoopBody {
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
CvMat* _results, float* _value, int _var_count1
float* value;
int var_count1;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
int cls = -1;
cv::AutoBuffer<double> buffer(nclasses + var_count1);
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
- for(int k = range.begin(); k < range.end(); k += 1 )
+ for(int k = range.start; k < range.end; k += 1 )
{
int ival;
double opt = FLT_MAX;
const int* vidx = var_idx ? var_idx->data.i : 0;
- cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
- vidx, cls_labels, results, &value, var_count
- ));
+ cv::parallel_for_(cv::Range(0, samples->rows),
+ predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
+ vidx, cls_labels, results, &value, var_count));
return value;
}
return result;
}
-struct predict_body_svm {
+struct predict_body_svm : ParallelLoopBody {
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
{
pointer = _pointer;
const CvMat* samples;
CvMat* results;
- void operator()( const cv::BlockedRange& range ) const
+ void operator()( const cv::Range& range ) const
{
- for(int i = range.begin(); i < range.end(); i++ )
+ for(int i = range.start; i < range.end; i++ )
{
CvMat sample;
cvGetRow( samples, &sample, i );
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
{
float result = 0;
- cv::parallel_for(cv::BlockedRange(0, samples->rows),
+ cv::parallel_for_(cv::Range(0, samples->rows),
predict_body_svm(this, &result, samples, results)
);
return result;
+++ /dev/null
-Background Subtraction
-======================
-
-.. highlight:: cpp
-
-
-
-gpu::VIBE_GPU
--------------
-.. ocv:class:: gpu::VIBE_GPU
-
-Class used for background/foreground segmentation. ::
-
- class VIBE_GPU
- {
- public:
- explicit VIBE_GPU(unsigned long rngSeed = 1234567);
-
- void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
-
- void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
-
- void release();
-
- ...
- };
-
-The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [VIBE2011]_.
-
-
-
-gpu::VIBE_GPU::VIBE_GPU
------------------------
-The constructor.
-
-.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567)
-
- :param rngSeed: Value used to initiate a random sequence.
-
-Default constructor sets all parameters to default values.
-
-
-
-gpu::VIBE_GPU::initialize
--------------------------
-Initializes the background model and allocates all inner buffers.
-
-.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null())
-
- :param firstFrame: First frame from video sequence.
-
- :param stream: Stream for the asynchronous version.
-
-
-
-gpu::VIBE_GPU::operator()
--------------------------
-Updates the background model and returns the foreground mask
-
-.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null())
-
- :param frame: Next video frame.
-
- :param fgmask: The output foreground mask as an 8-bit binary image.
-
- :param stream: Stream for the asynchronous version.
-
-
-
-gpu::VIBE_GPU::release
-----------------------
-Releases the memory of all inner buffers.
-
-.. ocv:function:: void gpu::VIBE_GPU::release()
-
-
-
-
-.. [VIBE2011] O. Barnich and M. Van Droogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6):1709-1724, June 2011.
:maxdepth: 2
feature_detection
- background_subtraction
GpuMat maxPosBuffer;
};
-/*!
- * The class implements the following algorithm:
- * "ViBe: A universal background subtraction algorithm for video sequences"
- * O. Barnich and M. Van Droogenbroeck
- * IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011
- */
-class CV_EXPORTS VIBE_GPU
-{
-public:
- //! the default constructor
- explicit VIBE_GPU(unsigned long rngSeed = 1234567);
-
- //! re-initialization method
- void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
-
- //! the update operator
- void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
-
- //! releases all inner buffers
- void release();
-
- int nbSamples; // number of samples per pixel
- int reqMatches; // #_min
- int radius; // R
- int subsamplingFactor; // amount of random subsampling
-
-private:
- Size frameSize_;
-
- unsigned long rngSeed_;
- GpuMat randStates_;
-
- GpuMat samples_;
-};
-
} // namespace gpu
} // namespace cv
using namespace testing;
using namespace perf;
-#if defined(HAVE_XINE) || \
- defined(HAVE_GSTREAMER) || \
- defined(HAVE_QUICKTIME) || \
- defined(HAVE_AVFOUNDATION) || \
- defined(HAVE_FFMPEG) || \
- defined(WIN32) /* assume that we have ffmpeg */
-
-# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1
-#else
-# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0
-#endif
-
//////////////////////////////////////////////////////////////////////
// SURF
}
}
-//////////////////////////////////////////////////////
-// VIBE
-
-#if BUILD_WITH_VIDEO_INPUT_SUPPORT
-
-DEF_PARAM_TEST(Video_Cn, string, int);
-
-PERF_TEST_P(Video_Cn, GPU_VIBE,
- Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
- GPU_CHANNELS_1_3_4))
-{
- const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0));
- const int cn = GET_PARAM(1);
-
- cv::VideoCapture cap(inputFile);
- ASSERT_TRUE(cap.isOpened());
-
- cv::Mat frame;
- cap >> frame;
- ASSERT_FALSE(frame.empty());
-
- if (cn != 3)
- {
- cv::Mat temp;
- if (cn == 1)
- cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
- else
- cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
- cv::swap(temp, frame);
- }
-
- if (PERF_RUN_GPU())
- {
- cv::gpu::GpuMat d_frame(frame);
- cv::gpu::VIBE_GPU vibe;
- cv::gpu::GpuMat foreground;
-
- vibe(d_frame, foreground);
-
- for (int i = 0; i < 10; ++i)
- {
- cap >> frame;
- ASSERT_FALSE(frame.empty());
-
- if (cn != 3)
- {
- cv::Mat temp;
- if (cn == 1)
- cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
- else
- cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
- cv::swap(temp, frame);
- }
-
- d_frame.upload(frame);
-
- startTimer(); next();
- vibe(d_frame, foreground);
- stopTimer();
- }
-
- GPU_SANITY_CHECK(foreground);
- }
- else
- {
- FAIL_NO_CPU();
- }
-}
-
-#endif
-
#endif
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_GPU
-
-#include "opencv2/gpu/device/common.hpp"
-
-namespace cv { namespace gpu { namespace device
-{
- namespace vibe
- {
- void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
-
- void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-
- void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
- }
-}}}
-
-namespace cv { namespace gpu { namespace device
-{
- namespace vibe
- {
- __constant__ int c_nbSamples;
- __constant__ int c_reqMatches;
- __constant__ int c_radius;
- __constant__ int c_subsamplingFactor;
-
- void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor)
- {
- cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
- cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
- }
-
- __device__ __forceinline__ uint nextRand(uint& state)
- {
- const unsigned int CV_RNG_COEFF = 4164903690U;
- state = state * CV_RNG_COEFF + (state >> 16);
- return state;
- }
-
- __constant__ int c_xoff[9] = {-1, 0, 1, -1, 1, -1, 0, 1, 0};
- __constant__ int c_yoff[9] = {-1, -1, -1, 0, 0, 1, 1, 1, 0};
-
- __device__ __forceinline__ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8)
- {
- int idx = nextRand(randState) % count;
-
- return make_int2(x + c_xoff[idx], y + c_yoff[idx]);
- }
-
- __device__ __forceinline__ uchar cvt(uchar val)
- {
- return val;
- }
- __device__ __forceinline__ uchar4 cvt(const uchar3& val)
- {
- return make_uchar4(val.x, val.y, val.z, 0);
- }
- __device__ __forceinline__ uchar4 cvt(const uchar4& val)
- {
- return val;
- }
-
- template <typename SrcT, typename SampleT>
- __global__ void init(const PtrStepSz<SrcT> frame, PtrStep<SampleT> samples, PtrStep<uint> randStates)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
- if (x >= frame.cols || y >= frame.rows)
- return;
-
- uint localState = randStates(y, x);
-
- for (int k = 0; k < c_nbSamples; ++k)
- {
- int2 np = chooseRandomNeighbor(x, y, localState, 9);
-
- np.x = ::max(0, ::min(np.x, frame.cols - 1));
- np.y = ::max(0, ::min(np.y, frame.rows - 1));
-
- SrcT pix = frame(np.y, np.x);
-
- samples(k * frame.rows + y, x) = cvt(pix);
- }
-
- randStates(y, x) = localState;
- }
-
- template <typename SrcT, typename SampleT>
- void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
- cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
-
- init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
- cudaSafeCall( cudaGetLastError() );
-
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
-
- void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
- {
- typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
- static const func_t funcs[] =
- {
- 0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
- };
-
- funcs[cn](frame, samples, randStates, stream);
- }
-
- __device__ __forceinline__ int calcDist(uchar a, uchar b)
- {
- return ::abs(a - b);
- }
- __device__ __forceinline__ int calcDist(const uchar3& a, const uchar4& b)
- {
- return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
- }
- __device__ __forceinline__ int calcDist(const uchar4& a, const uchar4& b)
- {
- return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
- }
-
- template <typename SrcT, typename SampleT>
- __global__ void update(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStep<SampleT> samples, PtrStep<uint> randStates)
- {
- const int x = blockIdx.x * blockDim.x + threadIdx.x;
- const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
- if (x >= frame.cols || y >= frame.rows)
- return;
-
- uint localState = randStates(y, x);
-
- SrcT imgPix = frame(y, x);
-
- // comparison with the model
-
- int count = 0;
- for (int k = 0; (count < c_reqMatches) && (k < c_nbSamples); ++k)
- {
- SampleT samplePix = samples(k * frame.rows + y, x);
-
- int distance = calcDist(imgPix, samplePix);
-
- if (distance < c_radius)
- ++count;
- }
-
- // pixel classification according to reqMatches
-
- fgmask(y, x) = (uchar) (-(count < c_reqMatches));
-
- if (count >= c_reqMatches)
- {
- // the pixel belongs to the background
-
- // gets a random number between 0 and subsamplingFactor-1
- int randomNumber = nextRand(localState) % c_subsamplingFactor;
-
- // update of the current pixel model
- if (randomNumber == 0)
- {
- // random subsampling
-
- int k = nextRand(localState) % c_nbSamples;
-
- samples(k * frame.rows + y, x) = cvt(imgPix);
- }
-
- // update of a neighboring pixel model
- randomNumber = nextRand(localState) % c_subsamplingFactor;
-
- if (randomNumber == 0)
- {
- // random subsampling
-
- // chooses a neighboring pixel randomly
- int2 np = chooseRandomNeighbor(x, y, localState);
-
- np.x = ::max(0, ::min(np.x, frame.cols - 1));
- np.y = ::max(0, ::min(np.y, frame.rows - 1));
-
- // chooses the value to be replaced randomly
- int k = nextRand(localState) % c_nbSamples;
-
- samples(k * frame.rows + np.y, np.x) = cvt(imgPix);
- }
- }
-
- randStates(y, x) = localState;
- }
-
- template <typename SrcT, typename SampleT>
- void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
- {
- dim3 block(32, 8);
- dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
- cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
-
- update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
- cudaSafeCall( cudaGetLastError() );
-
- if (stream == 0)
- cudaSafeCall( cudaDeviceSynchronize() );
- }
-
- void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
- {
- typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
- static const func_t funcs[] =
- {
- 0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
- };
-
- funcs[cn](frame, fgmask, samples, randStates, stream);
- }
- }
-}}}
-
-#endif /* HAVE_OPENCV_GPU */
findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
KeyPointsFilter::removeDuplicated( keypoints );
- if( !mask.empty() )
- KeyPointsFilter::runByPixelsMask( keypoints, mask );
-
if( nfeatures > 0 )
KeyPointsFilter::retainBest(keypoints, nfeatures);
//t = (double)getTickCount() - t;
kpt.pt *= scale;
kpt.size *= scale;
}
+
+ if( !mask.empty() )
+ KeyPointsFilter::runByPixelsMask( keypoints, mask );
}
else
{
}
// Multi-threaded construction of the scale-space pyramid
-struct SURFBuildInvoker
+struct SURFBuildInvoker : ParallelLoopBody
{
SURFBuildInvoker( const Mat& _sum, const vector<int>& _sizes,
const vector<int>& _sampleSteps,
traces = &_traces;
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- for( int i=range.begin(); i<range.end(); i++ )
+ for( int i=range.start; i<range.end; i++ )
calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
}
};
// Multi-threaded search of the scale-space pyramid for keypoints
-struct SURFFindInvoker
+struct SURFFindInvoker : ParallelLoopBody
{
SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
const vector<Mat>& _dets, const vector<Mat>& _traces,
const vector<int>& sizes, vector<KeyPoint>& keypoints,
int octave, int layer, float hessianThreshold, int sampleStep );
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- for( int i=range.begin(); i<range.end(); i++ )
+ for( int i=range.start; i<range.end; i++ )
{
int layer = (*middleIndices)[i];
int octave = i / nOctaveLayers;
int nOctaveLayers;
float hessianThreshold;
-#ifdef HAVE_TBB
- static tbb::mutex findMaximaInLayer_m;
-#endif
+ static Mutex findMaximaInLayer_m;
};
-#ifdef HAVE_TBB
-tbb::mutex SURFFindInvoker::findMaximaInLayer_m;
-#endif
+Mutex SURFFindInvoker::findMaximaInLayer_m;
/*
if( interp_ok )
{
/*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
-#ifdef HAVE_TBB
- tbb::mutex::scoped_lock lock(findMaximaInLayer_m);
-#endif
+ cv::AutoLock lock(findMaximaInLayer_m);
keypoints.push_back(kpt);
}
}
}
// Calculate hessian determinant and trace samples in each layer
- parallel_for( BlockedRange(0, nTotalLayers),
- SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
+ parallel_for_( Range(0, nTotalLayers),
+ SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
// Find maxima in the determinant of the hessian
- parallel_for( BlockedRange(0, nMiddleLayers),
- SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
- sampleSteps, middleIndices, keypoints,
- nOctaveLayers, hessianThreshold) );
+ parallel_for_( Range(0, nMiddleLayers),
+ SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
+ sampleSteps, middleIndices, keypoints,
+ nOctaveLayers, hessianThreshold) );
std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
}
-struct SURFInvoker
+struct SURFInvoker : ParallelLoopBody
{
enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
}
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
/* X and Y gradient wavelet data */
const int NX=2, NY=2;
int dsize = extended ? 128 : 64;
- int k, k1 = range.begin(), k2 = range.end();
+ int k, k1 = range.start, k2 = range.end;
float maxSize = 0;
for( k = k1; k < k2; k++ )
{
// we call SURFInvoker in any case, even if we do not need descriptors,
// since it computes orientation of each feature.
- parallel_for(BlockedRange(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
+ parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
// remove keypoints that were marked for deletion
for( i = j = 0; i < N; i++ )
const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
- static char SURF_OPTIONS [1024] = "";
- static bool USE_IMAGE2d = false;
+ static bool use_image2d = false;
+
+// Assembles the OpenCL build options for a SURF kernel and launches it.
+// The option string is noImage2dOption (appended only when use_image2d is false)
+// followed by "-D WAVE_SIZE=<n>", where <n> is obtained by calling
+// queryDeviceInfo<WAVEFRONT_SIZE, size_t> on a kernel built from the same source.
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth)
{
- char * pSURF_OPTIONS = SURF_OPTIONS;
- static bool OPTION_INIT = false;
- if(!OPTION_INIT)
+ char optBuf [100] = {0};
+ // optBufPtr is the write cursor: it always points at the terminating NUL of optBuf
+ char * optBufPtr = optBuf;
+ if( !use_image2d )
{
- if( !USE_IMAGE2d )
- {
- strcat(pSURF_OPTIONS, noImage2dOption);
- pSURF_OPTIONS += strlen(noImage2dOption);
- }
-
- size_t wave_size = 0;
- queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
- std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
- OPTION_INIT = true;
+ strcat(optBufPtr, noImage2dOption);
+ optBufPtr += strlen(noImage2dOption);
}
- openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
+ // Pass the start of the buffer, not the cursor: the cursor sits past the image2d
+ // option, so handing it over would silently drop -D DISABLE_IMAGE2D.
+ cl_kernel kernel;
+ kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBuf);
+ size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
+ CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
+ // The leading space separates this option from a preceding one; a leading space
+ // on an otherwise empty option string is harmless.
+ sprintf(optBufPtr, " -D WAVE_SIZE=%d", static_cast<int>(wave_size));
+ openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBuf);
}
}
}
counters.setTo(Scalar::all(0));
integral(img, surf_.sum);
- if(support_image2d())
+ use_image2d = support_image2d();
+ if(use_image2d)
{
- try
- {
- bindImgTex(img, imgTex);
- bindImgTex(surf_.sum, sumTex);
- USE_IMAGE2d = true;
- }
- catch (const cv::Exception& e)
- {
- USE_IMAGE2d = false;
- if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217)
- {
- throw e;
- }
- }
+ bindImgTex(img, imgTex);
+ bindImgTex(surf_.sum, sumTex);
+ finish();
}
maskSumTex = 0;
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-#if defined(HAVE_OPENCV_GPU)
-
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
-
-cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::initialize(const GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::operator()(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::VIBE_GPU::release() {}
-
-#else
-
-namespace cv { namespace gpu { namespace device
-{
- namespace vibe
- {
- void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
-
- void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
-
- void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
- }
-}}}
-
-namespace
-{
- const int defaultNbSamples = 20;
- const int defaultReqMatches = 2;
- const int defaultRadius = 20;
- const int defaultSubsamplingFactor = 16;
-}
-
-cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed) :
- frameSize_(0, 0), rngSeed_(rngSeed)
-{
- nbSamples = defaultNbSamples;
- reqMatches = defaultReqMatches;
- radius = defaultRadius;
- subsamplingFactor = defaultSubsamplingFactor;
-}
-
-void cv::gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& s)
-{
- using namespace cv::gpu::device::vibe;
-
- CV_Assert(firstFrame.type() == CV_8UC1 || firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4);
-
- cudaStream_t stream = StreamAccessor::getStream(s);
-
- loadConstants(nbSamples, reqMatches, radius, subsamplingFactor);
-
- frameSize_ = firstFrame.size();
-
- if (randStates_.size() != frameSize_)
- {
- cv::RNG rng(rngSeed_);
- cv::Mat h_randStates(frameSize_, CV_8UC4);
- rng.fill(h_randStates, cv::RNG::UNIFORM, 0, 255);
- randStates_.upload(h_randStates);
- }
-
- int ch = firstFrame.channels();
- int sample_ch = ch == 1 ? 1 : 4;
-
- samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch));
-
- init_gpu(firstFrame, ch, samples_, randStates_, stream);
-}
-
-void cv::gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& s)
-{
- using namespace cv::gpu::device::vibe;
-
- CV_Assert(frame.depth() == CV_8U);
-
- int ch = frame.channels();
- int sample_ch = ch == 1 ? 1 : 4;
-
- if (frame.size() != frameSize_ || sample_ch != samples_.channels())
- initialize(frame);
-
- fgmask.create(frameSize_, CV_8UC1);
-
- update_gpu(frame, ch, fgmask, samples_, randStates_, StreamAccessor::getStream(s));
-}
-
-void cv::gpu::VIBE_GPU::release()
-{
- frameSize_ = Size(0, 0);
-
- randStates_.release();
-
- samples_.release();
-}
-
-#endif
-
-#endif // defined(HAVE_OPENCV_GPU)
testing::Values(SURF_Extended(false), SURF_Extended(true)),
testing::Values(SURF_Upright(false), SURF_Upright(true))));
-//////////////////////////////////////////////////////
-// VIBE
-
-PARAM_TEST_CASE(VIBE, cv::Size, MatType, UseRoi)
-{
-};
-
-GPU_TEST_P(VIBE, Accuracy)
-{
- const cv::Size size = GET_PARAM(0);
- const int type = GET_PARAM(1);
- const bool useRoi = GET_PARAM(2);
-
- const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
-
- cv::Mat frame = randomMat(size, type, 0.0, 100);
- cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
-
- cv::gpu::VIBE_GPU vibe;
- cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
- vibe.initialize(d_frame);
-
- for (int i = 0; i < 20; ++i)
- vibe(d_frame, d_fgmask);
-
- frame = randomMat(size, type, 160, 255);
- d_frame = loadMat(frame, useRoi);
- vibe(d_frame, d_fgmask);
-
- // now fgmask should be entirely foreground
- ASSERT_MAT_NEAR(fullfg, d_fgmask, 0);
-}
-
-INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine(
- DIFFERENT_SIZES,
- testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)),
- WHOLE_SUBMAT));
-
#endif
Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) );
- Size processingRectSize( scaledImageSize.width - originalWindowSize.width + 1, scaledImageSize.height - originalWindowSize.height + 1 );
+ Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height );
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break;
int stripCount, stripSize;
- #ifdef HAVE_TBB
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
- #else
- stripCount = 1;
- stripSize = processingRectSize.height;
- #endif
if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
rejectLevels, levelWeights, outputRejectLevels ) )
// For each component perform searching
for (i = 0; i < kComponents; i++)
{
-#ifdef HAVE_TBB
int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
b[i], maxXBorder, maxYBorder, scoreThreshold,
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
free(partsDisplacementArr);
return LATENT_SVM_SEARCH_OBJECT_FAILED;
}
-#else
- (void)numThreads;
- searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
- b[i], maxXBorder, maxYBorder, scoreThreshold,
- &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
- &(scoreArr[i]), &(partsDisplacementArr[i]));
-#endif
estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i],
filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));
componentIndex += (kPartFilters[i] + 1);
endif()
set(the_description "OpenCL-accelerated Computer Vision")
-ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
+ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
CV_EXPORTS void setBinpath(const char *path);
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
+ //returns cl_context *
CV_EXPORTS void* getoclContext();
-
+ //returns cl_command_queue *
CV_EXPORTS void* getoclCommandQueue();
//explicit call clFinish. The global command queue will be used.
//getDevice also need to be called before this function
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
+ //returns true when global OpenCL context is initialized
+ CV_EXPORTS bool initialized();
+
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
protected:
Context();
friend class auto_ptr<Context>;
-
+ friend bool initialized();
private:
static auto_ptr<Context> clCxt;
static int val;
//! computes element-wise product of the two arrays (c = a * b)
// supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
+ //! multiplies a matrix by a scalar (dst = scalar * src)
+ // supports CV_32FC1 only
+ CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst);
//! computes element-wise quotient of the two arrays (c = a / b)
// supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1);
// support all C1 types
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
+ CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
//! finds global minimum and maximum array elements and returns their values with locations
// support all C1 types
CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist);
//! only 8UC1 and 256 bins is supported now
CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst);
+
+ //! only 8UC1 is supported now
+ // Abstract interface: every operation is pure virtual. Instances are obtained
+ // from createCLAHE(), which returns them wrapped in Ptr<cv::ocl::CLAHE>.
+ class CV_EXPORTS CLAHE
+ {
+ public:
+ // runs the algorithm on src and writes the result to dst
+ virtual void apply(const oclMat &src, oclMat &dst) = 0;
+
+ // accessors for the clip limit (createCLAHE() defaults it to 40.0)
+ virtual void setClipLimit(double clipLimit) = 0;
+ virtual double getClipLimit() const = 0;
+
+ // accessors for the tile grid size (createCLAHE() defaults it to Size(8, 8))
+ virtual void setTilesGridSize(Size tileGridSize) = 0;
+ virtual Size getTilesGridSize() const = 0;
+
+ // NOTE(review): presumably releases internal temporary buffers - confirm against the implementation
+ virtual void collectGarbage() = 0;
+
+ // Implementations are destroyed through a pointer to this base class (held in Ptr<>),
+ // so the destructor must be virtual. Declared last so the order of the existing
+ // virtual functions is unchanged.
+ virtual ~CLAHE() {}
+ };
+ CV_EXPORTS Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+
//! bilateralFilter
// supports 8UC1 8UC4
CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
}
//! applies non-separable 2D linear filter to the image
+ // Note: at the moment this function only works when the anchor point is at the kernel center
+ // and the kernel size is either 3x3 or 5x5; otherwise the function fails to produce a valid result
CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
+ CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
+ int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
OclCascadeClassifierBuf() :
m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
- ~OclCascadeClassifierBuf() {}
+ ~OclCascadeClassifierBuf() { release(); }
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
std::vector<oclMat> image_sqsums;
};
-
//! computes the proximity map for the raster template and the image where the template is searched for
// Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
-
-
///////////////////////////////////////////// Canny /////////////////////////////////////////////
-
struct CV_EXPORTS CannyBuf;
-
-
-
//! compute edges of the input image using Canny operator
-
// Support CV_8UC1 only
-
CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-
CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-
CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
-
struct CV_EXPORTS CannyBuf
-
{
-
CannyBuf() : counter(NULL) {}
-
~CannyBuf()
{
release();
}
-
explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
-
{
-
create(image_size, apperture_size);
-
}
-
CannyBuf(const oclMat &dx_, const oclMat &dy_);
-
-
void create(const Size &image_size, int apperture_size = 3);
-
-
-
void release();
-
-
-
oclMat dx, dy;
-
oclMat dx_buf, dy_buf;
-
oclMat edgeBuf;
-
oclMat trackBuf1, trackBuf2;
-
void *counter;
-
Ptr<FilterEngine_GPU> filterDX, filterDY;
-
};
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
const oclMat &src3, double beta, oclMat &dst, int flags = 0);
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-
struct CV_EXPORTS HOGDescriptor
-
{
-
enum { DEFAULT_WIN_SIGMA = -1 };
-
enum { DEFAULT_NLEVELS = 64 };
-
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-
-
HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
-
Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
-
int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
-
double threshold_L2hys = 0.2, bool gamma_correction = true,
-
int nlevels = DEFAULT_NLEVELS);
-
-
size_t getDescriptorSize() const;
-
size_t getBlockHistogramSize() const;
-
-
-
void setSVMDetector(const vector<float> &detector);
-
-
-
static vector<float> getDefaultPeopleDetector();
-
static vector<float> getPeopleDetector48x96();
-
static vector<float> getPeopleDetector64x128();
-
-
-
void detect(const oclMat &img, vector<Point> &found_locations,
-
double hit_threshold = 0, Size win_stride = Size(),
-
Size padding = Size());
-
-
-
void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
-
double hit_threshold = 0, Size win_stride = Size(),
-
Size padding = Size(), double scale0 = 1.05,
-
int group_threshold = 2);
-
-
-
void getDescriptors(const oclMat &img, Size win_stride,
-
oclMat &descriptors,
-
int descr_format = DESCR_FORMAT_COL_BY_COL);
-
-
-
Size win_size;
-
Size block_size;
-
Size block_stride;
-
Size cell_size;
int nbins;
-
double win_sigma;
-
double threshold_L2hys;
-
bool gamma_correction;
-
int nlevels;
-
-
protected:
-
// initialize buffers; only need to do once in case of multiscale detection
-
void init_buffer(const oclMat &img, Size win_stride);
-
-
-
void computeBlockHistograms(const oclMat &img);
-
void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
-
-
-
double getWinSigma() const;
-
bool checkDetectorSize() const;
-
-
static int numPartsWithin(int size, int part_size, int stride);
-
static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-
// Coefficients of the separating plane
-
float free_coef;
-
oclMat detector;
-
-
-
// Results of the last classification step
-
oclMat labels;
-
Mat labels_host;
-
-
-
// Results of the last histogram evaluation step
-
oclMat block_hists;
-
-
-
// Gradients conputation results
-
oclMat grad, qangle;
-
-
-
// scaled image
-
oclMat image_scale;
-
-
-
// effect size of input image (might be different from original size after scaling)
-
Size effect_size;
-
};
/****************************************************************************************\
* Distance *
\****************************************************************************************/
-
template<typename T>
struct CV_EXPORTS Accumulator
{
typedef T Type;
};
-
template<> struct Accumulator<unsigned char>
{
typedef float Type;
{
public:
enum DistType {L1Dist = 0, L2Dist, HammingDist};
-
explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
-
-
-
// Add descriptors to train descriptor collection
-
void add(const std::vector<oclMat> &descCollection);
-
-
-
// Get train descriptors collection
-
const std::vector<oclMat> &getTrainDescriptors() const;
-
-
-
// Clear train descriptors collection
-
void clear();
-
-
-
// Return true if there are no train descriptors in the collection
-
bool empty() const;
-
-
// Return true if the matcher supports mask in match methods
-
bool isMaskSupported() const;
-
-
// Find one best match for each query descriptor
-
void matchSingle(const oclMat &query, const oclMat &train,
-
oclMat &trainIdx, oclMat &distance,
-
const oclMat &mask = oclMat());
-
-
// Download trainIdx and distance and convert it to CPU vector with DMatch
-
static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
// Convert trainIdx and distance to vector with DMatch
-
static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
-
-
// Find one best match for each query descriptor
-
void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
-
-
// Make gpu collection of trains and masks in suitable format for matchCollection function
-
void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
// Find one best match from train collection for each query descriptor
-
void matchCollection(const oclMat &query, const oclMat &trainCollection,
-
oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
const oclMat &masks = oclMat());
-
-
// Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-
static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
-
// Convert trainIdx, imgIdx and distance to vector with DMatch
-
static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
-
-
// Find one best match from train collection for each query descriptor.
-
void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-
// Find k best matches for each query descriptor (in increasing order of distances)
-
void knnMatchSingle(const oclMat &query, const oclMat &train,
-
oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
-
const oclMat &mask = oclMat());
-
-
// Download trainIdx and distance and convert it to vector with DMatch
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
// Convert trainIdx and distance to vector with DMatch
-
static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
// Find k best matches for each query descriptor (in increasing order of distances).
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
void knnMatch(const oclMat &query, const oclMat &train,
-
std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
-
bool compactResult = false);
-
-
// Find k best matches from train collection for each query descriptor (in increasing order of distances)
-
void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
-
oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
-
const oclMat &maskCollection = oclMat());
-
-
// Download trainIdx and distance and convert it to vector with DMatch
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
// Convert trainIdx and distance to vector with DMatch
-
static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
// Find k best matches for each query descriptor (in increasing order of distances).
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
-
const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
-
// Find best matches for each query descriptor which have distance less than maxDistance.
-
// nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-
// Be careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
-
// because it didn't have enough memory.
-
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-
// otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
-
// Matches are not sorted.
-
void radiusMatchSingle(const oclMat &query, const oclMat &train,
-
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
const oclMat &mask = oclMat());
-
-
// Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-
// matches will be sorted in increasing order of distances.
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
// Convert trainIdx, nMatches and distance to vector with DMatch.
-
static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
-
// Find best matches for each query descriptor which have distance less than maxDistance
-
// (in increasing order of distances).
-
void radiusMatch(const oclMat &query, const oclMat &train,
-
std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
const oclMat &mask = oclMat(), bool compactResult = false);
-
-
-
// Find best matches for each query descriptor which have distance less than maxDistance.
-
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-
// otherwise the user can pass their own allocated trainIdx and distance with size nQuery x nMaxMatches
-
// Matches are not sorted.
-
void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
-
const std::vector<oclMat> &masks = std::vector<oclMat>());
-
-
-
// Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-
// matches will be sorted in increasing order of distances.
-
// compactResult is used when mask is not empty. If compactResult is false matches
-
// vector will have the same size as queryDescriptors rows. If compactResult is true
-
// matches vector will not contain matches for fully masked out query descriptors.
-
static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
// Convert trainIdx, nMatches and distance to vector with DMatch.
-
static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
-
std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
-
-
-
// Find best matches from train collection for each query descriptor which have distance less than
-
// maxDistance (in increasing order of distances).
-
void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
-
const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
-
-
-
DistType distType;
-
-
-
private:
-
std::vector<oclMat> trainDescCollection;
-
};
-
-
template <class Distance>
-
class CV_EXPORTS BruteForceMatcher_OCL;
-
-
// Partial specialization for the L1<T> distance functor.
// Both constructors forward L1Dist to BruteForceMatcher_OCL_base.
template <typename T>
-
class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
-
{
-
public:
-
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
-
explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} // the functor argument is unnamed and unused
-
};
// Partial specialization for the L2<T> distance functor.
// Both constructors forward L2Dist to BruteForceMatcher_OCL_base.
template <typename T>
-
class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
-
{
-
public:
-
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
-
explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} // the functor argument is unnamed and unused
-
};
// Full specialization for the Hamming distance functor.
// Both constructors forward HammingDist to BruteForceMatcher_OCL_base.
template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
-
{
-
public:
-
explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
-
explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} // the functor argument is unnamed and unused
+ };
+ class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base // non-template matcher: the distance type is chosen at run time from a norm code
+ {
+ public:
+ explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} // NORM_L1 -> L1Dist, NORM_L2 -> L2Dist; NOTE(review): every other norm value silently selects HammingDist
};
+ class CV_EXPORTS GoodFeaturesToTrackDetector_OCL // exposes its parameters as public fields and keeps private oclMat work buffers
+ {
+ public:
+ explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
+ int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
+ //! returns a single-row matrix of type CV_32FC2
+ void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
+ //! downloads points of type Point2f to a vector; the vector's previous content is erased
+ void downloadPoints(const oclMat &points, vector<Point2f> &points_v);
- /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ int maxCorners;
+ double qualityLevel;
+ double minDistance;
- class CV_EXPORTS PyrLKOpticalFlow
+ int blockSize;
+ bool useHarrisDetector;
+ double harrisK;
+ void releaseMemory() // releases every private oclMat buffer declared below
+ {
+ Dx_.release();
+ Dy_.release();
+ eig_.release();
+ minMaxbuf_.release();
+ tmpCorners_.release();
+ }
+ private:
+ oclMat Dx_; // presumably the x-derivative buffer - TODO confirm against the implementation
+ oclMat Dy_; // presumably the y-derivative buffer - TODO confirm against the implementation
+ oclMat eig_;
+ oclMat minMaxbuf_;
+ oclMat tmpCorners_;
+ };
+ inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
+ int blockSize_, bool useHarrisDetector_, double harrisK_)
{ // stores each argument in the public member of the same name (without the trailing underscore); does no other work
+ maxCorners = maxCorners_;
+ qualityLevel = qualityLevel_;
+ minDistance = minDistance_;
+ blockSize = blockSize_;
+ useHarrisDetector = useHarrisDetector_;
+ harrisK = harrisK_;
+ }
+ /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+ class CV_EXPORTS PyrLKOpticalFlow
+ {
public:
-
// Default constructor: sets every public parameter and the private
// isDeviceArch11_ flag to the fixed defaults assigned below.
PyrLKOpticalFlow()
-
{
-
winSize = Size(21, 21);
-
maxLevel = 3;
-
iters = 30;
-
derivLambda = 0.5;
-
useInitialFlow = false;
-
minEigThreshold = 1e-4f;
-
getMinEigenVals = false;
-
isDeviceArch11_ = false;
-
}
-
-
// err is optional: it defaults to a null pointer.
void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
-
oclMat &status, oclMat *err = 0);
-
-
-
// err is optional: it defaults to a null pointer.
void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
-
-
-
// Public parameters; defaults are assigned in the constructor.
Size winSize;
-
int maxLevel;
-
int iters;
-
double derivLambda;
-
bool useInitialFlow;
-
float minEigThreshold;
-
bool getMinEigenVals;
-
-
-
// Releases the derivative buffers and clears both image pyramids.
// NOTE(review): uPyr_ and vPyr_ are not released here - confirm this is intentional.
void releaseMemory()
-
{
-
dx_calcBuf_.release();
-
dy_calcBuf_.release();
-
-
prevPyr_.clear();
-
nextPyr_.clear();
-
-
dx_buf_.release();
-
dy_buf_.release();
-
}
-
-
-
private:
-
void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
-
-
-
void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
-
-
oclMat dx_calcBuf_;
-
oclMat dy_calcBuf_;
-
-
vector<oclMat> prevPyr_;
-
vector<oclMat> nextPyr_;
-
-
oclMat dx_buf_;
-
oclMat dy_buf_;
-
-
-
oclMat uPyr_[2];
-
oclMat vPyr_[2];
-
-
-
bool isDeviceArch11_;
-
};
//////////////// build warping maps ////////////////////
//! builds plane warping maps
private:
oclMat minSSD, leBuf, riBuf;
};
+
class CV_EXPORTS StereoBeliefPropagation
{
public:
std::vector<oclMat> datas;
oclMat out;
};
+
class CV_EXPORTS StereoConstantSpaceBP
{
public:
oclMat temp;
oclMat out;
};
+
+ // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
+ //
+ // see reference:
+ // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
+ // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
+ class CV_EXPORTS OpticalFlowDual_TVL1_OCL
+ {
+ public:
+ OpticalFlowDual_TVL1_OCL();
+
+ void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy);
+
+ void collectGarbage();
+
+ /**
+ * Time step of the numerical scheme.
+ */
+ double tau;
+
+ /**
+ * Weight parameter for the data term, attachment parameter.
+ * This is the most relevant parameter, which determines the smoothness of the output.
+ * The smaller this parameter is, the smoother the solutions we obtain.
+ * It depends on the range of motions of the images, so its value should be adapted to each image sequence.
+ */
+ double lambda;
+
+ /**
+ * Weight parameter for (u - v)^2, tightness parameter.
+ * It serves as a link between the attachment and the regularization terms.
+ * In theory, it should have a small value in order to maintain both parts in correspondence.
+ * The method is stable for a large range of values of this parameter.
+ */
+ double theta;
+
+ /**
+ * Number of scales used to create the pyramid of images.
+ */
+ int nscales;
+
+ /**
+ * Number of warpings per scale.
+ * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale.
+ * This is a parameter that assures the stability of the method.
+ * It also affects the running time, so it is a compromise between speed and accuracy.
+ */
+ int warps;
+
+ /**
+ * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
+ * A small value will yield more accurate solutions at the expense of a slower convergence.
+ */
+ double epsilon;
+
+ /**
+ * Stopping criterion iterations number used in the numerical scheme.
+ */
+ int iterations;
+
+ bool useInitialFlow;
+
+ private:
+ void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2);
+
+ std::vector<oclMat> I0s;
+ std::vector<oclMat> I1s;
+ std::vector<oclMat> u1s;
+ std::vector<oclMat> u2s;
+
+ oclMat I1x_buf;
+ oclMat I1y_buf;
+
+ oclMat I1w_buf;
+ oclMat I1wx_buf;
+ oclMat I1wy_buf;
+
+ oclMat grad_buf;
+ oclMat rho_c_buf;
+
+ oclMat p11_buf;
+ oclMat p12_buf;
+ oclMat p21_buf;
+ oclMat p22_buf;
+
+ oclMat diff_buf;
+ oclMat norm_buf;
+ };
}
}
#if defined _MSC_VER && _MSC_VER >= 1200
#include "opencv2/ocl/ocl.hpp"
#if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
void CV_EXPORTS releaseTexture(cl_mem& texture);
+ // Represents an image texture object.
+ // The destructor passes the wrapped cl_mem handle to openCLFree(), so an
+ // instance must never be copied: a copy would hand the same handle to
+ // openCLFree() a second time. Copy construction and assignment are therefore
+ // both declared private and left undefined.
+ class CV_EXPORTS TextureCL
+ {
+ public:
+ // tex is the handle to wrap; r, c and t are stored as rows, cols and type.
+ TextureCL(cl_mem tex, int r, int c, int t)
+ : tex_(tex), rows(r), cols(c), type(t) {}
+ ~TextureCL()
+ {
+ openCLFree(tex_);
+ }
+ // implicit conversion to the wrapped handle
+ operator cl_mem()
+ {
+ return tex_;
+ }
+ cl_mem const tex_;
+ const int rows;
+ const int cols;
+ const int type;
+ private:
+ //disable copy construction (a copy's destructor would free tex_ again)
+ TextureCL(const TextureCL&);
+ //disable assignment
+ void operator=(const TextureCL&);
+ };
+ // binds an oclMat to an OpenCL image texture and returns a TextureCL object
+ // note:
+ // for faster clamping, there is no buffer padding for the constructed texture
+ Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
+
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
enum DEVICE_INFO
{
WAVEFRONT_SIZE, //in AMD speak
- WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak
IS_CPU_DEVICE //check if the device is CPU
};
- //info should have been pre-allocated
- void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info);
+ template<DEVICE_INFO _it, typename _ty>
+ _ty queryDeviceInfo(cl_kernel kernel = NULL);
+
+ template<>
+ int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
+ template<>
+ size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
+ template<>
+ bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
}//namespace ocl
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// Lut ////////////////////////
-TEST(lut)
+PERFTEST(lut)
{
- Mat src, lut, dst;
+ Mat src, lut, dst, ocl_dst;
ocl::oclMat d_src, d_lut, d_dst;
int all_type[] = {CV_8UC1, CV_8UC3};
gen(src, size, size, all_type[j], 0, 256);
gen(lut, 1, 256, CV_8UC1, 0, 1);
- gen(dst, size, size, all_type[j], 0, 256);
LUT(src, lut, dst);
GPU_ON;
ocl::LUT(d_src, d_lut, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_lut.upload(lut);
ocl::LUT(d_src, d_lut, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0);
}
}
}
///////////// Exp ////////////////////////
-TEST(Exp)
+PERFTEST(Exp)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; CV_32FC1";
- gen(src, size, size, CV_32FC1, 0, 256);
- gen(dst, size, size, CV_32FC1, 0, 256);
+ gen(src, size, size, CV_32FC1, 5, 16);
exp(src, dst);
GPU_ON;
ocl::exp(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::exp(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2);
}
}
///////////// LOG ////////////////////////
-TEST(Log)
+PERFTEST(Log)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
GPU_ON;
ocl::log(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::log(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
///////////// Add ////////////////////////
-TEST(Add)
+PERFTEST(Add)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_32FC1};
CPU_ON;
add(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::add(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::add(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// Mul ////////////////////////
-TEST(Mul)
+PERFTEST(Mul)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
- gen(dst, size, size, all_type[j], 0, 256);
-
multiply(src1, src2, dst);
GPU_ON;
ocl::multiply(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::multiply(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// Div ////////////////////////
-TEST(Div)
+PERFTEST(Div)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
- gen(dst, size, size, all_type[j], 0, 256);
-
divide(src1, src2, dst);
CPU_ON;
divide(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::divide(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::divide(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
}
///////////// Absdiff ////////////////////////
-TEST(Absdiff)
+PERFTEST(Absdiff)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
absdiff(src1, src2, dst);
CPU_ON;
absdiff(src1, src2, dst);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::absdiff(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::absdiff(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// CartToPolar ////////////////////////
-TEST(CartToPolar)
+PERFTEST(CartToPolar)
{
- Mat src1, src2, dst, dst1;
+ Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
int all_type[] = {CV_32FC1};
CPU_ON;
cartToPolar(src1, src2, dst, dst1, 1);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
- d_dst.download(dst);
- d_dst1.download(dst1);
+ d_dst.download(ocl_dst);
+ d_dst1.download(ocl_dst1);
GPU_FULL_OFF;
+
+ double diff1 = checkNorm(ocl_dst1, dst1);
+ double diff2 = checkNorm(ocl_dst, dst);
+ double max_diff = max(diff1, diff2);
+ TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
}
}
}
///////////// PolarToCart ////////////////////////
-TEST(PolarToCart)
+PERFTEST(PolarToCart)
{
- Mat src1, src2, dst, dst1;
+ Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
int all_type[] = {CV_32FC1};
GPU_ON;
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
- d_dst.download(dst);
- d_dst1.download(dst1);
+ d_dst.download(ocl_dst);
+ d_dst1.download(ocl_dst1);
GPU_FULL_OFF;
+
+ double diff1 = checkNorm(ocl_dst1, dst1);
+ double diff2 = checkNorm(ocl_dst, dst);
+ double max_diff = max(diff1, diff2);
+ TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
+
}
}
}
///////////// Magnitude ////////////////////////
-TEST(magnitude)
+PERFTEST(magnitude)
{
- Mat x, y, mag;
+ Mat x, y, mag, ocl_mag;
ocl::oclMat d_x, d_y, d_mag;
int all_type[] = {CV_32FC1};
GPU_ON;
ocl::magnitude(d_x, d_y, d_mag);
- ;
GPU_OFF;
GPU_FULL_ON;
d_x.upload(x);
d_y.upload(y);
ocl::magnitude(d_x, d_y, d_mag);
- d_mag.download(mag);
+ d_mag.download(ocl_mag);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5);
}
}
}
///////////// Transpose ////////////////////////
-TEST(Transpose)
+PERFTEST(Transpose)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
GPU_ON;
ocl::transpose(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::transpose(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
}
///////////// Flip ////////////////////////
-TEST(Flip)
+PERFTEST(Flip)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
GPU_ON;
ocl::flip(d_src, d_dst, 0);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::flip(d_src, d_dst, 0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
}
///////////// minMax ////////////////////////
-TEST(minMax)
+PERFTEST(minMax)
{
Mat src;
ocl::oclMat d_src;
- double min_val, max_val;
+ double min_val = 0.0, max_val = 0.0;
+ double min_val_ = 0.0, max_val_ = 0.0;
Point min_loc, max_loc;
int all_type[] = {CV_8UC1, CV_32FC1};
std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
d_src.upload(src);
WARMUP_ON;
- ocl::minMax(d_src, &min_val, &max_val);
+ ocl::minMax(d_src, &min_val_, &max_val_);
WARMUP_OFF;
+ if(EeceptDoubleEQ<double>(max_val_, max_val) && EeceptDoubleEQ<double>(min_val_, min_val))
+ TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
+ else
+ TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
+
GPU_ON;
ocl::minMax(d_src, &min_val, &max_val);
- ;
GPU_OFF;
GPU_FULL_ON;
}
///////////// minMaxLoc ////////////////////////
-TEST(minMaxLoc)
+PERFTEST(minMaxLoc)
{
Mat src;
ocl::oclMat d_src;
- double min_val, max_val;
+ // "_"-suffixed variables receive the results of the ocl warm-up call below and are
+ // compared against the unsuffixed ones (presumably filled by the CPU run, which is
+ // outside this hunk - TODO confirm).
+ double min_val = 0.0, max_val = 0.0;
+ double min_val_ = 0.0, max_val_ = 0.0;
Point min_loc, max_loc;
+ Point min_loc_, max_loc_;
int all_type[] = {CV_8UC1, CV_32FC1};
std::string type_name[] = {"CV_8UC1", "CV_32FC1"};
d_src.upload(src);
WARMUP_ON;
- ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+ ocl::minMaxLoc(d_src, &min_val_, &max_val_, &min_loc_, &max_loc_);
WARMUP_OFF;
+ // Locations are compared by the pixel value stored at them rather than by
+ // coordinates: read src at each location using the element type that matches
+ // src.depth().
+ double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.;
+ if(src.depth() == 0)
+ {
+ minlocVal = src.at<unsigned char>(min_loc);
+ minlocVal_ = src.at<unsigned char>(min_loc_);
+ maxlocVal = src.at<unsigned char>(max_loc);
+ maxlocVal_ = src.at<unsigned char>(max_loc_);
+ }
+ if(src.depth() == 1)
+ {
+ minlocVal = src.at<signed char>(min_loc);
+ minlocVal_ = src.at<signed char>(min_loc_);
+ maxlocVal = src.at<signed char>(max_loc);
+ maxlocVal_ = src.at<signed char>(max_loc_);
+ }
+ if(src.depth() == 2)
+ {
+ minlocVal = src.at<unsigned short>(min_loc);
+ minlocVal_ = src.at<unsigned short>(min_loc_);
+ maxlocVal = src.at<unsigned short>(max_loc);
+ maxlocVal_ = src.at<unsigned short>(max_loc_);
+ }
+ if(src.depth() == 3)
+ {
+ minlocVal = src.at<signed short>(min_loc);
+ minlocVal_ = src.at<signed short>(min_loc_);
+ maxlocVal = src.at<signed short>(max_loc);
+ maxlocVal_ = src.at<signed short>(max_loc_);
+ }
+ if(src.depth() == 4)
+ {
+ minlocVal = src.at<int>(min_loc);
+ minlocVal_ = src.at<int>(min_loc_);
+ maxlocVal = src.at<int>(max_loc);
+ maxlocVal_ = src.at<int>(max_loc_);
+ }
+ if(src.depth() == 5)
+ {
+ minlocVal = src.at<float>(min_loc);
+ minlocVal_ = src.at<float>(min_loc_);
+ maxlocVal = src.at<float>(max_loc);
+ maxlocVal_ = src.at<float>(max_loc_);
+ }
+ if(src.depth() == 6)
+ {
+ minlocVal = src.at<double>(min_loc);
+ minlocVal_ = src.at<double>(min_loc_);
+ maxlocVal = src.at<double>(max_loc);
+ maxlocVal_ = src.at<double>(max_loc_);
+ }
+ // fabs rather than ::abs: the operands are doubles, and ::abs may resolve to the
+ // int overload, truncating the reported error.
+ error0 = fabs(minlocVal_ - minlocVal);
+ error1 = fabs(maxlocVal_ - maxlocVal);
+ if( EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
+ &&EeceptDoubleEQ<double>(minlocVal_, minlocVal)
+ &&EeceptDoubleEQ<double>(max_val_, max_val)
+ &&EeceptDoubleEQ<double>(min_val_, min_val))
+ TestSystem::instance().setAccurate(1, 0.);
+ else
+ TestSystem::instance().setAccurate(0, max(error0, error1));
+
GPU_ON;
ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
- ;
GPU_OFF;
GPU_FULL_ON;
}
///////////// Sum ////////////////////////
-TEST(Sum)
+PERFTEST(Sum)
{
Mat src;
Scalar cpures, gpures;
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
- gen(src, size, size, all_type[j], 0, 256);
+ gen(src, size, size, all_type[j], 0, 60);
cpures = sum(src);
gpures = ocl::sum(d_src);
WARMUP_OFF;
+ vector<double> diffs(4);
+ diffs[3] = fabs(cpures[3] - gpures[3]);
+ diffs[2] = fabs(cpures[2] - gpures[2]);
+ diffs[1] = fabs(cpures[1] - gpures[1]);
+ diffs[0] = fabs(cpures[0] - gpures[0]);
+ double max_diff = *max_element(diffs.begin(), diffs.end());
+ TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff);
+
GPU_ON;
gpures = ocl::sum(d_src);
- ;
GPU_OFF;
GPU_FULL_ON;
}
///////////// countNonZero ////////////////////////
-TEST(countNonZero)
+PERFTEST(countNonZero)
{
Mat src;
ocl::oclMat d_src;
countNonZero(src);
+ int cpures = 0, gpures = 0;
CPU_ON;
- countNonZero(src);
+ cpures = countNonZero(src);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
- ocl::countNonZero(d_src);
+ gpures = ocl::countNonZero(d_src);
WARMUP_OFF;
+ int diff = abs(cpures - gpures);
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
+
GPU_ON;
ocl::countNonZero(d_src);
- ;
GPU_OFF;
GPU_FULL_ON;
}
///////////// Phase ////////////////////////
-TEST(Phase)
+PERFTEST(Phase)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_32FC1};
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
phase(src1, src2, dst, 1);
CPU_ON;
phase(src1, src2, dst, 1);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::phase(d_src1, d_src2, d_dst, 1);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::phase(d_src1, d_src2, d_dst, 1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2);
}
}
}
///////////// bitwise_and////////////////////////
-TEST(bitwise_and)
+PERFTEST(bitwise_and)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_8UC1, CV_32SC1};
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
bitwise_and(src1, src2, dst);
CPU_ON;
GPU_ON;
ocl::bitwise_and(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::bitwise_and(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
- }
-
- }
-}
-
-///////////// bitwise_or////////////////////////
-TEST(bitwise_or)
-{
- Mat src1, src2, dst;
- ocl::oclMat d_src1, d_src2, d_dst;
-
- int all_type[] = {CV_8UC1, CV_32SC1};
- std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
-
- for (int size = Min_Size; size <= Max_Size; size *= Multiple)
- {
- for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
- {
- SUBTEST << size << 'x' << size << "; " << type_name[j];
-
- gen(src1, size, size, all_type[j], 0, 256);
- gen(src2, size, size, all_type[j], 0, 256);
- gen(dst, size, size, all_type[j], 0, 256);
-
- bitwise_or(src1, src2, dst);
-
- CPU_ON;
- bitwise_or(src1, src2, dst);
- CPU_OFF;
- d_src1.upload(src1);
- d_src2.upload(src2);
-
- WARMUP_ON;
- ocl::bitwise_or(d_src1, d_src2, d_dst);
- WARMUP_OFF;
-
- GPU_ON;
- ocl::bitwise_or(d_src1, d_src2, d_dst);
- ;
- GPU_OFF;
-
- GPU_FULL_ON;
- d_src1.upload(src1);
- d_src2.upload(src2);
- ocl::bitwise_or(d_src1, d_src2, d_dst);
- d_dst.download(dst);
- GPU_FULL_OFF;
- }
-
- }
-}
-
-///////////// bitwise_xor////////////////////////
-TEST(bitwise_xor)
-{
- Mat src1, src2, dst;
- ocl::oclMat d_src1, d_src2, d_dst;
-
- int all_type[] = {CV_8UC1, CV_32SC1};
- std::string type_name[] = {"CV_8UC1", "CV_32SC1"};
-
- for (int size = Min_Size; size <= Max_Size; size *= Multiple)
- {
- for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
- {
- SUBTEST << size << 'x' << size << "; " << type_name[j];
-
- gen(src1, size, size, all_type[j], 0, 256);
- gen(src2, size, size, all_type[j], 0, 256);
- gen(dst, size, size, all_type[j], 0, 256);
-
-
- bitwise_xor(src1, src2, dst);
-
- CPU_ON;
- bitwise_xor(src1, src2, dst);
- CPU_OFF;
- d_src1.upload(src1);
- d_src2.upload(src2);
-
- WARMUP_ON;
- ocl::bitwise_xor(d_src1, d_src2, d_dst);
- WARMUP_OFF;
-
- GPU_ON;
- ocl::bitwise_xor(d_src1, d_src2, d_dst);
- ;
- GPU_OFF;
-
- GPU_FULL_ON;
- d_src1.upload(src1);
- d_src2.upload(src2);
- ocl::bitwise_xor(d_src1, d_src2, d_dst);
- d_dst.download(dst);
- GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// bitwise_not////////////////////////
-TEST(bitwise_not)
+PERFTEST(bitwise_not)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
int all_type[] = {CV_8UC1, CV_32SC1};
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
bitwise_not(src1, dst);
CPU_ON;
GPU_ON;
ocl::bitwise_not(d_src1, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::bitwise_not(d_src1, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// compare////////////////////////
-TEST(compare)
+PERFTEST(compare)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int CMP_EQ = 0;
gen(src2, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
compare(src1, src2, dst, CMP_EQ);
CPU_ON;
compare(src1, src2, dst, CMP_EQ);
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
}
}
}
///////////// pow ////////////////////////
-TEST(pow)
+PERFTEST(pow)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_32FC1};
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
- gen(src, size, size, all_type[j], 0, 100);
- gen(dst, size, size, all_type[j], 0, 100);
+ gen(src, size, size, all_type[j], 5, 16);
pow(src, -2.0, dst);
GPU_ON;
ocl::pow(d_src, -2.0, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pow(d_src, -2.0, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
}
///////////// MagnitudeSqr////////////////////////
-TEST(MagnitudeSqr)
+PERFTEST(MagnitudeSqr)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
int all_type[] = {CV_32FC1};
gen(src2, size, size, all_type[t], 0, 256);
gen(dst, size, size, all_type[t], 0, 256);
-
- for (int i = 0; i < src1.rows; ++i)
-
- for (int j = 0; j < src1.cols; ++j)
- {
- float val1 = src1.at<float>(i, j);
- float val2 = src2.at<float>(i, j);
-
- ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
-
- }
-
CPU_ON;
-
for (int i = 0; i < src1.rows; ++i)
for (int j = 0; j < src1.cols; ++j)
{
float val1 = src1.at<float>(i, j);
float val2 = src2.at<float>(i, j);
-
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
}
-
CPU_OFF;
+
d_src1.upload(src1);
d_src2.upload(src2);
GPU_ON;
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
}
///////////// AddWeighted////////////////////////
-TEST(AddWeighted)
+PERFTEST(AddWeighted)
{
- Mat src1, src2, dst;
+ Mat src1, src2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_dst;
double alpha = 2.0, beta = 1.0, gama = 3.0;
GPU_ON;
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
}
}
}
-TEST(blend)
+PERFTEST(blend)
{
- Mat src1, src2, weights1, weights2, dst;
+ Mat src1, src2, weights1, weights2, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
GPU_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_weights1.upload(weights1);
d_weights2.upload(weights2);
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
}
}
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
//////////////////// BruteForceMatch /////////////////
-TEST(BruteForceMatcher)
+PERFTEST(BruteForceMatcher)
{
Mat trainIdx_cpu;
Mat distance_cpu;
gen(train, size, desc_len, CV_32F, 0, 1);
// Output
vector< vector<DMatch> > matches(2);
+ vector< vector<DMatch> > d_matches(2);
// Init GPU matcher
ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
GPU_ON;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
- ;
GPU_OFF;
GPU_FULL_ON;
d_query.upload(query);
d_train.upload(train);
- d_matcher.match(d_query, d_train, matches[0]);
+ d_matcher.match(d_query, d_train, d_matches[0]);
GPU_FULL_OFF;
+ int diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
+
SUBTEST << size << "; knnMatch";
matcher.knnMatch(query, train, matches, 2);
GPU_ON;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
- ;
GPU_OFF;
GPU_FULL_ON;
d_query.upload(query);
d_train.upload(train);
- d_matcher.knnMatch(d_query, d_train, matches, 2);
+ d_matcher.knnMatch(d_query, d_train, d_matches, 2);
GPU_FULL_OFF;
+ diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
+
SUBTEST << size << "; radiusMatch";
float max_distance = 2.0f;
GPU_ON;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
- ;
GPU_OFF;
GPU_FULL_ON;
d_query.upload(query);
d_train.upload(train);
- d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
+ d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
GPU_FULL_OFF;
+
+ diff = abs((int)d_matches[0].size() - (int)matches[0].size());
+ if(diff == 0)
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, diff);
}
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// Canny ////////////////////////
-TEST(Canny)
+PERFTEST(Canny)
{
Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
- Mat edges(img.size(), CV_8UC1);
+ Mat edges(img.size(), CV_8UC1), ocl_edges;
CPU_ON;
Canny(img, edges, 50.0, 100.0);
GPU_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
- ;
GPU_OFF;
GPU_FULL_ON;
d_img.upload(img);
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
- d_edges.download(edges);
+ d_edges.download(ocl_edges);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// cvtColor////////////////////////
-TEST(cvtColor)
+PERFTEST(cvtColor)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC4};
GPU_ON;
ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5);
}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// columnSum////////////////////////
-TEST(columnSum)
+PERFTEST(columnSum)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
CPU_ON;
dst.create(src.size(), src.type());
+ for (int j = 0; j < src.cols; j++)
+ dst.at<float>(0, j) = src.at<float>(0, j);
for (int i = 1; i < src.rows; ++i)
- {
for (int j = 0; j < src.cols; ++j)
- {
- dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
- }
- }
-
+ dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
CPU_OFF;
d_src.upload(src);
+
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::columnSum(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
}
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// dft ////////////////////////
-TEST(dft)
+PERFTEST(dft)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
- int all_type[] = {CV_32FC1, CV_32FC2};
- std::string type_name[] = {"CV_32FC1", "CV_32FC2"};
+ int all_type[] = {CV_32FC2};
+ std::string type_name[] = {"CV_32FC2"};
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
GPU_ON;
ocl::dft(d_src, d_dst, Size(size, size));
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::dft(d_src, d_dst, Size(size, size));
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4);
}
}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// Blur////////////////////////
-TEST(Blur)
+PERFTEST(Blur)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
Size ksize = Size(3, 3);
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
blur(src1, dst, ksize, Point(-1, -1), bordertype);
CPU_ON;
GPU_ON;
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
}
///////////// Laplacian////////////////////////
-TEST(Laplacian)
+PERFTEST(Laplacian)
{
- Mat src1, dst;
+ Mat src1, dst, ocl_dst;
ocl::oclMat d_src1, d_dst;
int ksize = 3;
gen(src1, size, size, all_type[j], 0, 256);
gen(dst, size, size, all_type[j], 0, 256);
-
Laplacian(src1, dst, -1, ksize, 1);
CPU_ON;
GPU_ON;
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
}
///////////// Erode ////////////////////
-TEST(Erode)
+PERFTEST(Erode)
{
- Mat src, dst, ker;
+ Mat src, dst, ker, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
GPU_ON;
ocl::erode(d_src, d_dst, ker);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::erode(d_src, d_dst, ker);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
}
}
///////////// Sobel ////////////////////////
-TEST(Sobel)
+PERFTEST(Sobel)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int dx = 1;
GPU_ON;
ocl::Sobel(d_src, d_dst, -1, dx, dy);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::Sobel(d_src, d_dst, -1, dx, dy);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
}
///////////// Scharr ////////////////////////
-TEST(Scharr)
+PERFTEST(Scharr)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int dx = 1;
GPU_ON;
ocl::Scharr(d_src, d_dst, -1, dx, dy);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::Scharr(d_src, d_dst, -1, dx, dy);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
}
}
}
///////////// GaussianBlur ////////////////////////
-TEST(GaussianBlur)
+PERFTEST(GaussianBlur)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
- gen(src, size, size, all_type[j], 0, 256);
+ gen(src, size, size, all_type[j], 5, 16);
GaussianBlur(src, dst, Size(9, 9), 0);
GPU_ON;
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
}
}
}
///////////// filter2D////////////////////////
-TEST(filter2D)
+PERFTEST(filter2D)
{
Mat src;
{
gen(src, size, size, all_type[j], 0, 256);
- for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1)
- {
- SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
+ const int ksize = 3;
- Mat kernel;
- gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
+ SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
- Mat dst;
- cv::filter2D(src, dst, -1, kernel);
+ Mat kernel;
+ gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0);
- CPU_ON;
- cv::filter2D(src, dst, -1, kernel);
- CPU_OFF;
+ Mat dst, ocl_dst;
- ocl::oclMat d_src(src);
- ocl::oclMat d_dst;
+ cv::filter2D(src, dst, -1, kernel);
- WARMUP_ON;
- ocl::filter2D(d_src, d_dst, -1, kernel);
- WARMUP_OFF;
+ CPU_ON;
+ cv::filter2D(src, dst, -1, kernel);
+ CPU_OFF;
- GPU_ON;
- ocl::filter2D(d_src, d_dst, -1, kernel);
- ;
- GPU_OFF;
+ ocl::oclMat d_src(src), d_dst;
+
+ WARMUP_ON;
+ ocl::filter2D(d_src, d_dst, -1, kernel);
+ WARMUP_OFF;
+
+ GPU_ON;
+ ocl::filter2D(d_src, d_dst, -1, kernel);
+ GPU_OFF;
+
+ GPU_FULL_ON;
+ d_src.upload(src);
+ ocl::filter2D(d_src, d_dst, -1, kernel);
+ d_dst.download(ocl_dst);
+ GPU_FULL_OFF;
- GPU_FULL_ON;
- d_src.upload(src);
- ocl::filter2D(d_src, d_dst, -1, kernel);
- d_dst.download(dst);
- GPU_FULL_OFF;
- }
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// gemm ////////////////////////
-TEST(gemm)
+PERFTEST(gemm)
{
- Mat src1, src2, src3, dst;
+ Mat src1, src2, src3, dst, ocl_dst;
ocl::oclMat d_src1, d_src2, d_src3, d_dst;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
GPU_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src2.upload(src2);
d_src3.upload(src3);
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
}
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
}
}
-TEST(Haar)
+PERFTEST(Haar)
{
Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
CPU_OFF;
+
+ vector<Rect> oclfaces;
ocl::CascadeClassifier_GPU faceCascade;
if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
ocl::oclMat d_img(img);
- faces.clear();
-
WARMUP_ON;
- faceCascade.detectMultiScale(d_img, faces,
+ faceCascade.detectMultiScale(d_img, oclfaces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
WARMUP_OFF;
+ if(faces.size() == oclfaces.size())
+ TestSystem::instance().setAccurate(1, 0);
+ else
+ TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size()));
+
faces.clear();
GPU_ON;
- faceCascade.detectMultiScale(d_img, faces,
+ faceCascade.detectMultiScale(d_img, oclfaces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
- ;
GPU_OFF;
GPU_FULL_ON;
d_img.upload(img);
- faceCascade.detectMultiScale(d_img, faces,
+ faceCascade.detectMultiScale(d_img, oclfaces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
GPU_FULL_OFF;
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// HOG////////////////////////
-TEST(HOG)
+// Returns true when x, y, width and height of r1 each differ from the
+// corresponding field of r2 by strictly less than threshold.
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
+{
+    // Reject as soon as one component is too far off; fields are checked in
+    // the order x, y, width, height.
+    if (abs(r1.x - r2.x) >= threshold)
+        return false;
+    if (abs(r1.y - r2.y) >= threshold)
+        return false;
+    if (abs(r1.width - r2.width) >= threshold)
+        return false;
+    if (abs(r1.height - r2.height) >= threshold)
+        return false;
+    return true;
+}
+
+PERFTEST(HOG)
{
Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
cv::HOGDescriptor hog;
hog.setSVMDetector(hog.getDefaultPeopleDetector());
std::vector<cv::Rect> found_locations;
+ std::vector<cv::Rect> d_found_locations;
SUBTEST << 768 << 'x' << 576 << "; road.png";
d_src.upload(src);
WARMUP_ON;
- ocl_hog.detectMultiScale(d_src, found_locations);
+ ocl_hog.detectMultiScale(d_src, d_found_locations);
WARMUP_OFF;
+
+ // Ground-truth rectangular people window
+ cv::Rect win1_64x128(231, 190, 72, 144);
+ cv::Rect win2_64x128(621, 156, 97, 194);
+ cv::Rect win1_48x96(238, 198, 63, 126);
+ cv::Rect win2_48x96(619, 161, 92, 185);
+ cv::Rect win3_48x96(488, 136, 56, 112);
+
+ // Compare whether ground-truth windows are detected and compare the number of windows detected.
+ std::vector<int> d_comp(4);
+ std::vector<int> comp(4);
+ for(int i = 0; i < (int)d_comp.size(); i++)
+ {
+ d_comp[i] = 0;
+ comp[i] = 0;
+ }
+
+ int threshold = 10;
+ int val = 32;
+ d_comp[0] = (int)d_found_locations.size();
+ comp[0] = (int)found_locations.size();
+
+ cv::Size winSize = hog.winSize;
+
+ if (winSize == cv::Size(48, 96))
+ {
+ for(int i = 0; i < (int)d_found_locations.size(); i++)
+ {
+ if (match_rect(d_found_locations[i], win1_48x96, threshold))
+ d_comp[1] = val;
+ if (match_rect(d_found_locations[i], win2_48x96, threshold))
+ d_comp[2] = val;
+ if (match_rect(d_found_locations[i], win3_48x96, threshold))
+ d_comp[3] = val;
+ }
+ for(int i = 0; i < (int)found_locations.size(); i++)
+ {
+ if (match_rect(found_locations[i], win1_48x96, threshold))
+ comp[1] = val;
+ if (match_rect(found_locations[i], win2_48x96, threshold))
+ comp[2] = val;
+ if (match_rect(found_locations[i], win3_48x96, threshold))
+ comp[3] = val;
+ }
+ }
+ else if (winSize == cv::Size(64, 128))
+ {
+ for(int i = 0; i < (int)d_found_locations.size(); i++)
+ {
+ if (match_rect(d_found_locations[i], win1_64x128, threshold))
+ d_comp[1] = val;
+ if (match_rect(d_found_locations[i], win2_64x128, threshold))
+ d_comp[2] = val;
+ }
+ for(int i = 0; i < (int)found_locations.size(); i++)
+ {
+ if (match_rect(found_locations[i], win1_64x128, threshold))
+ comp[1] = val;
+ if (match_rect(found_locations[i], win2_64x128, threshold))
+ comp[2] = val;
+ }
+ }
+
+ cv::Mat gpu_rst(d_comp), cpu_rst(comp);
+ TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
- ;
GPU_OFF;
GPU_FULL_ON;
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// equalizeHist ////////////////////////
-TEST(equalizeHist)
+PERFTEST(equalizeHist)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1};
std::string type_name[] = {"CV_8UC1"};
GPU_ON;
ocl::equalizeHist(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::equalizeHist(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1);
}
}
}
/////////// CopyMakeBorder //////////////////////
-TEST(CopyMakeBorder)
+PERFTEST(CopyMakeBorder)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_dst;
int bordertype = BORDER_CONSTANT;
GPU_ON;
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
}
///////////// cornerMinEigenVal ////////////////////////
-TEST(cornerMinEigenVal)
+PERFTEST(cornerMinEigenVal)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_dst;
int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-
gen(src, size, size, all_type[j], 0, 256);
cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
GPU_ON;
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
}
///////////// cornerHarris ////////////////////////
-TEST(cornerHarris)
+PERFTEST(cornerHarris)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_32FC1};
GPU_ON;
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
}
///////////// integral ////////////////////////
-TEST(integral)
+PERFTEST(integral)
{
- Mat src, sum;
+ Mat src, sum, ocl_sum;
ocl::oclMat d_src, d_sum, d_buf;
int all_type[] = {CV_8UC1};
GPU_ON;
ocl::integral(d_src, d_sum);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::integral(d_src, d_sum);
- d_sum.download(sum);
+ d_sum.download(ocl_sum);
GPU_FULL_OFF;
+
+ if(sum.type() == ocl_sum.type()) //we won't test accuracy when the cpu function overflows
+ TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0);
+
}
}
}
///////////// WarpAffine ////////////////////////
-TEST(WarpAffine)
+PERFTEST(WarpAffine)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
static const double coeffs[2][3] =
{
- {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
- {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+ {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+ {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
};
Mat M(2, 3, CV_64F, (void *)coeffs);
int interpolation = INTER_NEAREST;
GPU_ON;
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
}
///////////// WarpPerspective ////////////////////////
-TEST(WarpPerspective)
+PERFTEST(WarpPerspective)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
static const double coeffs[3][3] =
{
- {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
- {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+ {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+ {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
{0.0, 0.0, 1.0}
};
Mat M(3, 3, CV_64F, (void *)coeffs);
- int interpolation = INTER_NEAREST;
+ int interpolation = INTER_LINEAR;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
GPU_ON;
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
}
///////////// resize ////////////////////////
-TEST(resize)
+PERFTEST(resize)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
GPU_ON;
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
GPU_ON;
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
}
///////////// threshold////////////////////////
-TEST(threshold)
+PERFTEST(threshold)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
-
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
GPU_ON;
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
GPU_ON;
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
- }
-}
-///////////// meanShiftFiltering////////////////////////
-TEST(meanShiftFiltering)
-{
- int sp = 10, sr = 10;
- Mat src, dst;
-
- ocl::oclMat d_src, d_dst;
-
- for (int size = Min_Size; size <= Max_Size; size *= Multiple)
- {
- SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
-
- gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
- pyrMeanShiftFiltering(src, dst, sp, sr);
-
- CPU_ON;
- pyrMeanShiftFiltering(src, dst, sp, sr);
- CPU_OFF;
-
- gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-
- d_src.upload(src);
-
- WARMUP_ON;
- ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
- WARMUP_OFF;
-
- GPU_ON;
- ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
- ;
- GPU_OFF;
-
- GPU_FULL_ON;
- d_src.upload(src);
- ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
- d_dst.download(dst);
- GPU_FULL_OFF;
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
}
}
-///////////// meanShiftProc////////////////////////
+///////////// meanShiftFiltering////////////////////////
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
{
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
-
// iterate meanshift procedure
- for (iter = 0; iter < maxIter; iter++)
+ for(iter = 0; iter < maxIter; iter++ )
{
int count = 0;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
int maxy = y0 + sp;
//deal with the image boundary
- if (minx < 0)
- {
- minx = 0;
- }
-
- if (miny < 0)
- {
- miny = 0;
- }
-
- if (maxx >= size.width)
- {
- maxx = size.width - 1;
- }
-
- if (maxy >= size.height)
- {
- maxy = size.height - 1;
- }
-
- if (iter == 0)
+ if(minx < 0) minx = 0;
+ if(miny < 0) miny = 0;
+ if(maxx >= size.width) maxx = size.width - 1;
+ if(maxy >= size.height) maxy = size.height - 1;
+ if(iter == 0)
{
pstart = sptr;
}
{
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
}
-
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
- for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
+ for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
{
int rowCount = 0;
int x = minx;
#if CV_ENABLE_UNROLLED
-
- for (; x + 4 <= maxx; x += 4, ptr += 16)
+ for( ; x + 4 <= maxx; x += 4, ptr += 16)
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-
- if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
sx += x;
rowCount++;
}
-
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
-
- if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
sx += x + 1;
rowCount++;
}
-
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
-
- if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
sx += x + 2;
rowCount++;
}
-
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
-
- if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
rowCount++;
}
}
-
#endif
-
- for (; x <= maxx; x++, ptr += 4)
+ for(; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
-
- if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
rowCount++;
}
}
-
- if (rowCount == 0)
- {
+ if(rowCount == 0)
continue;
- }
-
count += rowCount;
sy += y * rowCount;
}
- if (count == 0)
- {
+ if( count == 0 )
break;
- }
int x1 = sx / count;
int y1 = sy / count;
s2 = s2 / count;
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
- tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
+ tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
c1 = s1;
c2 = s2;
- if (stopFlag)
- {
+ if( stopFlag )
break;
- }
} //for iter
dptr[0] = (uchar)c0;
return coor;
}
-void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+// CPU reference for mean-shift filtering; the perf test below uses its output
+// as the expected result for ocl::meanShiftFiltering.
+//   src_roi - input image; must be non-empty, 8-bit, 4-channel
+//   dst_roi - output image; (re)created with the size and type of src_roi
+//   sp, sr  - passed through unchanged to do_meanShift
+//   crit    - termination criteria; the iteration count defaults to 5 when
+//             MAX_ITER is not set and is clamped to [1, 100]; epsilon defaults
+//             to 1 when EPS is not set and is otherwise clamped to be >= 0
+static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
{
+    if( src_roi.empty() )
+        CV_Error( CV_StsBadArg, "The input image is empty" );
+    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
+        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+
+    dst_roi.create(src_roi.size(), src_roi.type());
+
+    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
+    CV_Assert( !(dst_roi.step & 0x3) );
+
+    if( !(crit.type & cv::TermCriteria::MAX_ITER) )
+        crit.maxCount = 5;
+    int maxIter = std::min(std::max(crit.maxCount, 1), 100);
+
+    // Use the caller's epsilon only when the EPS criterion is actually set;
+    // previously the fallback of 1 was unconditionally overwritten.
+    float eps = 1.f;
+    if( crit.type & cv::TermCriteria::EPS )
+        eps = (float)std::max(crit.epsilon, 0.0);
+
+    // Lookup table of squares: tab[d + 255] == d * d.
+    int tab[512];
+    for(int i = 0; i < 512; i++)
+        tab[i] = (i - 255) * (i - 255);
+    uchar *sptr = src_roi.data;
+    uchar *dptr = dst_roi.data;
+    int sstep = (int)src_roi.step;
+    int dstep = (int)dst_roi.step;
+    cv::Size size = src_roi.size();
+
+    // Pointers advance 4 bytes per pixel; at the end of each row they are
+    // moved from the end of the pixel data to the start of the next row.
+    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
+        dptr += dstep - (size.width << 2))
+    {
+        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
+        {
+            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
+        }
+    }
+}
+
+// Benchmarks ocl::meanShiftFiltering against the CPU reference
+// meanShiftFiltering_ on square 8UC4 images and requires the two outputs to
+// match exactly (tolerance 0.0). The same termination criteria are passed to
+// every call so the timed GPU runs and the reference use identical settings.
+PERFTEST(meanShiftFiltering)
+{
+    int sp = 5, sr = 6;
+    Mat src, dst, ocl_dst;
+
+    ocl::oclMat d_src, d_dst;
+
+    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    {
+        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
+
+        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
+
+        cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
+
+        // Call once outside the CPU_ON/CPU_OFF section; this also produces
+        // the reference output in dst.
+        meanShiftFiltering_(src, dst, sp, sr, crit);
+
+        CPU_ON;
+        meanShiftFiltering_(src, dst, sp, sr, crit);
+        CPU_OFF;
+
+        d_src.upload(src);
+
+        WARMUP_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
+        WARMUP_OFF;
+
+        GPU_ON;
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
+        GPU_OFF;
+
+        // This section additionally includes the upload and the download.
+        GPU_FULL_ON;
+        d_src.upload(src);
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
+        d_dst.download(ocl_dst);
+        GPU_FULL_OFF;
+
+        TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
+    }
+}
+
+void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
+{
if (src_roi.empty())
{
CV_Error(CV_StsBadArg, "The input image is empty");
}
-
if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
{
CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
}
+ dst_roi.create(src_roi.size(), src_roi.type());
+ dstCoor_roi.create(src_roi.size(), CV_16SC2);
+
CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
CV_Assert(!(dstCoor_roi.step & 0x3));
}
}
-TEST(meanShiftProc)
+PERFTEST(meanShiftProc)
{
- Mat src, dst, dstCoor_roi;
- ocl::oclMat d_src, d_dst, d_dstCoor_roi;
+ Mat src;
+ vector<Mat> dst(2), ocl_dst(2);
+ ocl::oclMat d_src, d_dst, d_dstCoor;
TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
- gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
- gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
- meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+ meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
CPU_ON;
- meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
+ meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
WARMUP_OFF;
GPU_ON;
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
- ;
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
- ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
- d_dst.download(dst);
- d_dstCoor_roi.download(dstCoor_roi);
+ ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
+ d_dst.download(ocl_dst[0]);
+ d_dstCoor.download(ocl_dst[1]);
GPU_FULL_OFF;
+ vector<double> eps(2, 0.);
+ TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
}
}
///////////// remap////////////////////////
-TEST(remap)
+PERFTEST(remap)
{
- Mat src, dst, xmap, ymap;
+ Mat src, dst, xmap, ymap, ocl_dst;
ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
int all_type[] = {CV_8UC1, CV_8UC4};
}
}
-
remap(src, dst, xmap, ymap, interpolation, borderMode);
CPU_ON;
GPU_ON;
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0);
}
}
-}
\ No newline at end of file
+}
+///////////// CLAHE ////////////////////////
+// Benchmarks cv::CLAHE against cv::ocl::CLAHE (clip limit 40) on square
+// CV_8UC1 images. The accuracy check (tolerance 1.0) runs right after the
+// warm-up call, before the GPU_ON and GPU_FULL_ON sections.
+PERFTEST(CLAHE)
+{
+    Mat src, dst, ocl_dst;
+    cv::ocl::oclMat d_src, d_dst;
+    int all_type[] = {CV_8UC1};
+    std::string type_name[] = {"CV_8UC1"};
+
+    double clipLimit = 40.0;
+
+    cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit);
+    cv::Ptr<cv::ocl::CLAHE> d_clahe = cv::ocl::createCLAHE(clipLimit);
+
+    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
+    {
+        for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[j] ;
+
+            gen(src, size, size, all_type[j], 0, 256);
+
+            CPU_ON;
+            clahe->apply(src, dst);
+            CPU_OFF;
+
+            d_src.upload(src);
+
+            WARMUP_ON;
+            d_clahe->apply(d_src, d_dst);
+            WARMUP_OFF;
+
+            // Copy the warm-up result to the host for comparison with the CPU output.
+            ocl_dst = d_dst;
+
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
+
+            GPU_ON;
+            d_clahe->apply(d_src, d_dst);
+            GPU_OFF;
+
+            GPU_FULL_ON;
+            d_src.upload(src);
+            d_clahe->apply(d_src, d_dst);
+            // Download into ocl_dst, not dst, so the CPU reference is never
+            // overwritten by device output.
+            d_dst.download(ocl_dst);
+            GPU_FULL_OFF;
+        }
+    }
+}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// ocl::oclMat d_src(src), d_templ(templ), d_dst;
// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
//}
-TEST(matchTemplate)
+PERFTEST(matchTemplate)
{
//InitMatchTemplate();
-
- Mat src, templ, dst;
+ Mat src, templ, dst, ocl_dst;
int templ_size = 5;
-
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
int all_type[] = {CV_32FC1, CV_32FC4};
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF;
- ocl::oclMat d_src(src), d_templ, d_dst;
-
- d_templ.upload(templ);
+ ocl::oclMat d_src(src), d_templ(templ), d_dst;
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
}
}
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
}
}
}
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// ConvertTo////////////////////////
-TEST(ConvertTo)
+PERFTEST(ConvertTo)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
GPU_ON;
d_src.convertTo(d_dst, CV_32FC1);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.convertTo(d_dst, CV_32FC1);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
}
///////////// copyTo////////////////////////
-TEST(copyTo)
+PERFTEST(copyTo)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
ocl::oclMat d_src, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
GPU_ON;
d_src.copyTo(d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_src.copyTo(d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
}
///////////// setTo////////////////////////
-TEST(setTo)
+PERFTEST(setTo)
{
- Mat src, dst;
+ Mat src, ocl_src;
Scalar val(1, 2, 3, 4);
- ocl::oclMat d_src, d_dst;
+ ocl::oclMat d_src;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
d_src.setTo(val);
WARMUP_OFF;
- GPU_ON;
+ d_src.download(ocl_src);
+ TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0);
+
+ GPU_ON;;
d_src.setTo(val);
- ;
GPU_OFF;
GPU_FULL_ON;
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// norm////////////////////////
-TEST(norm)
+PERFTEST(norm)
{
- Mat src, buf;
- ocl::oclMat d_src, d_buf;
-
+ Mat src1, src2, ocl_src1;
+ ocl::oclMat d_src1, d_src2;
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
- gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
- gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+ gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
+ gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
- norm(src, NORM_INF);
+ norm(src1, src2, NORM_INF);
CPU_ON;
- norm(src, NORM_INF);
+ norm(src1, src2, NORM_INF);
CPU_OFF;
- d_src.upload(src);
- d_buf.upload(buf);
+ d_src1.upload(src1);
+ d_src2.upload(src2);
WARMUP_ON;
- ocl::norm(d_src, d_buf, NORM_INF);
+ ocl::norm(d_src1, d_src2, NORM_INF);
WARMUP_OFF;
+ d_src1.download(ocl_src1);
+ TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
+
GPU_ON;
- ocl::norm(d_src, d_buf, NORM_INF);
- ;
+ ocl::norm(d_src1, d_src2, NORM_INF);
GPU_OFF;
GPU_FULL_ON;
- d_src.upload(src);
- ocl::norm(d_src, d_buf, NORM_INF);
+ d_src1.upload(src1);
+ d_src2.upload(src2);
+ ocl::norm(d_src1, d_src2, NORM_INF);
GPU_FULL_OFF;
}
}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// PyrLKOpticalFlow ////////////////////////
-TEST(PyrLKOpticalFlow)
+PERFTEST(PyrLKOpticalFlow)
{
- std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
- std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
+ std::string images1[] = {"rubberwhale1.png", "basketball1.png"};
+ std::string images2[] = {"rubberwhale2.png", "basketball2.png"};
for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
{
SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
else
SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
- Mat nextPts_cpu;
- Mat status_cpu;
+ Mat ocl_nextPts;
+ Mat ocl_status;
vector<Point2f> pts;
goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
GPU_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
- ;
GPU_OFF;
GPU_FULL_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
if (!d_nextPts.empty())
- {
- d_nextPts.download(nextPts_cpu);
- }
+ d_nextPts.download(ocl_nextPts);
if (!d_status.empty())
+ d_status.download(ocl_status);
+ GPU_FULL_OFF;
+
+ size_t mismatch = 0;
+ for (int i = 0; i < (int)nextPts.size(); ++i)
{
- d_status.download(status_cpu);
+ if(status[i] != ocl_status.at<unsigned char>(0, i)){
+ mismatch++;
+ continue;
+ }
+ if(status[i]){
+ Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
+ Point2f cpu_rst = nextPts[i];
+ if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
+ mismatch++;
+ }
}
-
- GPU_FULL_OFF;
+ double ratio = (double)mismatch / (double)nextPts.size();
+ if(ratio < .02)
+ TestSystem::instance().setAccurate(1, ratio);
+ else
+ TestSystem::instance().setAccurate(0, ratio);
}
}
}
+
+
+// Benchmarks Dual TV-L1 optical flow on the rubberwhale image pair: the
+// algorithm from cv::createOptFlow_DualTVL1 versus
+// cv::ocl::OpticalFlowDual_TVL1_OCL. The two device flow outputs are compared
+// with the two channels of the CPU flow using ExceptedMatSimilar (3e-3).
+PERFTEST(tvl1flow)
+{
+ // NOTE(review): assert() is compiled out when NDEBUG is defined, so a
+ // missing image is not reported in such builds.
+ cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+ assert(!frame0.empty());
+
+ cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+ assert(!frame1.empty());
+
+ cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+ cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1);
+ cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1);
+
+ cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
+ cv::Mat flow;
+
+
+ SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<<frame1.cols<<'x'<<frame1.rows<<"; rubberwhale2.png";
+
+ // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
+ alg->calc(frame0, frame1, flow);
+
+ CPU_ON;
+ alg->calc(frame0, frame1, flow);
+ CPU_OFF;
+
+ // Split the CPU flow into gold[0] and gold[1]; these are the references
+ // for d_flowx and d_flowy respectively.
+ cv::Mat gold[2];
+ cv::split(flow, gold);
+
+ // NOTE(review): d0/d1 are constructed as CV_32FC1 and then filled by
+ // upload() from images read with IMREAD_GRAYSCALE; presumably upload()
+ // re-creates the buffer with the image's own type - confirm.
+ cv::ocl::oclMat d0(frame0.size(), CV_32FC1);
+ d0.upload(frame0);
+ cv::ocl::oclMat d1(frame1.size(), CV_32FC1);
+ d1.upload(frame1);
+
+ WARMUP_ON;
+ d_alg(d0, d1, d_flowx, d_flowy);
+ WARMUP_OFF;
+ // Disabled accuracy check; the active check is at the end of this test.
+/*
+ double diff1 = 0.0, diff2 = 0.0;
+ if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1
+ &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1)
+ TestSystem::instance().setAccurate(1);
+ else
+ TestSystem::instance().setAccurate(0);
+
+ TestSystem::instance().setDiff(diff1);
+ TestSystem::instance().setDiff(diff2);
+*/
+
+
+ // collectGarbage() is called inside the GPU_ON/GPU_OFF section, right
+ // after the algorithm call.
+ GPU_ON;
+ d_alg(d0, d1, d_flowx, d_flowy);
+ d_alg.collectGarbage();
+ GPU_OFF;
+
+
+ cv::Mat flowx, flowy;
+
+ // This section additionally includes both uploads and both downloads.
+ GPU_FULL_ON;
+ d0.upload(frame0);
+ d1.upload(frame1);
+ d_alg(d0, d1, d_flowx, d_flowy);
+ d_alg.collectGarbage();
+ d_flowx.download(flowx);
+ d_flowy.download(flowy);
+ GPU_FULL_OFF;
+
+ // Compare each downloaded flow component with the matching CPU channel.
+ TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3);
+ TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3);
+}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// pyrDown //////////////////////
-TEST(pyrDown)
+PERFTEST(pyrDown)
{
- Mat src, dst;
+ Mat src, dst, ocl_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
GPU_ON;
ocl::pyrDown(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrDown(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f);
+ }
+ }
+}
+
+///////////// pyrUp ////////////////////////
+// Benchmarks cv::pyrUp against ocl::pyrUp on square images (side doubling
+// from 500 up to 2000) for every type listed in all_type, then compares the
+// CPU result with the downloaded device result.
+PERFTEST(pyrUp)
+{
+    Mat src, dst, ocl_dst;
+    int all_type[] = {CV_8UC1, CV_8UC4};
+    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
+    const size_t type_count = sizeof(all_type) / sizeof(int);
+
+    for (int size = 500; size <= 2000; size *= 2)
+    {
+        for (size_t t = 0; t < type_count; t++)
+        {
+            SUBTEST << size << 'x' << size << "; " << type_name[t] ;
+
+            gen(src, size, size, all_type[t], 0, 256);
+
+            // One call outside the CPU_ON/CPU_OFF section, then the same call inside it.
+            pyrUp(src, dst);
+
+            CPU_ON;
+            pyrUp(src, dst);
+            CPU_OFF;
+
+            ocl::oclMat d_src(src), d_dst;
+
+            WARMUP_ON;
+            ocl::pyrUp(d_src, d_dst);
+            WARMUP_OFF;
+
+            GPU_ON;
+            ocl::pyrUp(d_src, d_dst);
+            GPU_OFF;
+
+            // This section additionally includes the upload and the download.
+            GPU_FULL_ON;
+            d_src.upload(src);
+            ocl::pyrUp(d_src, d_dst);
+            d_dst.download(ocl_dst);
+            GPU_FULL_OFF;
+
+            // Tolerance is 1e-4 for 32-bit float input and 1.0 otherwise.
+            const double tolerance = (src.depth() == CV_32F ? 1e-4f : 1.0);
+            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, tolerance);
}
}
}
\ No newline at end of file
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-// Fangfang Bai, fangfang@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include "precomp.hpp"
-
-///////////// pyrUp ////////////////////////
-TEST(pyrUp)
-{
- Mat src, dst;
- int all_type[] = {CV_8UC1, CV_8UC4};
- std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
-
- for (int size = 500; size <= 2000; size *= 2)
- {
- for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
- {
- SUBTEST << size << 'x' << size << "; " << type_name[j] ;
-
- gen(src, size, size, all_type[j], 0, 256);
-
- pyrUp(src, dst);
-
- CPU_ON;
- pyrUp(src, dst);
- CPU_OFF;
-
- ocl::oclMat d_src(src);
- ocl::oclMat d_dst;
-
- WARMUP_ON;
- ocl::pyrUp(d_src, d_dst);
- WARMUP_OFF;
-
- GPU_ON;
- ocl::pyrUp(d_src, d_dst);
- ;
- GPU_OFF;
-
- GPU_FULL_ON;
- d_src.upload(src);
- ocl::pyrUp(d_src, d_dst);
- d_dst.download(dst);
- GPU_FULL_OFF;
- }
- }
-}
\ No newline at end of file
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
+// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
#include "precomp.hpp"
///////////// Merge////////////////////////
-TEST(Merge)
+PERFTEST(Merge)
{
- Mat dst;
+ Mat dst, ocl_dst;
ocl::oclMat d_dst;
int channels = 4;
GPU_ON;
ocl::merge(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
-
for (int i = 0; i < channels; ++i)
{
- d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
+ d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
}
-
ocl::merge(d_src, d_dst);
- d_dst.download(dst);
+ d_dst.download(ocl_dst);
GPU_FULL_OFF;
+
+ TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
}
///////////// Split////////////////////////
-TEST(Split)
+PERFTEST(Split)
{
//int channels = 4;
int all_type[] = {CV_8UC1, CV_32FC1};
Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
- std::vector<cv::Mat> dst;
+ std::vector<cv::Mat> dst, ocl_dst(4);
split(src, dst);
WARMUP_ON;
ocl::split(d_src, d_dst);
- WARMUP_OFF;
+ WARMUP_OFF;
GPU_ON;
ocl::split(d_src, d_dst);
- ;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::split(d_src, d_dst);
+ for(size_t i = 0; i < dst.size(); i++)
+ d_dst[i].download(ocl_dst[i]);
GPU_FULL_OFF;
+
+ vector<double> eps(4, 0.);
+ TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
}
}
//M*/
#include "precomp.hpp"
+#if GTEST_OS_WINDOWS
+#define NOMINMAX
+# include <windows.h>
+#endif
// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files
// All images needed in this test are in samples/gpu folder.
deviation = std::sqrt(sum / gpu_times_.size());
}
- printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
+ printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
num_subtests_called_++;
void TestSystem::printHeading()
{
cout << endl;
- cout << setiosflags(ios_base::left);
- cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
- << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP"
- << "DESCRIPTION\n";
+ cout<< setiosflags(ios_base::left);
+
+#if 0
+ cout<<TAB<<setw(7)<< "Accu." << setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+ << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+ << "Description\n";
+ cout<<TAB<<setw(7)<<""<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
+#endif
+
+ cout<<TAB<< setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+ << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+ << "Description\n";
+ cout<<TAB<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
cout << resetiosflags(ios_base::left);
}
{
recordname_ += "_OCL.csv";
record_ = fopen(recordname_.c_str(), "w");
+ if(record_ == NULL)
+ {
+ cout<<".csv file open failed.\n";
+ exit(0);
+ }
}
- fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
+ fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
fflush(record_);
}
{
cout << setiosflags(ios_base::fixed);
cout << "\naverage GPU speedup: x"
- << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
- << endl;
+ << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
+ << endl;
cout << "\nGPU exceeded: "
- << setprecision(3) << speedup_faster_count_
- << "\nGPU passed: "
- << setprecision(3) << speedup_equal_count_
- << "\nGPU failed: "
- << setprecision(3) << speedup_slower_count_
- << endl;
+ << setprecision(3) << speedup_faster_count_
+ << "\nGPU passed: "
+ << setprecision(3) << speedup_equal_count_
+ << "\nGPU failed: "
+ << setprecision(3) << speedup_slower_count_
+ << endl;
cout << "\nGPU exceeded rate: "
- << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << "\nGPU passed rate: "
- << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << "\nGPU failed rate: "
- << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << endl;
+ << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << "\nGPU passed rate: "
+ << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << "\nGPU failed rate: "
+ << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << endl;
cout << "\naverage GPUTOTAL speedup: x"
- << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
- << endl;
+ << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
+ << endl;
cout << "\nGPUTOTAL exceeded: "
- << setprecision(3) << speedup_full_faster_count_
- << "\nGPUTOTAL passed: "
- << setprecision(3) << speedup_full_equal_count_
- << "\nGPUTOTAL failed: "
- << setprecision(3) << speedup_full_slower_count_
- << endl;
+ << setprecision(3) << speedup_full_faster_count_
+ << "\nGPUTOTAL passed: "
+ << setprecision(3) << speedup_full_equal_count_
+ << "\nGPUTOTAL failed: "
+ << setprecision(3) << speedup_full_slower_count_
+ << endl;
cout << "\nGPUTOTAL exceeded rate: "
- << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << "\nGPUTOTAL passed rate: "
- << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << "\nGPUTOTAL failed rate: "
- << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
- << "%"
- << endl;
+ << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << "\nGPUTOTAL passed rate: "
+ << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << "\nGPUTOTAL failed rate: "
+ << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+ << "%"
+ << endl;
cout << resetiosflags(ios_base::fixed);
}
-void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+// Colors that printMetrics can use to highlight a result row.
+enum GTestColor {
+    COLOR_DEFAULT,
+    COLOR_RED,
+    COLOR_GREEN,
+    COLOR_YELLOW
+};
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+// Returns the console character attribute for the given color, or 0 for any
+// color without a mapping (including COLOR_DEFAULT).
+static WORD GetColorAttribute(GTestColor color) {
+    switch (color) {
+    case COLOR_RED: return FOREGROUND_RED;
+    case COLOR_GREEN: return FOREGROUND_GREEN;
+    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    default: return 0;
+    }
+}
+#else
+// Returns the digit used in the "\033[0;3<digit>m" escape sequence for the
+// given color, or NULL for any color without a mapping (including
+// COLOR_DEFAULT).
+static const char* GetAnsiColorCode(GTestColor color) {
+    switch (color) {
+    case COLOR_RED: return "1";
+    case COLOR_GREEN: return "2";
+    case COLOR_YELLOW: return "3";
+    default: return NULL;
+    }
+}
+#endif
+
+static void printMetricsUti(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, std::stringstream& stream, std::stringstream& cur_subtest_description)
{
- cout << TAB << setiosflags(ios_base::left);
- stringstream stream;
+ //cout <<TAB<< setw(7) << stream.str();
+ cout <<TAB;
+ stream.str("");
stream << cpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
- cout << setw(14) << stream.str();
+ cout << setw(8) << stream.str();
stream.str("");
stream << gpu_full_time;
- cout << setw(14) << stream.str();
+ cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << fullspeedup;
- cout << setw(14) << stream.str();
+ cout << setw(10) << stream.str();
- cout << cur_subtest_description_.str();
+ cout << cur_subtest_description.str();
cout << resetiosflags(ios_base::left) << endl;
}
+// Prints the timing row for the current subtest through printMetricsUti.
+// When is_accurate is 1 or -1 the row is printed with the current console
+// colors; for any other value it is printed in red (console text attributes
+// on Windows, ANSI escape sequences elsewhere) and the previous color is
+// restored afterwards.
+void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+{
+    cout << setiosflags(ios_base::left);
+    stringstream stream;
+
+    std::stringstream &cur_subtest_description = getCurSubtestDescription();
+
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+
+    const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+    // Gets the current text color.
+    CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+    GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+    const WORD old_color_attrs = buffer_info.wAttributes;
+    // We need to flush the stream buffers into the console before each
+    // SetConsoleTextAttribute call lest it affect the text that is already
+    // printed but has not yet reached the console.
+    fflush(stdout);
+
+    if(is_accurate == 1||is_accurate == -1)
+    {
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+    }
+    else
+    {
+        const WORD color = GetColorAttribute(COLOR_RED);
+        SetConsoleTextAttribute(stdout_handle,
+                                color| FOREGROUND_INTENSITY);
+
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        fflush(stdout);
+        // Restores the text color.
+        SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+    }
+#else
+    GTestColor color = COLOR_RED;
+    if(is_accurate == 1|| is_accurate == -1)
+    {
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+    }
+    else
+    {
+        printf("\033[0;3%sm", GetAnsiColorCode(color));
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        printf("\033[m"); // Resets the terminal to default.
+    }
+#endif
+}
+
void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
{
if (!record_)
record_ = fopen(recordname_.c_str(), "w");
}
- fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
- cur_subtest_description_.str().c_str(),
- cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
- gpu_min, gpu_max, std_dev);
+ string _is_accurate_;
+
+ if(is_accurate_ == 1)
+ _is_accurate_ = "Pass";
+ else if(is_accurate_ == 0)
+ _is_accurate_ = "Fail";
+ else if(is_accurate_ == -1)
+ _is_accurate_ = " ";
+ else
+ {
+ std::cout<<"is_accurate errer: "<<is_accurate_<<"\n";
+ exit(-1);
+ }
+
+ fprintf(record_, "%s,%s,%s,%.2f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n",
+ itname_changed_ ? itname_.c_str() : "",
+ cur_subtest_description_.str().c_str(),
+ _is_accurate_.c_str(),
+ accurate_diff_,
+ cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+ gpu_min, gpu_max, std_dev);
if (itname_changed_)
{
}
fprintf(record_, "\nAverage GPU speedup: %.3f\n"
- "exceeded: %d (%.3f%%)\n"
- "passed: %d (%.3f%%)\n"
- "failed: %d (%.3f%%)\n"
- "\nAverage GPUTOTAL speedup: %.3f\n"
- "exceeded: %d (%.3f%%)\n"
- "passed: %d (%.3f%%)\n"
- "failed: %d (%.3f%%)\n",
- speedup_total_ / std::max(1, num_subtests_called_),
- speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
- speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
- speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
- speedup_full_total_ / std::max(1, num_subtests_called_),
- speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
- speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
- speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
- );
+ "exceeded: %d (%.3f%%)\n"
+ "passed: %d (%.3f%%)\n"
+ "failed: %d (%.3f%%)\n"
+ "\nAverage GPUTOTAL speedup: %.3f\n"
+ "exceeded: %d (%.3f%%)\n"
+ "passed: %d (%.3f%%)\n"
+ "failed: %d (%.3f%%)\n",
+ speedup_total_ / std::max(1, num_subtests_called_),
+ speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
+ speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
+ speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
+ speedup_full_total_ / std::max(1, num_subtests_called_),
+ speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
+ speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
+ speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+ );
fflush(record_);
}
void TestSystem::printError(const std::string &msg)
{
- if(msg != "CL_INVALID_BUFFER_SIZE")
- {
- cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
- }
+ if(msg != "CL_INVALID_BUFFER_SIZE")
+ {
+ cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
+ }
}
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
rng.fill(mat, RNG::UNIFORM, low, high);
}
-
string abspath(const string &relpath)
{
return TestSystem::instance().workingDir() + relpath;
int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
- const char *err_msg, const char * /*file_name*/,
- int /*line*/, void * /*userdata*/)
+ const char *err_msg, const char * /*file_name*/,
+ int /*line*/, void * /*userdata*/)
{
TestSystem::instance().printError(err_msg);
return 0;
}
+// Returns the NORM_INF norm of a single matrix, as computed by norm().
+double checkNorm(const Mat &m)
+{
+    const double inf_norm = norm(m, NORM_INF);
+    return inf_norm;
+}
+
+// Returns the NORM_INF norm between two matrices, as computed by
+// norm(m1, m2, NORM_INF).
+double checkNorm(const Mat &m1, const Mat &m2)
+{
+    const double inf_norm_diff = norm(m1, m2, NORM_INF);
+    return inf_norm_diff;
+}
+
+// Runs matchTemplate(m1, m2) with CV_TM_CCORR_NORMED and returns the absolute
+// distance of the response at (0, 0) from 1, i.e. |response - 1|.
+double checkSimilarity(const Mat &m1, const Mat &m2)
+{
+    Mat response;
+    matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+
+    const float score = response.at<float>(0, 0);
+    return std::abs(score - 1.f);
+}
+
+
+
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/video/video.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts/ts_gtest.h"
+
#define Min_Size 1000
#define Max_Size 4000
using namespace cv;
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
+void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n);
+
string abspath(const string &relpath);
int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
typedef struct
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
int sp, int sr, cv::TermCriteria crit);
+
+// Returns 1 when `expected == actual` holds and 0 otherwise.
+// The two arguments may have different types as long as operator== accepts them.
+template<class T1, class T2>
+int ExpectedEQ(T1 expected, T2 actual)
+{
+    return (expected == actual) ? 1 : 0;
+}
+
+// Returns 1 when the two values are considered equal by
+// testing::internal::Double::AlmostEquals(), and 0 otherwise.
+template<class T1>
+int EeceptDoubleEQ(T1 expected, T1 actual)
+{
+    testing::internal::Double wrapped_expected(expected);
+    testing::internal::Double wrapped_actual(actual);
+
+    return wrapped_expected.AlmostEquals(wrapped_actual) ? 1 : 0;
+}
+
+// Returns 1 when both values of the same type compare equal, 0 otherwise.
+template<class T>
+int AssertEQ(T expected, T actual)
+{
+    if(!(expected == actual))
+        return 0;
+
+    return 1;
+}
+
+int ExceptDoubleNear(double val1, double val2, double abs_error);
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold);
+
+double checkNorm(const cv::Mat &m);
+double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
+double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
+
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps);
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps);
+
class Runnable
{
public:
return cur_iter_idx_ >= cpu_num_iters_;
}
+ // Returns the current value of cur_iter_idx_.
+ int get_cur_iter_idx()
+ {
+ return cur_iter_idx_;
+ }
+
+ // Returns the current value of cpu_num_iters_.
+ int get_cpu_num_iters()
+ {
+ return cpu_num_iters_;
+ }
+
bool warmupStop()
{
return cur_warmup_idx_++ >= gpu_warmup_iters_;
itname_changed_ = true;
}
+ // Stores an accuracy verdict and its associated difference directly in
+ // is_accurate_ / accurate_diff_. The Expect*/Excepted* helpers in this
+ // class write 1 (within tolerance) or 0 (outside tolerance) to is_accurate_.
+ void setAccurate(int accurate, double diff)
+ {
+ is_accurate_ = accurate;
+ accurate_diff_ = diff;
+ }
+
+ // Compares dst[i] with cpu_dst[i] for every i using checkNorm() and a
+ // per-pair tolerance eps[i]. is_accurate_ becomes 1 when every pair is
+ // within its tolerance and 0 as soon as one pair exceeds it.
+ // accurate_diff_ is raised to the largest difference seen; it is never
+ // lowered here, so it also keeps any larger value it held on entry.
+ void ExpectMatsNear(vector<Mat>& dst, vector<Mat>& cpu_dst, vector<double>& eps)
+ {
+     assert(dst.size() == cpu_dst.size());
+     assert(cpu_dst.size() == eps.size());
+
+     is_accurate_ = 1;
+
+     const size_t pair_count = dst.size();
+     for(size_t idx = 0; idx < pair_count; ++idx)
+     {
+         const double pair_diff = checkNorm(dst[idx], cpu_dst[idx]);
+
+         accurate_diff_ = max(accurate_diff_, pair_diff);
+
+         if(pair_diff > eps[idx])
+         {
+             is_accurate_ = 0;
+         }
+     }
+ }
+
+ // Stores checkNorm(dst, cpu_dst) in accurate_diff_ and sets is_accurate_
+ // to 1 when that difference is <= eps, otherwise to 0.
+ // Both matrices are asserted to share type and size.
+ void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+ {
+     assert(dst.type() == cpu_dst.type());
+     assert(dst.size() == cpu_dst.size());
+
+     accurate_diff_ = checkNorm(dst, cpu_dst);
+     is_accurate_ = (accurate_diff_ <= eps) ? 1 : 0;
+ }
+
+ // Stores checkSimilarity(cpu_dst, dst) in accurate_diff_ and sets
+ // is_accurate_ to 1 when that value is <= eps, otherwise to 0.
+ // Both matrices are asserted to share type and size.
+ void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
+ {
+     assert(dst.type() == cpu_dst.type());
+     assert(dst.size() == cpu_dst.size());
+
+     accurate_diff_ = checkSimilarity(cpu_dst, dst);
+     is_accurate_ = (accurate_diff_ <= eps) ? 1 : 0;
+ }
+
+ // Returns a mutable reference to cur_subtest_description_, so callers can
+ // read or append to the description stream directly.
+ std::stringstream &getCurSubtestDescription()
+ {
+ return cur_subtest_description_;
+ }
+
private:
TestSystem():
cur_subtest_is_empty_(true), cpu_elapsed_(0),
speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
num_iters_(10), cpu_num_iters_(2),
gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
- record_(0), recordname_("performance"), itname_changed_(true)
+ record_(0), recordname_("performance"), itname_changed_(true),
+ is_accurate_(-1), accurate_diff_(0.)
{
cpu_times_.reserve(num_iters_);
gpu_times_.reserve(num_iters_);
cur_subtest_description_.str("");
cur_subtest_is_empty_ = true;
cur_iter_idx_ = 0;
+ cur_warmup_idx_ = 0;
cpu_times_.clear();
gpu_times_.clear();
gpu_full_times_.clear();
+ is_accurate_ = -1;
+ accurate_diff_ = 0.;
}
double meanTime(const std::vector<int64> &samples);
void printHeading();
void printSummary();
- void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
+ void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
void writeHeading();
void writeSummary();
std::string recordname_;
std::string itname_;
bool itname_changed_;
+
+ int is_accurate_;
+ double accurate_diff_;
};
void name##_init::run()
-#define TEST(name) \
+#define PERFTEST(name) \
struct name##_test: Runnable { \
name##_test(): Runnable(#name) { \
TestSystem::instance().addTest(this); \
while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define GPU_OFF \
- ocl::finish(); \
+ ocl::finish();\
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
#define WARMUP_ON \
while (!TestSystem::instance().warmupStop()) {
#define WARMUP_OFF \
- ocl::finish(); \
+ ocl::finish();\
} TestSystem::instance().warmupComplete()
// Jiang Liyuan, jlyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Zailong Wu, bullet@yeah.net
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
else
arithmetic_run<float>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
}
+
void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ float f_scalar = (float)scalar;
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
else
{
- float f_scalar = (float)scalar;
args.push_back( make_pair( sizeof(cl_float), (void *)&f_scalar));
}
const char **kernelString = mask.data ? &arithm_add_scalar_mask : &arithm_add_scalar;
arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1);
}
+// Scalar overload of multiply: forwards src, dst and scalar to
+// arithmetic_scalar_run() using the "arithm_muls" kernel from the arithm_mul
+// program source.
+void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst)
+{
+    string muls_kernel("arithm_muls");
+    arithmetic_scalar_run(src, dst, muls_kernel, &arithm_mul, scalar);
+}
void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
{
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
}
}
-template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
+template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
+ const oclMat &mask, oclMat &buf)
{
size_t groupnum = src.clCxt->computeUnits();
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
int vlen = 8;
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
- Context *clCxt = src.clCxt;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
- *minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
+
+ ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
+
+ cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
+
if (mask.empty())
{
- arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
+ arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
}
else
{
- arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
+ arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
}
- T *p = new T[groupnum * vlen * 2];
- memset(p, 0, dbsize);
- openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
- if(minVal != NULL){
+
+ Mat matbuf = Mat(buf);
+ T *p = matbuf.ptr<T>();
+ if(minVal != NULL)
+ {
+ *minVal = std::numeric_limits<double>::max();
for(int i = 0; i < vlen * (int)groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
}
}
- if(maxVal != NULL){
+ if(maxVal != NULL)
+ {
+ *maxVal = -std::numeric_limits<double>::max();
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
}
}
- delete[] p;
- openCLFree(dstBuffer);
}
-typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
+typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
+ oclMat buf;
+ minMax_buf(src, minVal, maxVal, mask, buf);
+}
+void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
+{
CV_Assert(src.oclchannels() == 1);
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
};
minMaxFunc func;
func = functab[src.depth()];
- func(src, minVal, maxVal, mask);
+ func(src, minVal, maxVal, mask, buf);
}
//////////////////////////////////////////////////////////////////////////////
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ T scalar;
if(_scalar != NULL)
{
double scalar1 = *((double *)_scalar);
- T scalar = (T)scalar1;
+ scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+ float pf = p;
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
{
- float pf = p;
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
}
else
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx, distance, distType);
}
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
}
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
}
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (query.cols <= 64)
{
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
}
- else if (query.cols <= 128)
+ else if (query.cols <= 128 && !is_cpu)
{
knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
}
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
- ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+ ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
using namespace ::cv::ocl::canny;
- calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+ oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
- edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
+ calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
- edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+ edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
- getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+ edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);
+
+ getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
}
}
buf.create(src.size(), apperture_size);
buf.edgeBuf.setTo(Scalar::all(0));
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
if (apperture_size == 3)
{
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
- calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
else
{
buf.filterDX->apply(src, buf.dx);
buf.filterDY->apply(src, buf.dy);
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
- calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+
+ oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
+
+ calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- char build_options [15] = "";
- if(L2Grad)
- {
- strcat(build_options, "-D L2GRAD");
- }
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
+ const char * build_options = L2Grad ? "-D L2GRAD":"";
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
string kernelName = "calcMap";
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
+ openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
- rectKernel?"-D RECTKERNEL":"",
- s);
+ s, rectKernel?"-D RECTKERNEL":"");
vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
- openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth);
+ const int buffer_size = 100;
+ char opt_buffer [buffer_size] = "";
+ sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
+
+ openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
}
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType)
oclMat gpu_krnl;
int nDivisor;
- normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
+ normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
- openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
+ openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
}
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include <iomanip>
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::ocl;
+
+static bool use_cpu_sorter = true;
+
+namespace cv
+{
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *imgproc_gfft;
+ }
+}
+
+namespace
+{
+// Selects which Sorter<> specialization orders the detected corners:
+//   CPU_STL   - sorts with std::sort after converting the corners to a Mat
+//   BITONIC   - launches the "sortCorners_bitonicSort" kernel
+//   SELECTION - launches the "sortCorners_selectionSortLocal" and
+//               "sortCorners_selectionSortFinal" kernels
+enum SortMethod
+{
+ CPU_STL,
+ BITONIC,
+ SELECTION
+};
+
+const int GROUP_SIZE = 256;
+
+// Primary template, intentionally left empty. The usable implementations are
+// the explicit specializations for each SortMethod value, each of which
+// declares an EigType typedef and a static sortCorners_caller().
+template<SortMethod method>
+struct Sorter
+{
+ //typedef EigType;
+};
+
+//TODO(pengx): optimize the GPU sorters' performance so that the CPU sorter can be removed.
+template<>
+struct Sorter<CPU_STL>
+{
+    typedef oclMat EigType;
+
+    // The comparator below is a plain static function, so it reads the
+    // response map through the static mat_eig; cs serializes callers so that
+    // concurrent sorts do not overwrite each other's mat_eig.
+    static cv::Mutex cs;
+    static Mat mat_eig;
+
+    // Ordering predicate for std::sort: non-zero when the mat_eig value at
+    // pt1 is greater than the one at pt2 (i.e. descending order).
+    // s[0] is used as the column and s[1] as the row, both rounded.
+    static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
+    {
+        const float first_val  = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
+        const float second_val = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
+        return first_val > second_val;
+    }
+
+    // Sorts the first `count` entries of `corners` on the host: the corners
+    // and the response map are converted to Mat, ordered with std::sort and
+    // the result is assigned back to `corners`.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        cv::AutoLock lock(cs);
+
+        //temporarily use STL's sort function
+        Mat host_corners = corners;
+        mat_eig = eig_tex;
+
+        MatIterator_<cl_float2> first = host_corners.begin<cl_float2>();
+        std::sort(first, first + count, clfloat2Gt);
+
+        corners = host_corners;
+    }
+};
+cv::Mutex Sorter<CPU_STL>::cs;
+cv::Mat Sorter<CPU_STL>::mat_eig;
+
+template<>
+struct Sorter<BITONIC>
+{
+    typedef TextureCL EigType;
+
+    // Launches the "sortCorners_bitonicSort" kernel once per (stage, pass)
+    // pair: stage runs over [0, numStages) and, for each stage, the pass
+    // index runs over [0, stage]. count / 2 work items are requested per
+    // launch with a local size of GROUP_SIZE.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+        size_t globalThreads[3] = {count / 2, 1, 1};
+        size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+        // 2^numStages should be equal to count or the output is invalid
+        int numStages = 0;
+        int remaining = count;
+        while(remaining > 1)
+        {
+            remaining >>= 1;
+            ++numStages;
+        }
+
+        // The last two kernel arguments point at these counters, so updating
+        // them in the loops below is enough to change what the kernel sees.
+        int stage = 0;
+        int passOfStage = 0;
+
+        std::string kernelname = "sortCorners_bitonicSort";
+        std::vector< std::pair<size_t, const void *> > args;
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&eig_tex));
+        args.push_back(std::make_pair(sizeof(cl_mem), (void *)&corners.data));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&count));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&stage));
+        args.push_back(std::make_pair(sizeof(cl_int), (void *)&passOfStage));
+
+        for(stage = 0; stage < numStages; ++stage)
+        {
+            for(passOfStage = 0; passOfStage <= stage; ++passOfStage)
+            {
+                openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
+            }
+        }
+    }
+};
+
+template<>
+struct Sorter<SELECTION>
+{
+    typedef TextureCL EigType;
+
+    // Runs two kernels back to back over `count` work items with a local
+    // size of GROUP_SIZE: first "sortCorners_selectionSortLocal", which gets
+    // an extra NULL-pointer argument of GROUP_SIZE * sizeof(cl_float2) bytes,
+    // then "sortCorners_selectionSortFinal" with that extra argument removed.
+    static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
+    {
+        Context * cxt = Context::getContext();
+
+        size_t globalThreads[3] = {count, 1, 1};
+        size_t localThreads[3] = {GROUP_SIZE, 1, 1};
+
+        // Arguments shared by both kernels.
+        std::vector< std::pair<size_t, const void *> > args;
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
+        args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
+        args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
+
+        //local
+        int local_bytes = GROUP_SIZE * sizeof(cl_float2);
+        args.push_back( std::make_pair( local_bytes, (void*)NULL) );
+        std::string local_kernel = "sortCorners_selectionSortLocal";
+        openCLExecuteKernel(cxt, &imgproc_gfft, local_kernel, globalThreads, localThreads, args, -1, -1);
+
+        //final
+        args.pop_back();
+        std::string final_kernel = "sortCorners_selectionSortFinal";
+        openCLExecuteKernel(cxt, &imgproc_gfft, final_kernel, globalThreads, localThreads, args, -1, -1);
+    }
+};
+
+// Launches the "findCorners" kernel over the whole eig texture and returns
+// the number of corners reported, capped at max_count.
+//
+//   eig       - response texture; its rows/cols define the launch grid
+//   threshold - passed to the kernel as a cl_float
+//   mask      - optional mask; when non-empty the kernel is built with
+//               "-D WITH_MASK"
+//   corners   - device buffer handed to the kernel for its output
+//   max_count - passed to the kernel and used to cap the returned count
+//
+// A 1x1 CV_32SC1 device counter is zeroed before the launch and read back
+// afterwards; presumably the kernel increments it once per accepted corner
+// (confirm against the kernel source).
+int findCorners_caller(
+    const TextureCL& eig,
+    const float threshold,
+    const oclMat& mask,
+    oclMat& corners,
+    const int max_count)
+{
+    Context * cxt = Context::getContext();
+
+    std::vector< std::pair<size_t, const void*> > args;
+    std::string kernelname = "findCorners";
+
+    // Mask row pitch expressed in elements rather than bytes.
+    const int mask_strip = mask.step / mask.elemSize1();
+
+    oclMat g_counter(1, 1, CV_32SC1);
+    g_counter.setTo(0);
+
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&eig ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip));
+    args.push_back(make_pair( sizeof(cl_float), (void*)&threshold ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols ));
+    args.push_back(make_pair( sizeof(cl_int), (void*)&max_count ));
+    args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
+
+    size_t globalThreads[3] = {eig.cols, eig.rows, 1};
+    size_t localThreads[3] = {16, 16, 1};
+
+    const char * opt = mask.empty() ? "" : "-D WITH_MASK";
+    openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
+
+    // The counter may exceed max_count, so clamp the value read back.
+    return std::min(Mat(g_counter).at<int>(0), max_count);
+}
+}//unnamed namespace
+
+// Detects corners in `image` and writes them to `corners` as a single-row
+// CV_32FC2 matrix.
+//
+// Steps, as implemented below:
+//   1. compute a per-pixel response map into eig_;
+//   2. take the maximum response and scale it by qualityLevel to obtain the
+//      threshold handed to findCorners_caller();
+//   3. sort the candidates with one of the Sorter<> specializations;
+//   4. if minDistance < 1, copy the leading candidates straight to `corners`;
+//      otherwise filter them on the host with a grid so that no two accepted
+//      points are closer than minDistance, then upload the result.
+//
+// `mask` may be empty; otherwise it must be CV_8UC1 with the size of `image`.
+// When no candidate is found, `corners` is released.
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
+{
+ CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
+ CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
+
+ CV_DbgAssert(support_image2d());
+
+ ensureSizeIsEnough(image.size(), CV_32F, eig_);
+
+ // NOTE(review): both branches call cornerMinEigenVal_dxdy(); the Harris
+ // branch only differs by passing harrisK as an extra trailing argument.
+ // Confirm that this argument is really a Harris "k" parameter of that
+ // function and that a dedicated Harris response call was not intended.
+ if (useHarrisDetector)
+ cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
+ else
+ cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
+
+ // Only the maximum is requested (minVal pointer is 0).
+ double maxVal = 0;
+ minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
+
+ // Candidate buffer: one row with at least 1000 entries, or 5% of the pixel count.
+ ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
+
+ Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
+ int total = findCorners_caller(
+ *eig_tex,
+ static_cast<float>(maxVal * qualityLevel),
+ mask,
+ tmpCorners_,
+ tmpCorners_.cols);
+
+ if (total == 0)
+ {
+ corners.release();
+ return;
+ }
+ // use_cpu_sorter is a file-level flag; the device sorters are only used when it is false.
+ if(use_cpu_sorter)
+ {
+ Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
+ }
+ else
+ {
+ //if total is power of 2
+ if(((total - 1) & (total)) == 0)
+ {
+ Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+ }
+ else
+ {
+ Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
+ }
+ }
+
+ if (minDistance < 1)
+ {
+ // No spacing constraint: keep the first min(maxCorners, total) entries
+ // (or all `total` entries when maxCorners is 0).
+ Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
+ tmpCorners_(roi_range).copyTo(corners);
+ }
+ else
+ {
+ // downloadPoints() resizes tmp to tmpCorners_.cols; only the first
+ // `total` entries are examined by the loop below.
+ vector<Point2f> tmp(total);
+ downloadPoints(tmpCorners_, tmp);
+
+ vector<Point2f> tmp2;
+ tmp2.reserve(total);
+
+ // Bucket accepted points into square cells of side cell_size so that
+ // only the 3x3 neighbourhood of a candidate's cell has to be searched.
+ const int cell_size = cvRound(minDistance);
+ const int grid_width = (image.cols + cell_size - 1) / cell_size;
+ const int grid_height = (image.rows + cell_size - 1) / cell_size;
+
+ std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
+
+ for (int i = 0; i < total; ++i)
+ {
+ Point2f p = tmp[i];
+
+ bool good = true;
+
+ int x_cell = static_cast<int>(p.x / cell_size);
+ int y_cell = static_cast<int>(p.y / cell_size);
+
+ int x1 = x_cell - 1;
+ int y1 = y_cell - 1;
+ int x2 = x_cell + 1;
+ int y2 = y_cell + 1;
+
+ // boundary check
+ x1 = std::max(0, x1);
+ y1 = std::max(0, y1);
+ x2 = std::min(grid_width - 1, x2);
+ y2 = std::min(grid_height - 1, y2);
+
+ for (int yy = y1; yy <= y2; yy++)
+ {
+ for (int xx = x1; xx <= x2; xx++)
+ {
+ vector<Point2f>& m = grid[yy * grid_width + xx];
+
+ if (!m.empty())
+ {
+ for(size_t j = 0; j < m.size(); j++)
+ {
+ float dx = p.x - m[j].x;
+ float dy = p.y - m[j].y;
+
+ // Compare squared distances to avoid a square root.
+ if (dx * dx + dy * dy < minDistance * minDistance)
+ {
+ good = false;
+ // Leave all three nested loops at once.
+ goto break_out;
+ }
+ }
+ }
+ }
+ }
+
+ break_out:
+
+ if(good)
+ {
+ grid[y_cell * grid_width + x_cell].push_back(p);
+
+ tmp2.push_back(p);
+
+ // Stop early once maxCorners points were accepted (0 means no limit).
+ if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
+ break;
+ }
+ }
+
+ // total > 0 here and the first candidate is always accepted (the grid is
+ // still empty), so tmp2 is non-empty and &tmp2[0] is valid.
+ corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
+ }
+}
+// Copies points.cols elements from the device buffer behind `points` into
+// `points_v` with a blocking clEnqueueReadBuffer().
+//
+//   points   - expected to be CV_32FC2 (checked in debug builds only); the
+//              read starts at buffer offset 0 and spans points.cols elements,
+//              so a single-row matrix is assumed - TODO confirm for callers
+//              passing ROIs or multi-row matrices
+//   points_v - resized to points.cols and overwritten
+//
+// When `points` has no columns, `points_v` is left empty and no read is
+// issued.
+void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v)
+{
+    CV_DbgAssert(points.type() == CV_32FC2);
+    points_v.resize(points.cols);
+
+    // Taking &points_v[0] on an empty vector is undefined behaviour and a
+    // zero-byte read is pointless, so bail out early.
+    if (points_v.empty())
+        return;
+
+    openCLSafeCall(clEnqueueReadBuffer(
+        *reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
+        reinterpret_cast<cl_mem>(points.data),
+        CL_TRUE,
+        0,
+        points.cols * sizeof(Point2f),
+        &points_v[0],
+        0,
+        NULL,
+        NULL));
+}
+
+
};
typedef struct
{
- //int rows;
- //int ystep;
int width_height;
- //int height;
int grpnumperline_totalgrp;
- //int totalgrp;
int imgoff;
float factor;
} detect_piramid_info;
-
-#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
+#ifdef WIN32
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
-typedef _ALIGNED_ON(128) struct GpuHidHaarFeature
-{
- _ALIGNED_ON(32) struct
- {
- _ALIGNED_ON(4) int p0 ;
- _ALIGNED_ON(4) int p1 ;
- _ALIGNED_ON(4) int p2 ;
- _ALIGNED_ON(4) int p3 ;
- _ALIGNED_ON(4) float weight ;
- }
- /*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ;
-}
-GpuHidHaarFeature;
-
typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode
{
_ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];
- //_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
- //_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
- //_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
- /*_ALIGNED_ON(16)*/
float weight[CV_HAAR_FEATURE_MAX] ;
- /*_ALIGNED_ON(4)*/
float threshold ;
- _ALIGNED_ON(8) float alpha[2] ;
+ _ALIGNED_ON(16) float alpha[3] ;
_ALIGNED_ON(4) int left ;
_ALIGNED_ON(4) int right ;
- // GpuHidHaarFeature feature __attribute__((aligned (128)));
}
GpuHidHaarTreeNode;
typedef _ALIGNED_ON(32) struct GpuHidHaarClassifier
{
_ALIGNED_ON(4) int count;
- //CvHaarFeature* orig_feature;
_ALIGNED_ON(8) GpuHidHaarTreeNode *node ;
_ALIGNED_ON(8) float *alpha ;
}
_ALIGNED_ON(4) int p2 ;
_ALIGNED_ON(4) int p3 ;
_ALIGNED_ON(4) float inv_window_area ;
- // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade;
#else
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
-typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
-{
- struct _ALIGNED_ON(32)
-{
- int p0 _ALIGNED_ON(4);
- int p1 _ALIGNED_ON(4);
- int p2 _ALIGNED_ON(4);
- int p3 _ALIGNED_ON(4);
- float weight _ALIGNED_ON(4);
-}
-rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
-}
-GpuHidHaarFeature;
-
-
typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64);
float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16);
float threshold;// _ALIGNED_ON(4);
- float alpha[2] _ALIGNED_ON(8);
+ float alpha[3] _ALIGNED_ON(16);
int left _ALIGNED_ON(4);
int right _ALIGNED_ON(4);
}
int p2 _ALIGNED_ON(4);
int p3 _ALIGNED_ON(4);
float inv_window_area _ALIGNED_ON(4);
- // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
} GpuHidHaarClassifierCascade;
#endif
const float icv_stage_threshold_bias = 0.0001f;
double globaltime = 0;
-
-// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
-// {
-// CvHaarClassifierCascade *cascade = 0;
-
-// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
-
-// if( stage_count <= 0 )
-// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
-
-// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
-// memset( cascade, 0, block_size );
-
-// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
-// cascade->flags = CV_HAAR_MAGIC_VAL;
-// cascade->count = stage_count;
-
-// return cascade;
-// }
-
-//static int globalcounter = 0;
-
-// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
-// {
-// if( _cascade && *_cascade )
-// {
-// cvFree( _cascade );
-// }
-// }
-
/* create more efficient internal representation of haar classifier cascade */
static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
{
hid_stage_classifier->two_rects = 1;
haar_classifier_ptr += stage_classifier->count;
- /*
- hid_stage_classifier->parent = (stage_classifier->parent == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->parent;
- hid_stage_classifier->next = (stage_classifier->next == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->next;
- hid_stage_classifier->child = (stage_classifier->child == -1)
- ? NULL : stage_classifier_ptr + stage_classifier->child;
-
- out->is_tree |= hid_stage_classifier->next != NULL;
- */
-
for( j = 0; j < stage_classifier->count; j++ )
{
CvHaarClassifier *classifier = stage_classifier->classifier + j;
GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j;
int node_count = classifier->count;
- // float* alpha_ptr = (float*)(haar_node_ptr + node_count);
float *alpha_ptr = &haar_node_ptr->alpha[0];
hid_classifier->count = node_count;
node->p[2][3] = 0;
node->weight[2] = 0;
}
- // memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
else
hid_stage_classifier->two_rects = 0;
- }
-
- memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
- haar_node_ptr = haar_node_ptr + 1;
- // (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
- // (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
+ memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
+ haar_node_ptr = haar_node_ptr + 1;
+ }
out->is_stump_based &= node_count == 1;
}
}
#define sum_elem_ptr(sum,row,col) \
- ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
+ ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
#define sqsum_elem_ptr(sqsum,row,col) \
- ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
+ ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
#define calc_sum(rect,offset) \
- ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
+ ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade,
- /* const CvArr* _sum,
- const CvArr* _sqsum,
- const CvArr* _tilted_sum,*/
double scale,
int step)
{
- // CvMat sum_stub, *sum = (CvMat*)_sum;
- // CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
- // CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
GpuHidHaarClassifierCascade *cascade;
int coi0 = 0, coi1 = 0;
int i;
if( scale <= 0 )
CV_Error( CV_StsOutOfRange, "Scale must be positive" );
- // sum = cvGetMat( sum, &sum_stub, &coi0 );
- // sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
-
if( coi0 || coi1 )
CV_Error( CV_BadCOI, "COI is not supported" );
- // if( !CV_ARE_SIZES_EQ( sum, sqsum ))
- // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
-
- // if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
- // CV_MAT_TYPE(sum->type) != CV_32SC1 )
- // CV_Error( CV_StsUnsupportedFormat,
- // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
if( !_cascade->hid_cascade )
gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1);
- if( cascade->has_tilted_features )
- {
- // tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
-
- // if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
- // CV_Error( CV_StsUnsupportedFormat,
- // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
-
- // if( sum->step != tilted->step )
- // CV_Error( CV_StsUnmatchedSizes,
- // "Sum and tilted_sum must have the same stride (step, widthStep)" );
-
- // if( !CV_ARE_SIZES_EQ( sum, tilted ))
- // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
- // cascade->tilted = *tilted;
- }
-
_cascade->scale = scale;
_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
- //cascade->sum = *sum;
- //cascade->sqsum = *sqsum;
-
equRect.x = equRect.y = cvRound(scale);
equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale);
equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale);
weight_scale = 1. / (equRect.width * equRect.height);
cascade->inv_window_area = weight_scale;
- // cascade->pq0 = equRect.y * step + equRect.x;
- // cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
- // cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
- // cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
-
cascade->pq0 = equRect.x;
cascade->pq1 = equRect.y;
cascade->pq2 = equRect.x + equRect.width;
{
CvHaarFeature *feature =
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
- /* GpuHidHaarClassifier* classifier =
- cascade->stage_classifier[i].classifier + j; */
- //GpuHidHaarFeature* hidfeature =
- // &cascade->stage_classifier[i].classifier[j].node[l].feature;
GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
double sum0 = 0, area0 = 0;
CvRect r[3];
/* align blocks */
for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
{
- //if( !hidfeature->rect[k].p0 )
- // break;
if(!hidnode->p[k][0])
break;
r[k] = feature->rect[k].r;
if( !feature->tilted )
{
- /* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
- hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
- hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
- hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
- */
- /*hidnode->p0[k] = tr.y * step + tr.x;
- hidnode->p1[k] = tr.y * step + tr.x + tr.width;
- hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
- hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
hidnode->p[k][0] = tr.x;
hidnode->p[k][1] = tr.y;
hidnode->p[k][2] = tr.x + tr.width;
}
else
{
- /* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
- hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
- hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
- hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
- */
-
hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width;
hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height;
hidnode->p[k][0] = tr.y * step + tr.x;
hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height;
}
-
- //hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
if( k == 0 )
area0 = tr.width * tr.height;
else
- //sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
sum0 += hidnode->weight[k] * tr.width * tr.height;
}
-
- // hidfeature->rect[0].weight = (float)(-sum0/area0);
hidnode->weight[0] = (float)(-sum0 / area0);
} /* l */
} /* j */
}
}
-static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
- /*double scale=0.0,*/
- /*int step*/)
+static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
{
GpuHidHaarClassifierCascade *cascade;
int i;
if(!hidnode->p[k][0])
break;
r[k] = feature->rect[k].r;
- // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
- // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
- // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
- // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
- }
+ }
nr = k;
for( k = 0; k < nr; k++ )
hidnode->p[k][3] = tr.height;
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
}
- //hidnode->weight[0]=(float)(-sum0/area0);
} /* l */
} /* j */
}
const double GROUP_EPS = 0.2;
CvSeq *result_seq = 0;
- cv::Ptr<CvMemStorage> temp_storage;
cv::ConcurrentRectVector allCandidates;
std::vector<cv::Rect> rectList;
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
CV_Error(CV_StsError, "Image too small");
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
CvSize winSize0 = cascade->orig_window_size;
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer));
openCLSafeCall(clReleaseMemObject(candidatebuffer));
+
}
else
{
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
nodenum * sizeof(GpuHidHaarTreeNode));
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
size_t globalThreads2[3] = {nodenum, 1, 1};
-
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
}
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
int blocksize = 8;
int grp_per_CU = 12;
size_t localThreads[3] = { blocksize, blocksize, 1 };
- size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
localThreads[1],
1 };
int outputsz = 256 * globalThreads[0] / localThreads[0];
CvHaarClassifierCascade *cascade = oldCascade;
GpuHidHaarClassifierCascade *gcascade;
GpuHidHaarStageClassifier *stage;
- GpuHidHaarClassifier *classifier;
- GpuHidHaarTreeNode *node;
if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
}
int *candidate;
-
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if( (flags & CV_HAAR_SCALE_IMAGE) )
{
int indexy = 0;
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
- classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
- node = (GpuHidHaarTreeNode *)(classifier->node);
-
- gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
-
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
- sizeof(GpuHidHaarStageClassifier) * gcascade->count,
- stage, 0, NULL, NULL));
-
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
- m_nodenum * sizeof(GpuHidHaarTreeNode),
- node, 0, NULL, NULL));
int startstage = 0;
int endstage = gcascade->count;
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
memset(candidate, 0, 4 * sizeof(int) * outputsz);
+
openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
for(int i = 0; i < outputsz; i++)
+ {
if(candidate[4 * i + 2] != 0)
+ {
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
candidate[4 * i + 2], candidate[4 * i + 3]));
-
+ }
+ }
free((void *)candidate);
candidate = NULL;
}
{
cv::ocl::integral(gimg, gsum, gsqsum);
- gpuSetHaarClassifierCascade(cascade);
-
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
- stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
- classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
- node = (GpuHidHaarTreeNode *)(classifier->node);
-
- cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
- m_nodenum * sizeof(GpuHidHaarTreeNode),
- node, 0, NULL, NULL));
-
- cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
- float *correction = (float *)malloc(sizeof(float) * m_loopcount);
- int startstage = 0;
- int endstage = gcascade->count;
- double factor;
- for(int i = 0; i < m_loopcount; i++)
- {
- factor = scalev[i];
- int equRect_x = (int)(factor * gcascade->p0 + 0.5);
- int equRect_y = (int)(factor * gcascade->p1 + 0.5);
- int equRect_w = (int)(factor * gcascade->p3 + 0.5);
- int equRect_h = (int)(factor * gcascade->p2 + 0.5);
- p[i].s[0] = equRect_x;
- p[i].s[1] = equRect_y;
- p[i].s[2] = equRect_x + equRect_w;
- p[i].s[3] = equRect_y + equRect_h;
- correction[i] = 1. / (equRect_w * equRect_h);
- int startnodenum = m_nodenum * i;
- float factor2 = (float)factor;
-
- vector<pair<size_t, const void *> > args1;
- args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
- args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
- args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
- args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
- args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
- size_t globalThreads2[3] = {m_nodenum, 1, 1};
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
- }
int step = gsum.step / 4;
int startnode = 0;
int splitstage = 3;
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
- openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+ int startstage = 0;
+ int endstage = gcascade->count;
vector<pair<size_t, const void *> > args;
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+ const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
candidate[4 * i + 2], candidate[4 * i + 3]));
}
-
- free(p);
- free(correction);
clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
}
-
rectList.resize(allCandidates.size());
if(!allCandidates.empty())
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
const int outputsz, const size_t localThreads[],
CvSize minSize, CvSize maxSize)
{
+ if(initialized)
+ {
+ return; // we only allow one time initialization
+ }
CvHaarClassifierCascade *cascade = oldCascade;
if( !CV_IS_HAAR_CLASSIFIER(cascade) )
int totalclassifier=0;
if( !cascade->hid_cascade )
+ {
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
+ }
if( maxSize.height == 0 || maxSize.width == 0 )
{
m_minSize = minSize;
m_maxSize = maxSize;
+ // initialize nodes
+ GpuHidHaarClassifierCascade *gcascade;
+ GpuHidHaarStageClassifier *stage;
+ GpuHidHaarClassifier *classifier;
+ GpuHidHaarTreeNode *node;
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue()); // queue used for every upload below
+ if( (flags & CV_HAAR_SCALE_IMAGE) )
+ {
+ gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
+ stage = (GpuHidHaarStageClassifier *)(gcascade + 1); // stage array is addressed directly after the cascade header
+ classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); // classifiers are addressed after gcascade->count stages
+ node = (GpuHidHaarTreeNode *)(classifier->node); // node pointer of the first classifier; start of the node upload
+
+ gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); // scale 1.0; step is gsum.step / 4 (presumably bytes -> int elements, TODO confirm)
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, // third argument 1 = blocking write
+ sizeof(GpuHidHaarStageClassifier) * gcascade->count,
+ stage, 0, NULL, NULL));
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, // uploads m_nodenum nodes, blocking
+ m_nodenum * sizeof(GpuHidHaarTreeNode),
+ node, 0, NULL, NULL));
+ }
+ else
+ {
+ gpuSetHaarClassifierCascade(cascade);
+
+ gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
+ stage = (GpuHidHaarStageClassifier *)(gcascade + 1); // same pointer arithmetic as the SCALE_IMAGE branch
+ classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
+ node = (GpuHidHaarTreeNode *)(classifier->node);
+
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, // unscaled nodes; input of the gpuscaleclassifier kernel below
+ m_nodenum * sizeof(GpuHidHaarTreeNode),
+ node, 0, NULL, NULL));
+
+ cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount); // host staging array, one entry per scale; NOTE(review): malloc result is not checked
+ float *correction = (float *)malloc(sizeof(float) * m_loopcount); // host staging array, one entry per scale; NOTE(review): malloc result is not checked
+ double factor;
+ for(int i = 0; i < m_loopcount; i++)
+ {
+ factor = scalev[i];
+ int equRect_x = (int)(factor * gcascade->p0 + 0.5); // + 0.5 then truncate: rounds to nearest for non-negative values
+ int equRect_y = (int)(factor * gcascade->p1 + 0.5);
+ int equRect_w = (int)(factor * gcascade->p3 + 0.5); // NOTE(review): width is taken from p3 and height from p2 - confirm this ordering is intended
+ int equRect_h = (int)(factor * gcascade->p2 + 0.5);
+ p[i].s[0] = equRect_x;
+ p[i].s[1] = equRect_y;
+ p[i].s[2] = equRect_x + equRect_w;
+ p[i].s[3] = equRect_y + equRect_h;
+ correction[i] = 1. / (equRect_w * equRect_h); // NOTE(review): divides by zero if either rounded extent is 0
+ int startnodenum = m_nodenum * i; // per-scale offset, passed to the kernel as its last argument
+ float factor2 = (float)factor;
+
+ vector<pair<size_t, const void *> > args1;
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->nodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
+ args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
+ args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
+
+ size_t globalThreads2[3] = {m_nodenum, 1, 1}; // one work-item per node
+
+ openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); // launched once per scale
+ }
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
+
+ free(p); // the uploads above are blocking, so the staging arrays can be freed here
+ free(correction);
+ }
initialized = true;
}
CvSize sz;
CvSize winSize0 = oldCascade->orig_window_size;
detect_piramid_info *scaleinfo;
+ cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
if (flags & CV_HAAR_SCALE_IMAGE)
{
for(factor = 1.f;; factor *= scaleFactor)
((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
}
- openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
+ openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
sizeof(detect_piramid_info)*loopcount,
scaleinfo, 0, NULL, NULL));
free(scaleinfo);
const std::vector<cv::Rect> &rectList,
const std::vector<int> &rweights)
{
- CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
+ MemStorage tempStorage(cvCreateMemStorage(0));
+ CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );
if( findBiggestObject && rectList.size() )
{
void cv::ocl::OclCascadeClassifierBuf::release()
{
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
-
- if( (m_flags & CV_HAAR_SCALE_IMAGE) )
- {
- cvFree(&oldCascade->hid_cascade);
- }
- else
+ if(initialized) // does nothing unless the flag is set; it is cleared at the end of this block, so a repeated call is a no-op
{
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
- openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
- }
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer)); // these four buffers are released in both modes
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
+
+ if( (m_flags & CV_HAAR_SCALE_IMAGE) )
+ {
+ cvFree(&oldCascade->hid_cascade); // the hidden cascade is freed only in CV_HAAR_SCALE_IMAGE mode
+ }
+ else
+ {
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); // these three buffers are released only when CV_HAAR_SCALE_IMAGE is not set
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
+ openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
+ }
- free(buffers);
- buffers = NULL;
+ free(buffers); // frees the OclBuffers holder itself
+ buffers = NULL; // avoids leaving a dangling pointer behind
+ initialized = false; // clears the flag tested at the top of this function
+ }
}
#ifndef _MAX_PATH
#define _MAX_PATH 1024
#endif
-
-
-/****************************************************************************************\
-* Persistence functions *
-\****************************************************************************************/
-
-/* field names */
-
-#define ICV_HAAR_SIZE_NAME "size"
-#define ICV_HAAR_STAGES_NAME "stages"
-#define ICV_HAAR_TREES_NAME "trees"
-#define ICV_HAAR_FEATURE_NAME "feature"
-#define ICV_HAAR_RECTS_NAME "rects"
-#define ICV_HAAR_TILTED_NAME "tilted"
-#define ICV_HAAR_THRESHOLD_NAME "threshold"
-#define ICV_HAAR_LEFT_NODE_NAME "left_node"
-#define ICV_HAAR_LEFT_VAL_NAME "left_val"
-#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
-#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
-#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
-#define ICV_HAAR_PARENT_NAME "parent"
-#define ICV_HAAR_NEXT_NAME "next"
-
-static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
-{
- return 1;
-}
-
-namespace cv
-{
-namespace ocl
-{
-
-struct gpuHaarDetectObjects_ScaleImage_Invoker
-{
- gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
- int _stripSize, double _factor,
- const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
- Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
- {
- cascade = _cascade;
- stripSize = _stripSize;
- factor = _factor;
- sum1 = _sum1;
- sqsum1 = _sqsum1;
- norm1 = _norm1;
- mask1 = _mask1;
- equRect = _equRect;
- vec = &_vec;
- }
-
- void operator()( const BlockedRange &range ) const
- {
- Size winSize0 = cascade->orig_window_size;
- Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor));
- int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height);
- Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1);
- int x, y, ystep = factor > 2 ? 1 : 2;
-
- for( y = y1; y < y2; y += ystep )
- for( x = 0; x < ssz.width; x += ystep )
- {
- if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
- vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
- winSize.width, winSize.height));
- }
- }
-
- const CvHaarClassifierCascade *cascade;
- int stripSize;
- double factor;
- Mat sum1, sqsum1, *norm1, *mask1;
- Rect equRect;
- ConcurrentRectVector *vec;
-};
-
-
-struct gpuHaarDetectObjects_ScaleCascade_Invoker
-{
- gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
- Size _winsize, const Range &_xrange, double _ystep,
- size_t _sumstep, const int **_p, const int **_pq,
- ConcurrentRectVector &_vec )
- {
- cascade = _cascade;
- winsize = _winsize;
- xrange = _xrange;
- ystep = _ystep;
- sumstep = _sumstep;
- p = _p;
- pq = _pq;
- vec = &_vec;
- }
-
- void operator()( const BlockedRange &range ) const
- {
- int iy, startY = range.begin(), endY = range.end();
- const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3];
- const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3];
- bool doCannyPruning = p0 != 0;
- int sstep = (int)(sumstep / sizeof(p0[0]));
-
- for( iy = startY; iy < endY; iy++ )
- {
- int ix, y = cvRound(iy * ystep), ixstep = 1;
- for( ix = xrange.start; ix < xrange.end; ix += ixstep )
- {
- int x = cvRound(ix * ystep); // it should really be ystep, not ixstep
-
- if( doCannyPruning )
- {
- int offset = y * sstep + x;
- int s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
- int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
- if( s < 100 || sq < 20 )
- {
- ixstep = 2;
- continue;
- }
- }
-
- int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */);
- if( result > 0 )
- vec->push_back(Rect(x, y, winsize.width, winsize.height));
- ixstep = result != 0 ? 1 : 2;
- }
- }
- }
-
- const CvHaarClassifierCascade *cascade;
- double ystep;
- size_t sumstep;
- Size winsize;
- Range xrange;
- const int **p;
- const int **pq;
- ConcurrentRectVector *vec;
-};
-
-}
-}
size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args)
{
- size_t wave_size = 0;
- queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName);
+ size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
+ openCLSafeCall(clReleaseKernel(kernel));
if (wave_size <= 16)
{
char build_options[64];
// Xu Pang, pangxu010@163.com
// Wu Zailong, bullet@yeah.net
// Wenju He, wenju@multicorewareinc.com
+// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
extern const char *imgproc_calcHarris;
extern const char *imgproc_calcMinEigenVal;
extern const char *imgproc_convolve;
+ extern const char *imgproc_clahe;
////////////////////////////////////OpenCL call wrappers////////////////////////////
template <typename T> struct index_and_sizeof;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
-
+ float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
vector< pair<size_t, const void *> > args;
if(map1.channels() == 2)
{
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
-
- if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
+
+ if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
}
else
{
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType)
{
+ oclMat dx, dy; // local outputs for cornerHarris_dxdy; destroyed when this wrapper returns
+ cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType); // all caller arguments are forwarded unchanged
+ }
+
// Runs the "calcHarris" kernel on src and writes the result to dst, using
// caller-supplied dx/dy matrices instead of internal temporaries.
//   src        - input image; CV_64F input is rejected with CV_GpuNotSupported
//                when the device lacks double support.
//   dst        - (re)created as CV_32F with the size of src.
//   dx, dy     - handed to extractCovData and then to corner_ocl; presumably
//                filled with the image derivatives (extractCovData is not
//                visible here - confirm).
//   blockSize, ksize - forwarded to extractCovData / corner_ocl.
//   k          - narrowed to float before being passed to the kernel.
//   borderType - must be BORDER_CONSTANT, BORDER_REFLECT101, BORDER_REPLICATE
//                or BORDER_REFLECT, otherwise the assertion below fails.
+ void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
+ double k, int borderType)
+ {
// Double-precision input needs device-side double support.
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
// The image must be at least half a block wide and high.
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
- corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
}
// Convenience overload of cornerMinEigenVal_dxdy for callers that do not need
// the derivative images: dx and dy are local temporaries that are dropped when
// this function returns. All argument checking happens in the _dxdy variant.
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{
+ oclMat dx, dy;
+ cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
+ }
+
// Runs the "calcMinEigenVal" kernel on src and writes the result to dst, using
// caller-supplied dx/dy matrices instead of internal temporaries.
//   src        - input image; CV_64F input is rejected with CV_GpuNotSupported
//                when the device lacks double support.
//   dst        - (re)created as CV_32F with the size of src.
//   dx, dy     - handed to extractCovData and then to corner_ocl; presumably
//                filled with the image derivatives (extractCovData is not
//                visible here - confirm).
//   blockSize, ksize - forwarded to extractCovData / corner_ocl.
//   borderType - must be BORDER_CONSTANT, BORDER_REFLECT101, BORDER_REPLICATE
//                or BORDER_REFLECT, otherwise the assertion below fails.
+ void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
+ {
// Double-precision input needs device-side double support.
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
}
// The image must be at least half a block wide and high.
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
- oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
- extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
+ extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F);
// The scalar slot that carries k for the Harris kernel is passed as 0 here.
- corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
+ corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
}
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1);
LUT(mat_src, lut, mat_dst);
}
+
+ ////////////////////////////////////////////////////////////////////////
+ // CLAHE
+ namespace clahe
+ {
+     // Rounds `total` up to the nearest multiple of `grain`.
+     // Note: despite the name this returns the rounded-up size, not the
+     // quotient; transform() relies on that to build a global work size
+     // that is a whole number of work-groups.
+     inline int divUp(int total, int grain)
+     {
+         return (total + grain - 1) / grain * grain;
+     }
+
+     // Launches the "calcLut" kernel from imgproc_clahe.
+     //   src       - image the kernel reads (data pointer and step are passed).
+     //   dst       - matrix the kernel writes (data pointer and step are passed).
+     //   tilesX/Y  - tile grid; one 32x8 work-group is launched per tile.
+     //               Only tilesX is passed to the kernel itself.
+     //   tileSize  - tile width/height, passed as an int2.
+     //   clipLimit - passed through to the kernel unchanged.
+     //   lutScale  - passed through to the kernel unchanged.
+     // On CPU devices the program is built with " -D CPU"; otherwise with
+     // " -D WAVE_SIZE=<n>", where n is queried from the kernel object.
+     static void calcLut(const oclMat &src, oclMat &dst,
+                         const int tilesX, const int tilesY, const cv::Size tileSize,
+                         const int clipLimit, const float lutScale)
+     {
+         cl_int2 tile_size;
+         tile_size.s[0] = tileSize.width;
+         tile_size.s[1] = tileSize.height;
+
+         std::vector<pair<size_t , const void *> > args;
+         args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
+         args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+         args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
+         args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
+
+         String kernelName = "calcLut";
+         size_t localThreads[3] = { 32, 8, 1 };
+         size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
+         bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
+         if (is_cpu)
+         {
+             openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
+         }
+         else
+         {
+             // A kernel object is created only to ask for its wave size.
+             cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
+             int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+             openCLSafeCall(clReleaseKernel(kernel));
+
+             // " -D WAVE_SIZE=" is 14 characters, an int prints in at most 11
+             // and one byte is needed for the terminator, so 32 bytes can
+             // never overflow (the previous 20-byte buffer could).
+             char opt[32] = {0};
+             sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+             openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
+         }
+     }
+
+     // Launches the "transform" kernel from imgproc_clahe.
+     //   src      - input image (data, step, cols and rows are passed).
+     //   dst      - output image (data and step are passed).
+     //   lut      - lookup-table matrix (data and step are passed).
+     //   tilesX/Y - tile grid, both passed to the kernel.
+     //   tileSize - tile width/height, passed as an int2.
+     // The global work size is src.cols x src.rows rounded up to whole 32x8
+     // work-groups.
+     static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
+                           const int tilesX, const int tilesY, const cv::Size tileSize)
+     {
+         cl_int2 tile_size;
+         tile_size.s[0] = tileSize.width;
+         tile_size.s[1] = tileSize.height;
+
+         std::vector<pair<size_t , const void *> > args;
+         args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
+         args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
+         args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.step ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows ));
+         args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
+         args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
+
+         String kernelName = "transform";
+         size_t localThreads[3] = { 32, 8, 1 };
+         size_t globalThreads[3] = { divUp(src.cols, localThreads[0]), divUp(src.rows, localThreads[1]), 1 };
+
+         openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1);
+     }
+ }
+
+ namespace
+ {
+     // OpenCL-backed implementation of the cv::ocl::CLAHE interface.
+     // Holds the clip limit and tile grid plus two scratch matrices that are
+     // reused between apply() calls and freed by collectGarbage().
+     class CLAHE_Impl : public cv::ocl::CLAHE
+     {
+     public:
+         CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
+
+         // Declared here; the definition is not part of this section.
+         cv::AlgorithmInfo* info() const;
+
+         void apply(const oclMat &src, oclMat &dst);
+
+         void setClipLimit(double clipLimit);
+         double getClipLimit() const;
+
+         void setTilesGridSize(cv::Size tileGridSize);
+         cv::Size getTilesGridSize() const;
+
+         void collectGarbage();
+
+     private:
+         double clipLimit_;  // values <= 0 make apply() pass a clip limit of 0
+         int tilesX_;        // number of tiles per row
+         int tilesY_;        // number of tiles per column
+
+         oclMat srcExt_;     // padded copy of the source, used when it does not split evenly
+         oclMat lut_;        // one 256-entry row per tile
+     };
+
+     CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
+         clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
+     {
+     }
+
+     // Computes the per-tile lookup tables for src and applies them into dst.
+     //   src - must be CV_8UC1.
+     //   dst - (re)created with the size and type of src.
+     // Fails an assertion if src is not CV_8UC1 or if the tile grid has a
+     // non-positive dimension.
+     void CLAHE_Impl::apply(const oclMat &src, oclMat &dst)
+     {
+         CV_Assert( src.type() == CV_8UC1 );
+         // The modulo and division by the tile counts below are undefined for
+         // zero, and a negative grid is meaningless, so reject both up front.
+         CV_Assert( tilesX_ > 0 && tilesY_ > 0 );
+
+         dst.create( src.size(), src.type() );
+
+         const int histSize = 256;
+
+         ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_);
+
+         cv::Size tileSize;
+         oclMat srcForLut;
+
+         if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
+         {
+             // The image splits evenly into tiles: use it directly.
+             tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
+             srcForLut = src;
+         }
+         else
+         {
+             // Pad on the bottom and right so the padded image splits evenly.
+             // NOTE(review): a dimension that is already divisible still gets
+             // a full tile-count of extra padding (tiles - 0); left unchanged.
+             cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar());
+
+             tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+             srcForLut = srcExt_;
+         }
+
+         const int tileSizeTotal = tileSize.area();
+         const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
+
+         // Convert the relative clip limit into a per-bin pixel count,
+         // never less than 1 when clipping is enabled.
+         int clipLimit = 0;
+         if (clipLimit_ > 0.0)
+         {
+             clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
+             clipLimit = std::max(clipLimit, 1);
+         }
+
+         clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale);
+         //finish();
+         // The transform reads the original (unpadded) source.
+         clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize);
+     }
+
+     void CLAHE_Impl::setClipLimit(double clipLimit)
+     {
+         clipLimit_ = clipLimit;
+     }
+
+     double CLAHE_Impl::getClipLimit() const
+     {
+         return clipLimit_;
+     }
+
+     // Stores the grid as given; it is validated on the next apply() call.
+     void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
+     {
+         tilesX_ = tileGridSize.width;
+         tilesY_ = tileGridSize.height;
+     }
+
+     cv::Size CLAHE_Impl::getTilesGridSize() const
+     {
+         return cv::Size(tilesX_, tilesY_);
+     }
+
+     // Releases the scratch matrices kept between apply() calls.
+     void CLAHE_Impl::collectGarbage()
+     {
+         srcExt_.release();
+         lut_.release();
+     }
+ }
+
+ // Factory for the OpenCL CLAHE implementation: splits the grid size into
+ // its tile counts and wraps a new CLAHE_Impl in a smart pointer.
+ cv::Ptr<cv::ocl::CLAHE> createCLAHE(double clipLimit, cv::Size tileGridSize)
+ {
+     const int tilesAcross = tileGridSize.width;
+     const int tilesDown = tileGridSize.height;
+     cv::Ptr<cv::ocl::CLAHE> algorithm(new CLAHE_Impl(clipLimit, tilesAcross, tilesDown));
+     return algorithm;
+ }
+
//////////////////////////////////bilateralFilter////////////////////////////////////////////////////
static void
oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d,
oclinfo.push_back(ocltmpinfo);
}
}
+ if(devcienums > 0)
+ {
+ setDevice(oclinfo[0]);
+ }
return devcienums;
}
clFinish(Context::getContext()->impl->clCmdQueue);
}
- void queryDeviceInfo(DEVICE_INFO info_type, void* info)
+ //template specializations of queryDeviceInfo
// IS_CPU_DEVICE specialization: reports whether the currently selected device
// (impl->devices[impl->devnum]) has device type CVCL_DEVICE_TYPE_CPU.
// The cl_kernel parameter is unnamed and unused.
+ template<>
+ bool queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel)
{
- static Info::Impl* impl = Context::getContext()->impl;
- switch(info_type)
- {
- case WAVEFRONT_SIZE:
- {
- bool is_cpu = false;
- queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
- if(is_cpu)
- {
- *(int*)info = 1;
- return;
- }
-#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
- try
- {
- openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
- CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0));
- }
- catch(const cv::Exception&)
-#elif defined (CL_DEVICE_WARP_SIZE_NV)
- const int EXT_LEN = 4096 + 1 ;
- char extends_set[EXT_LEN];
- size_t extends_size;
- openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
- extends_set[EXT_LEN - 1] = 0;
- if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos)
- {
- openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
- CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0));
- }
- else
-#endif
- {
- // if no way left for us to query the warp size, we can get it from kernel group info
- static const char * _kernel_string = "__kernel void test_func() {}";
- cl_kernel kernel;
- kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func");
- openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
- CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL));
- }
// The implementation pointer is re-read on every call (the removed code
// cached it in a function-local static).
+ Info::Impl* impl = Context::getContext()->impl;
+ cl_device_type devicetype;
+ openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
+ CL_DEVICE_TYPE, sizeof(cl_device_type),
+ &devicetype, NULL));
// Exact equality: a device reporting a combination of type bits is not treated as CPU.
+ return (devicetype == CVCL_DEVICE_TYPE_CPU);
+ }
- }
- break;
- case IS_CPU_DEVICE:
- {
- cl_device_type devicetype;
- openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
- CL_DEVICE_TYPE, sizeof(cl_device_type),
- &devicetype, NULL));
- *(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU);
- }
- break;
- default:
- CV_Error(-1, "Invalid device info type");
- break;
+ // Returns the wave size to use for `kernel`, converted to _ty.
+ // CPU devices always report 1; for any other device the kernel must be
+ // non-NULL and its CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE on the
+ // currently selected device is returned.
+ template<typename _ty>
+ static _ty queryWavesize(cl_kernel kernel)
+ {
+     if(queryDeviceInfo<IS_CPU_DEVICE, bool>())
+     {
+         return 1;
}
+     CV_Assert(kernel != NULL);
+
+     Info::Impl* const deviceImpl = Context::getContext()->impl;
+     size_t preferredMultiple = 0;
+     openCLSafeCall(clGetKernelWorkGroupInfo(kernel, deviceImpl->devices[deviceImpl->devnum],
+                    CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferredMultiple, NULL));
+     return static_cast<_ty>(preferredMultiple);
+ }
+
// WAVEFRONT_SIZE specialization returning size_t; forwards to queryWavesize.
+ template<>
+ size_t queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel)
+ {
+ return queryWavesize<size_t>(kernel);
+ }
// WAVEFRONT_SIZE specialization returning int; forwards to queryWavesize,
// which narrows the queried size_t value with a static_cast.
+ template<>
+ int queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel)
+ {
+ return queryWavesize<int>(kernel);
}
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
int Context::val = 0;
static Mutex cs;
static volatile int context_tear_down = 0;
+
+ // Reports whether the global OpenCL context is usable: the init flag
+ // Context::val must be non-zero and both the command queue and the
+ // OpenCL context handle must be non-NULL. The checks run in that order,
+ // so the context object is only dereferenced once the flag is set.
+ bool initialized()
+ {
+     if(*((volatile int*)&Context::val) == 0)
+     {
+         return false;
+     }
+     if(Context::clCxt->impl->clCmdQueue == NULL)
+     {
+         return false;
+     }
+     return Context::clCxt->impl->oclcontext != NULL;
+ }
+
Context* Context::getContext()
{
if(*((volatile int*)&val) != 1)
clCxt.reset(new Context);
std::vector<Info> oclinfo;
CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0);
- oclinfo[0].impl->setDevice(0, 0, 0);
- clCxt.get()->impl = oclinfo[0].impl->copy();
*((volatile int*)&val) = 1;
}
Context* cv_ctx = Context::getContext();
if(cv_ctx)
{
- cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext);
+ cl_context ctx = cv_ctx->impl->oclcontext;
if(ctx)
openCLSafeCall(clReleaseContext(ctx));
}
//
//M*/
-#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "precomp.hpp"
// Helper macros for switching a single GCC warning off and back on around a
// region of code:
//   GCC_DIAG_OFF(name)  - ignore -W<name> from here on
//   GCC_DIAG_ON(name)   - undo the matching GCC_DIAG_OFF
// `name` is written without the -W prefix, e.g. deprecated-declarations.
// The macros are only defined when __GNUC__ is defined, so every use must be
// wrapped in #ifdef __GNUC__ as well.
+#ifdef __GNUC__
// Compilers reporting a version of 4.2 or later get real pragmas.
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
// Stringify the token-pasted pair, i.e. turn (-W, name) into "-Wname".
+#define GCC_DIAG_STR(s) #s
+#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
// _Pragma takes a string literal, so the pragma text is stringified here.
+# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
+# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
// From version 4.6 on, push/pop is used so the previous warning state is restored.
+# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
+# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
+GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
+# else
// Without push/pop, GCC_DIAG_ON sets the diagnostic back to "warning"
// regardless of what it was before.
+# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
+# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
+# endif
+#else
// Older versions: both macros expand to nothing.
+# define GCC_DIAG_OFF(x)
+# define GCC_DIAG_ON(x)
+#endif
+#endif /* __GNUC__ */
+
using namespace std;
namespace cv
build_options, finish_mode);
}
+#ifdef __GNUC__
+ GCC_DIAG_OFF(deprecated-declarations)
+#endif
cl_mem bindTexture(const oclMat &mat)
{
cl_mem texture;
format.image_channel_order = CL_RGBA;
break;
default:
- CV_Error(-1, "Image forma is not supported");
+ CV_Error(-1, "Image format is not supported");
break;
}
#ifdef CL_VERSION_1_2
else
#endif
{
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
texture = clCreateImage2D(
(cl_context)mat.clCxt->oclContext(),
CL_MEM_READ_WRITE,
0,
NULL,
&err);
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
}
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.cols, mat.rows, 1 };
openCLSafeCall(err);
return texture;
}
+#ifdef __GNUC__
+ GCC_DIAG_ON(deprecated-declarations)
+#endif
+
+ // Binds `mat` as a texture via bindTexture and wraps the resulting cl_mem,
+ // together with the matrix rows, cols and type, in a TextureCL smart pointer.
+ Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
+ {
+     cl_mem boundImage = bindTexture(mat);
+     Ptr<TextureCL> wrapped(new TextureCL(boundImage, mat.rows, mat.cols, mat.type()));
+     return wrapped;
+ }
void releaseTexture(cl_mem& texture)
{
openCLFree(texture);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 3)
+#define dst_align ((dst_offset / 2) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
#ifdef dst_align
#undef dst_align
#endif
-#define dst_align ((dst_offset >> 1) & 1)
+#define dst_align ((dst_offset / 2) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
}
#endif
+#ifdef DOUBLE_SUPPORT
+#define SCALAR_TYPE double
+#else
+#define SCALAR_TYPE float
+#endif
+
__kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *dst, int dst_step, int dst_offset,
- int rows, int cols, int dst_step1, float scalar)
+ int rows, int cols, int dst_step1, SCALAR_TYPE scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////Macro for define elements number per thread/////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
-#define ANCHOR 3
-#define ANX 1
-#define ANY 1
+//#define ANCHOR 3
+//#define ANX 1
+//#define ANY 1
#define ROWS_PER_GROUP 4
#define ROWS_PER_GROUP_BITS 2
for(int i = 0; i < ANCHOR; i++)
{
-#pragma unroll 3
+#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
for(int i = 0; i < ANCHOR; i++)
{
-#pragma unroll 3
+#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
for(int i = 0; i < ANCHOR; i++)
{
-#pragma unroll 3
+#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
int gidy = get_global_id(1);
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
- if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
+ if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
{
*(__global uchar4*)&dst[out_addr] = res;
}
// Wang Weiyan, wangweiyanster@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Nathan, liujun@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
typedef int sumtype;
typedef float sqsumtype;
-typedef struct __attribute__((aligned (128))) GpuHidHaarFeature
-{
- struct __attribute__((aligned (32)))
-{
- int p0 __attribute__((aligned (4)));
- int p1 __attribute__((aligned (4)));
- int p2 __attribute__((aligned (4)));
- int p3 __attribute__((aligned (4)));
- float weight __attribute__((aligned (4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
-}
-GpuHidHaarFeature;
-
+#ifndef STUMP_BASED
+#define STUMP_BASED 1
+#endif
typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64)));
- float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
- float threshold /*__attribute__((aligned (4)))*/;
- float alpha[2] __attribute__((aligned (8)));
+ float weight[CV_HAAR_FEATURE_MAX];
+ float threshold;
+ float alpha[3] __attribute__((aligned (16)));
int left __attribute__((aligned (4)));
int right __attribute__((aligned (4)));
}
float inv_window_area __attribute__((aligned (4)));
} GpuHidHaarClassifierCascade;
-
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
global GpuHidHaarStageClassifier * stagecascadeptr,
global int4 * info,
float stage_sum = 0.f;
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
float stagethreshold = as_float(stageinfo.y);
- for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
+ for(int nodeloop = 0; nodeloop < stageinfo.x; )
{
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
+
float nodethreshold = w.w * variance_norm_factor;
info1.x +=lcl_off;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
- stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+ bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+ stage_sum += passThres ? alpha3.y : alpha3.x;
nodecounter++;
+ nodeloop++;
+#else
+ bool isRootNode = (nodecounter & 1) == 0;
+ if(isRootNode)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ nodecounter ++;
+ }
+ else
+ {
+ stage_sum += alpha3.x;
+ nodecounter += 2;
+ nodeloop ++;
+ }
+ }
+ else
+ {
+ stage_sum += passThres ? alpha3.z : alpha3.y;
+ nodecounter ++;
+ nodeloop ++;
+ }
+#endif
}
result = (stage_sum >= stagethreshold);
if(lcl_compute_win_id < queuecount)
{
-
int tempnodecounter = lcl_compute_id;
float part_sum = 0.f;
- for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
+ const int stump_factor = STUMP_BASED ? 1 : 2;
+ int root_offset = 0;
+ for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
{
- __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
+ __global GpuHidHaarTreeNode* currentnodeptr =
+ nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
info1.x +=queue_pixel;
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
- part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
- tempnodecounter +=lcl_compute_win;
+ bool passThres = classsum >= nodethreshold;
+#if STUMP_BASED
+ part_sum += passThres ? alpha3.y : alpha3.x;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+#else
+ if(root_offset == 0)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ root_offset = 1;
+ }
+ else
+ {
+ part_sum += alpha3.x;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+ }
+ }
+ else
+ {
+ part_sum += passThres ? alpha3.z : alpha3.y;
+ tempnodecounter += lcl_compute_win;
+ lcl_loop++;
+ root_offset = 0;
+ }
+#endif
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
partialsum[lcl_id]=part_sum;
}
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-/*
-if(stagecascade->two_rects)
-{
- #pragma unroll
- for( n = 0; n < stagecascade->count; n++ )
- {
- t1 = *(node + counter);
- t = t1.threshold * variance_norm_factor;
- classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
-
- classsum += calc_sum1(t1, p_offset,1) * t1.weight[1];
- stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
-
- counter++;
- }
-}
-else
-{
- #pragma unroll
- for( n = 0; n < stagecascade->count; n++ )
- {
- t = node[counter].threshold*variance_norm_factor;
- classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
- classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
-
- if( node[counter].p0[2] )
- classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
-
- stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
-
- counter++;
- }
-}
-*/
-/*
-__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
- constant GpuHidHaarClassifierCascade * _cascade,
- global GpuHidHaarStageClassifier * stagecascadeptr,
- //global GpuHidHaarClassifier * classifierptr,
- global GpuHidHaarTreeNode * nodeptr,
- global int * sum,
- global float * sqsum,
- global int * _candidate,
- int pixel_step,
- int cols,
- int rows,
- int start_stage,
- int end_stage,
- //int counts,
- int nodenum,
- int ystep,
- int detect_width,
- //int detect_height,
- int loopcount,
- int outputstep)
- //float scalefactor)
-{
-unsigned int x1 = get_global_id(0);
-unsigned int y1 = get_global_id(1);
-int p_offset;
-int m, n;
-int result;
-int counter;
-float mean, variance_norm_factor;
-for(int i=0;i<loopcount;i++)
-{
-constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
-global int * candidate = _candidate + i*outputstep;
-int window_width = cascade->p1 - cascade->p0;
-int window_height = window_width;
-result = 1;
-counter = 0;
-unsigned int x = mul24(x1,ystep);
-unsigned int y = mul24(y1,ystep);
-if((x < cols - window_width - 1) && (y < rows - window_height -1))
-{
-global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
-//global GpuHidHaarClassifier *classifier = classifierptr;
-global GpuHidHaarTreeNode *node = nodeptr + nodenum*i;
-
-p_offset = mad24(y, pixel_step, x);// modify
-
-mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
- *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
- *cascade->inv_window_area;
-
-variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
- *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
-variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
-variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
-
-// if( cascade->is_stump_based )
-//{
-for( m = start_stage; m < end_stage; m++ )
-{
-float stage_sum = 0.f;
-float t, classsum;
-GpuHidHaarTreeNode t1;
-
-//#pragma unroll
-for( n = 0; n < stagecascade->count; n++ )
-{
- t1 = *(node + counter);
- t = t1.threshold * variance_norm_factor;
- classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
-
- if((t1.p0[2]) && (!stagecascade->two_rects))
- classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
-
- stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
- counter++;
-}
-
-if (stage_sum < stagecascade->threshold)
-{
- result = 0;
- break;
-}
-
-stagecascade++;
-
-}
-if(result)
-{
- candidate[4 * (y1 * detect_width + x1)] = x;
- candidate[4 * (y1 * detect_width + x1) + 1] = y;
- candidate[4 * (y1 * detect_width + x1)+2] = window_width;
- candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
-}
-//}
-}
-}
-}
-*/
-
-
-
-
// @Authors
// Wu Xinglong, wxl370@126.com
// Sen Liu, swjtuls1987@126.com
-//
+// Peng Xiao, pengxiao@outlook.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
#define CV_HAAR_FEATURE_MAX 3
typedef int sumtype;
typedef float sqsumtype;
-typedef struct __attribute__((aligned(128))) GpuHidHaarFeature
-{
- struct __attribute__((aligned(32)))
-{
- int p0 __attribute__((aligned(4)));
- int p1 __attribute__((aligned(4)));
- int p2 __attribute__((aligned(4)));
- int p3 __attribute__((aligned(4)));
- float weight __attribute__((aligned(4)));
-}
-rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
-}
-GpuHidHaarFeature;
+
typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
{
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64)));
float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
float threshold /*__attribute__((aligned (4)))*/;
- float alpha[2] __attribute__((aligned(8)));
+ float alpha[3] __attribute__((aligned(16)));
int left __attribute__((aligned(4)));
int right __attribute__((aligned(4)));
}
const int p_offset = mad24(y, step, x);
cascadeinfo.x += p_offset;
cascadeinfo.z += p_offset;
- mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
- sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
+ mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+ - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+ sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+ + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
* correction_t;
- variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
- sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
+ variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
+ - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
+ sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
+ + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
bool result = true;
nodecounter = startnode + nodecount * scalei;
-
for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
{
float stage_sum = 0.f;
int stagecount = stagecascadeptr[stageloop].count;
- for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
+ for (int nodeloop = 0; nodeloop < stagecount;)
{
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
- float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
+ float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
+
info1.x += p_offset;
info1.z += p_offset;
info2.x += p_offset;
info2.z += p_offset;
- float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
- sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
- classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
- sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
info3.x += p_offset;
info3.z += p_offset;
- classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
- sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
- stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
+ float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)]
+ - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
+ sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)]
+ + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
+ classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)]
+ - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
+ sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)]
+ + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
+ classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)]
+ - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
+ sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
+ + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
+
+ bool passThres = classsum >= nodethreshold;
+
+#if STUMP_BASED
+ stage_sum += passThres ? alpha3.y : alpha3.x;
nodecounter++;
+ nodeloop++;
+#else
+ bool isRootNode = (nodecounter & 1) == 0;
+ if(isRootNode)
+ {
+ if( (passThres && currentnodeptr->right) ||
+ (!passThres && currentnodeptr->left))
+ {
+ nodecounter ++;
+ }
+ else
+ {
+ stage_sum += alpha3.x;
+ nodecounter += 2;
+ nodeloop ++;
+ }
+ }
+ else
+ {
+ stage_sum += (passThres ? alpha3.z : alpha3.y);
+ nodecounter ++;
+ nodeloop ++;
+ }
+#endif
}
- result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
+ result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
}
barrier(CLK_LOCAL_MEM_FENCE);
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) | x;
}
-
barrier(CLK_LOCAL_MEM_FENCE);
int queuecount = lclcount[0];
newnode[counter].threshold = t1.threshold;
newnode[counter].alpha[0] = t1.alpha[0];
newnode[counter].alpha[1] = t1.alpha[1];
+ newnode[counter].alpha[2] = t1.alpha[2];
}
data[2][i] = dy_data[i] * dy_data[i];
}
#else
- for(int i=0; i < ksY+1; i++)
- {
+ int clamped_col = min(dst_cols, col);
+ for(int i=0; i < ksY+1; i++)
+ {
int dx_selected_row;
int dx_selected_col;
dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
- dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
- dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
+ dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
+ dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
int dy_selected_row;
int dy_selected_col;
dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
- dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
- dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
+ dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
+ dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];
- }
+ }
#endif
float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
for(int i=1; i < ksY; i++)
data[2][i] = dy_data[i] * dy_data[i];
}
#else
- for(int i=0; i < ksY+1; i++)
- {
+ int clamped_col = min(dst_cols, col);
+
+ for(int i=0; i < ksY+1; i++)
+ {
int dx_selected_row;
int dx_selected_col;
dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
- dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
- dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
+ dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
+ dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
int dy_selected_row;
int dy_selected_col;
dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
- dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
- dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
+ dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
+ dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];
- }
+ }
#endif
float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
for(int i=1; i < ksY; i++)
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ mag += mag_offset;
+ map += map_offset;
+
__local float smem[18][18];
int gidx = get_global_id(0);
(
__global int * map,
__global ushort2 * st,
- volatile __global unsigned int * counter,
+ __global unsigned int * counter,
int rows,
int cols,
int map_step,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
__local int smem[18][18];
int gidx = get_global_id(0);
if(ly < 14)
{
smem[ly][lx] =
- map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
}
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
{
smem[ly + 14][lx] =
- map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
+ map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
}
barrier(CLK_LOCAL_MEM_FENCE);
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
+
#define stack_size 512
__kernel
-void edgesHysteresisGlobal
+void
+__attribute__((reqd_work_group_size(128,1,1)))
+edgesHysteresisGlobal
(
__global int * map,
__global ushort2 * st1,
__global ushort2 * st2,
- volatile __global int * counter,
+ __global int * counter,
int rows,
int cols,
int count,
map_step /= sizeof(*map);
map_offset /= sizeof(*map);
+ map += map_offset;
+
int gidx = get_global_id(0);
int gidy = get_global_id(1);
int grp_idx = get_group_id(0);
int grp_idy = get_group_id(1);
- volatile __local unsigned int s_counter;
+ __local unsigned int s_counter;
__local unsigned int s_ind;
__local ushort2 s_st[stack_size];
pos.x += c_dx[lidx & 7];
pos.y += c_dy[lidx & 7];
- if (map[pos.x + map_offset + pos.y * map_step] == 1)
+ if (map[pos.x + pos.y * map_step] == 1)
{
- map[pos.x + map_offset + pos.y * map_step] = 2;
+ map[pos.x + pos.y * map_step] = 2;
ind = atomic_inc(&s_counter);
if(gidy < rows && gidx < cols)
{
- dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
+ dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
}
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Sen Liu, swjtuls1987@126.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// WAVE_SIZE selects which barriers the non-CPU reduce() in this file compiles in:
+// each "#if WAVE_SIZE < N" guard there inserts a work-group barrier between steps.
+// It may be predefined by the program build options; the fallback value of 1
+// satisfies every such guard, i.e. it assumes no lock-step execution at all.
+// NOTE(review): presumably this is the device wavefront/warp width - confirm on the host side.
+#ifndef WAVE_SIZE
+#define WAVE_SIZE 1
+#endif
+
+// Turns the per-work-item values into an inclusive running sum over 256 slots.
+//   smem : local scratch buffer, at least 256 ints
+//   val  : value contributed by this work-item (stored at smem[tid])
+//   tid  : slot index of this work-item
+// Returns the running sum up to and including slot tid.
+// Every work-item of the group must call this (it contains barriers).
+int calc_lut(__local int* smem, int val, int tid)
+{
+    smem[tid] = val;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Work-item 0 performs the whole scan serially; everyone else just waits.
+    if (tid == 0)
+    {
+        int running = smem[0];
+        for (int bin = 1; bin < 256; ++bin)
+        {
+            running += smem[bin];
+            smem[bin] = running;
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    return smem[tid];
+}
+
+#ifdef CPU
+// CPU variant of the work-group sum: a barrier follows every halving step.
+//   smem : local scratch buffer; slots [0, 256) hold the partial sums and
+//          slot 256 receives the final total
+//   val  : value contributed by this work-item
+//   tid  : slot index of this work-item
+// Reads smem[tid + 128], so it assumes 256 slots are populated - TODO confirm
+// that the host always launches 256 work-items per group.
+void reduce(volatile __local int* smem, int val, int tid)
+{
+    smem[tid] = val;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Fold the upper half onto the lower half: 128, 64, 32, 16, 8, 4, 2.
+    for (int half = 128; half > 1; half >>= 1)
+    {
+        if (tid < half)
+        {
+            smem[tid] += smem[tid + half];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    // The last pair is combined into slot 256 rather than slot 0.
+    if (tid < 1)
+    {
+        smem[256] = smem[tid] + smem[tid + 1];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+}
+#else
+// Non-CPU variant of the work-group sum. The total ends up in smem[0].
+//   smem : local scratch buffer with at least 256 populated slots
+//   val  : value contributed by this work-item
+//   tid  : slot index of this work-item
+// Steps whose stride is smaller than WAVE_SIZE run without a barrier and rely on
+// the work-items of one wavefront executing in lock-step (hence the volatile
+// pointer). Every step whose stride is >= WAVE_SIZE is separated by a barrier.
+// The guards now go all the way down to stride 1: previously the +8/+4/+2/+1
+// steps were never separated, which raced whenever WAVE_SIZE < 8 (including the
+// default of 1).
+// No barrier is issued after the final step; the caller must synchronise before
+// other work-items read smem[0].
+void reduce(__local volatile int* smem, int val, int tid)
+{
+    smem[tid] = val;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 128)
+    {
+        smem[tid] = val += smem[tid + 128];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 64)
+    {
+        smem[tid] = val += smem[tid + 64];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // From here on each "#if" closes the current block, synchronises the group
+    // and reopens a block with half as many active work-items.
+    if (tid < 32)
+    {
+        smem[tid] += smem[tid + 32];
+#if WAVE_SIZE < 32
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 16) {
+#endif
+        smem[tid] += smem[tid + 16];
+#if WAVE_SIZE < 16
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 8) {
+#endif
+        smem[tid] += smem[tid + 8];
+#if WAVE_SIZE < 8
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 4) {
+#endif
+        smem[tid] += smem[tid + 4];
+#if WAVE_SIZE < 4
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 2) {
+#endif
+        smem[tid] += smem[tid + 2];
+#if WAVE_SIZE < 2
+    } barrier(CLK_LOCAL_MEM_FENCE);
+    if (tid < 1) {
+#endif
+        smem[tid] += smem[tid + 1];
+    }
+}
+#endif
+
+// Builds the lookup table of one tile; the tile is selected by the work-group id
+// (tx, ty) and its table is written at lut[(ty * tilesX + tx) * dstStep + tid].
+//   src       : 8-bit source image, srcStep elements per row
+//   lut       : output tables, dstStep elements per tile
+//   tileSize  : tile width (x) and height (y) in pixels
+//   tilesX    : number of tiles per row of tiles
+//   clipLimit : per-bin cap applied to the histogram; values <= 0 disable clipping
+//   lutScale  : factor applied to the cumulative histogram before rounding
+// Stages: local histogram -> optional clip and even redistribution of the
+// clipped excess -> cumulative sum -> scale, round, saturate to uchar.
+// NOTE(review): each work-item clears and owns exactly one bin (smem[tid]), so
+// the kernel appears to assume 256 work-items per group - confirm on the host.
+__kernel void calcLut(__global __const uchar * src, __global uchar * lut,
+                      const int srcStep, const int dstStep,
+                      const int2 tileSize, const int tilesX,
+                      const int clipLimit, const float lutScale)
+{
+    __local int smem[512];
+    // Declared here rather than inside the clipping branch: OpenCL C only allows
+    // __local variables at kernel function scope.
+    __local int totalClipped;
+
+    const int tx = get_group_id(0);
+    const int ty = get_group_id(1);
+    const unsigned int tid = get_local_id(1) * get_local_size(0)
+                             + get_local_id(0);
+
+    smem[tid] = 0;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Histogram of the tile: the work-items stride over its rows and columns.
+    for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
+    {
+        __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i,
+                                                    srcStep, tx * tileSize.x );
+        for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
+        {
+            const int data = srcPtr[j];
+            atomic_inc(&smem[data]);
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    int tHistVal = smem[tid];
+
+    // Everyone must have fetched its bin before smem is reused as scratch.
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // clipLimit is a kernel argument, so the whole group takes the same branch
+    // and the barriers inside are reached by every work-item.
+    if (clipLimit > 0)
+    {
+        // clip histogram bar
+
+        int clipped = 0;
+        if (tHistVal > clipLimit)
+        {
+            clipped = tHistVal - clipLimit;
+            tHistVal = clipLimit;
+        }
+
+        // find number of overall clipped samples
+
+        reduce(smem, clipped, tid);
+        barrier(CLK_LOCAL_MEM_FENCE);
+        // The two reduce() variants leave the total in different slots.
+#ifdef CPU
+        clipped = smem[256];
+#else
+        clipped = smem[0];
+#endif
+
+        // broadcast evaluated value
+
+        if (tid == 0)
+            totalClipped = clipped;
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        // redistribute clipped samples evenly
+
+        int redistBatch = totalClipped / 256;
+        tHistVal += redistBatch;
+
+        // The first `residual` bins absorb what does not divide evenly by 256.
+        int residual = totalClipped - redistBatch * 256;
+        if (tid < residual)
+            ++tHistVal;
+    }
+
+    const int lutVal = calc_lut(smem, tHistVal, tid);
+    uint ires = (uint)convert_int_rte(lutScale * lutVal);
+    lut[(ty * tilesX + tx) * dstStep + tid] =
+        convert_uchar(clamp(ires, (uint)0, (uint)255));
+}
+
+// Maps every pixel through the lookup tables of its four nearest tiles and
+// blends the four results bilinearly. One work-item per pixel (x, y).
+//   src / dst : 8-bit images with srcStep / dstStep elements per row
+//   lut       : per-tile tables, lutStep elements per tile
+//   cols/rows : image size; work-items outside it do nothing
+//   tileSize  : tile width (x) and height (y) in pixels
+//   tilesX/Y  : number of tiles per direction
+__kernel void transform(__global __const uchar * src,
+                        __global uchar * dst,
+                        __global uchar * lut,
+                        const int srcStep, const int dstStep, const int lutStep,
+                        const int cols, const int rows,
+                        const int2 tileSize,
+                        const int tilesX, const int tilesY)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if (x < cols && y < rows)
+    {
+        // Vertical position in tile units, measured from tile centres.
+        const float tileRowF = (convert_float(y) / tileSize.y) - 0.5f;
+        const int rowFloor = convert_int_rtn(tileRowF);
+        // Weights come from the unclamped index; only the tile indices are
+        // clamped to the valid range.
+        const float wLower = tileRowF - rowFloor;
+        const int tyUpper = max(rowFloor, 0);
+        const int tyLower = min(rowFloor + 1, tilesY - 1);
+
+        // Same for the horizontal direction.
+        const float tileColF = (convert_float(x) / tileSize.x) - 0.5f;
+        const int colFloor = convert_int_rtn(tileColF);
+        const float wRight = tileColF - colFloor;
+        const int txLeft = max(colFloor, 0);
+        const int txRight = min(colFloor + 1, tilesX - 1);
+
+        const int srcVal = src[mad24(y, srcStep, x)];
+
+        float res = 0;
+
+        res += lut[mad24(tyUpper * tilesX + txLeft,  lutStep, srcVal)] * ((1.0f - wRight) * (1.0f - wLower));
+        res += lut[mad24(tyUpper * tilesX + txRight, lutStep, srcVal)] * ((wRight) * (1.0f - wLower));
+        res += lut[mad24(tyLower * tilesX + txLeft,  lutStep, srcVal)] * ((1.0f - wRight) * (wLower));
+        res += lut[mad24(tyLower * tilesX + txRight, lutStep, srcVal)] * ((wRight) * (wLower));
+
+        const uint rounded = (uint)convert_int_rte(res);
+        dst[mad24(y, dstStep, x)] = convert_uchar(clamp(rounded, (uint)0, (uint)255));
+    }
+}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Peng Xiao, pengxiao@outlook.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// WITH_MASK switches the mask test in findCorners on (non-zero) or off (0).
+// It may be predefined by the program build options; the default is off.
+#ifndef WITH_MASK
+#define WITH_MASK 0
+#endif
+
+// Sampler shared by the image reads in this file: unnormalised coordinates,
+// clamp-to-edge addressing, nearest filtering.
+__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+// Reads the first channel of _eig at integer coordinates (_x, _y) through the
+// file-level sampler.
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
+{
+    const int2 coord = (int2)(_x, _y);
+    const float4 texel = read_imagef(_eig, sampler, coord);
+    return texel.x;
+}
+
+// Reads the first channel of _eig at the float coordinates pt through the
+// file-level sampler.
+inline float ELEM_FLT2(image2d_t _eig, float2 pt)
+{
+    const float4 texel = read_imagef(_eig, sampler, pt);
+    return texel.x;
+}
+
+// Collects interior pixels whose eig value exceeds `threshold` and is not
+// smaller than any of its 8 neighbours. One work-item per pixel (j = x, i = y).
+//   eig        : image sampled through ELEM_INT2
+//   mask       : optional mask, only read when WITH_MASK is non-zero
+//   corners    : output list of (x, y) positions
+//   mask_strip : mask row pitch, in pixels
+//   max_count  : capacity of `corners`; hits beyond it are counted but not stored
+//   g_counter  : global hit counter, incremented atomically for every hit
+__kernel
+    void findCorners
+    (
+        image2d_t eig,
+        __global const char * mask,
+        __global float2 * corners,
+        const int mask_strip,// in pixels
+        const float threshold,
+        const int rows,
+        const int cols,
+        const int max_count,
+        __global int * g_counter
+    )
+{
+    const int j = get_global_id(0);
+    const int i = get_global_id(1);
+
+    // The outermost rows and columns are never candidates.
+    if (i <= 0 || i >= rows - 1 || j <= 0 || j >= cols - 1)
+        return;
+
+#if WITH_MASK
+    if (mask[i * mask_strip + j] == 0)
+        return;
+#endif
+
+    const float val = ELEM_INT2(eig, j, i);
+
+    // Written as a negated ">" so that a NaN is rejected exactly as before.
+    if (!(val > threshold))
+        return;
+
+    // Largest value in the 3x3 neighbourhood, visited row by row.
+    float maxVal = val;
+    for (int dy = -1; dy <= 1; ++dy)
+    {
+        for (int dx = -1; dx <= 1; ++dx)
+        {
+            if (dx == 0 && dy == 0)
+                continue;
+            maxVal = fmax(ELEM_INT2(eig, j + dx, i + dy), maxVal);
+        }
+    }
+
+    if (val == maxVal)
+    {
+        const int ind = atomic_inc(g_counter);
+
+        if (ind < max_count)
+            corners[ind] = (float2)(j, i);
+    }
+}
+
+// One compare-and-swap pass of a bitonic sort over `corners`, keyed by the eig
+// value sampled at each point. Each work-item below count / 2 handles one pair.
+//   stage, passOfStage : position within the bitonic network; the two elements
+//                        of a pair are 1 << (stage - passOfStage) apart
+// NOTE(review): both indices are clamped to `count`, not `count - 1`, so
+// corners[count] can be touched if the clamp ever triggers - confirm that the
+// host pads the buffer or always passes a power-of-two count.
+__kernel
+    void sortCorners_bitonicSort
+    (
+        image2d_t eig,
+        __global float2 * corners,
+        const int count,
+        const int stage,
+        const int passOfStage
+    )
+{
+    const int threadId = get_global_id(0);
+    if(threadId >= count / 2)
+    {
+        return;
+    }
+
+    // Odd blocks of (1 << stage) work-items sort ascending, even ones descending.
+    const int directionBlock = threadId / (1 << stage);
+    const bool ascending = (directionBlock % 2) == 1;
+
+    const int pairDistance = 1 << (stage - passOfStage);
+    const int blockWidth   = 2 * pairDistance;
+
+    const int rawLeft = (threadId % pairDistance)
+                      + (threadId / pairDistance) * blockWidth;
+    const int leftId  = min( rawLeft, count );
+    const int rightId = min( leftId + pairDistance, count );
+
+    const float2 leftPt  = corners[leftId];
+    const float2 rightPt = corners[rightId];
+
+    const float leftVal  = ELEM_FLT2(eig, leftPt);
+    const float rightVal = ELEM_FLT2(eig, rightPt);
+
+    // The pair is already in the wanted order when "left is larger" disagrees
+    // with "ascending"; otherwise the two points trade places.
+    const bool keepOrder = (leftVal > rightVal) != ascending;
+
+    corners[leftId]  = keepOrder ? leftPt  : rightPt;
+    corners[rightId] = keepOrder ? rightPt : leftPt;
+}
+
+//selection sort for gfft
+//kernel is ported from Bolt library:
+//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
+// Local stage: every work-group orders its own slice of `corners` by descending
+// eig value. Each work-item scans the n elements of its slice once, counts how
+// many are larger (its rank) and how many compare equal, then stores its point
+// at the ranked position(s).
+//   scratch : local buffer with one float2 per work-item
+// NOTE(review): work-items with equal values compute the same rank and each
+// writes its own point over the same slots, so distinct points with equal eig
+// values can overwrite one another - confirm this is acceptable.
+// NOTE(review): corners[min(i, n)] can read index n, one past the slice.
+__kernel
+    void sortCorners_selectionSortLocal
+    (
+        image2d_t eig,
+        __global float2 * corners,
+        const int count,
+        __local float2 * scratch
+    )
+{
+    const int lid        = get_local_id(0);   // index inside the work-group
+    const int groupCount = get_num_groups(0); // number of work-groups
+    const int groupID    = get_group_id(0);
+    const int wg         = get_local_size(0); // work-group size = slice size
+
+    // All groups but the last own wg elements; the last owns what is left.
+    int n = wg;
+    if (groupID == (groupCount - 1))
+        n = count - wg * (groupCount - 1);
+
+    // From here on `corners` addresses this group's slice.
+    corners += groupID * wg;
+
+    const float2 pt1 = corners[min(lid, n)];
+    scratch[lid] = pt1;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Work-items beyond the slice only helped to fill scratch.
+    if (lid < n)
+    {
+        const float val1 = ELEM_FLT2(eig, pt1);
+
+        int pos  = 0; // elements strictly larger than ours
+        int same = 0; // elements comparing neither larger nor smaller (incl. ours)
+        for (int j = 0; j < n; ++j)
+        {
+            const float val2 = ELEM_FLT2(eig, scratch[j]);
+            if (val2 > val1)
+                ++pos;
+            else if (!(val1 > val2))
+                ++same;
+        }
+
+        for (int j = 0; j < same; ++j)
+            corners[pos + j] = pt1;
+    }
+}
+// Final stage of the selection sort: every work-item ranks its own point
+// against all slices of `corners` and stores it at the resulting position(s).
+// The scan of a slice stops at the first element smaller than the work-item's
+// own value, which relies on each slice already being in descending order.
+// NOTE(review): the kernel reads and writes `corners` in place while other
+// work-items are still scanning it - confirm that this ordering hazard is
+// acceptable or handled by the host.
+__kernel
+    void sortCorners_selectionSortFinal
+    (
+        image2d_t eig,
+        __global float2 * corners,
+        const int count
+    )
+{
+    const int lid        = get_local_id(0);   // index inside the work-group
+    const int groupCount = get_num_groups(0); // number of work-groups
+    const int groupID    = get_group_id(0);
+    const int wg         = get_local_size(0); // work-group size = slice size
+
+    const int selfIndex = groupID * wg + lid;
+    if (selfIndex >= count)
+        return;
+
+    // Length of the last slice; every other slice holds wg elements.
+    const int lastLen = count - wg * (groupCount - 1);
+
+    const float2 pt1 = corners[selfIndex];
+    const float val1 = ELEM_FLT2(eig, pt1);
+
+    int pos  = 0; // elements strictly larger than ours
+    int same = 0; // elements comparing neither larger nor smaller
+
+    for (int g = 0; g < groupCount; ++g)
+    {
+        const int base = g * wg;
+        const int len  = (g == groupCount - 1) ? lastLen : wg;
+
+        for (int k = 0; k < len; ++k)
+        {
+            const float val2 = ELEM_FLT2(eig, corners[base + k]);
+
+            // Leaves this slice only; the next slice is still scanned.
+            if (val1 > val2)
+                break;
+
+            //Increment pos only if the value is not the same.
+            if (val2 > val1)
+                ++pos;
+            else
+                ++same;
+        }
+    }
+
+    for (int j = 0; j < same; ++j)
+        corners[pos + j] = pt1;
+}
+
int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
int4 con = dpos >= 0 && dpos < dst_cols;
- ddata = convert_float4(con) != 0 ? ddata : dVal;
+ ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
if(dstart < dst_cols)
{
*(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
+// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//#pragma OPENCL EXTENSION cl_amd_printf : enable
-uchar get_valid_uchar(uchar data)
+uchar get_valid_uchar(float data)
{
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0);
}
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dstCols) && (y < dstRows))
- dst[x + y * dstStep] = (float)(4.0f * sum);
+ dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum);
}
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)];
if ((x < dstCols) && (y < dstRows))
- dst[x + y * dstStep] = (float)(4.0f * sum);
+ dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum);
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_8UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
-float4 covert_uchar4_to_float4(uchar4 data)
-{
- float4 f4Data = {0,0,0,0};
-
- f4Data.x = (float)data.x;
- f4Data.y = (float)data.y;
- f4Data.z = (float)data.z;
- f4Data.w = (float)data.w;
-
- return f4Data;
-}
-
-
-uchar4 convert_float4_to_uchar4(float4 data)
-{
- uchar4 u4Data;
-
- u4Data.x = get_valid_uchar(data.x);
- u4Data.y = get_valid_uchar(data.y);
- u4Data.z = get_valid_uchar(data.z);
- u4Data.w = get_valid_uchar(data.w);
-
- return u4Data;
-}
-
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
- s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]);
+ s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
- const int evenFlag = (int)((tidx & 1) == 0);
- const int oddFlag = (int)((tidx & 1) != 0);
+ const float4 evenFlag = (float4)((tidx & 1) == 0);
+ const float4 oddFlag = (float4)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
if ((x < dstCols) && (y < dstRows))
{
- dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum);
+ dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
}
}
+
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
-float4 covert_ushort4_to_float4(ushort4 data)
-{
- float4 f4Data = {0,0,0,0};
-
- f4Data.x = (float)data.x;
- f4Data.y = (float)data.y;
- f4Data.z = (float)data.z;
- f4Data.w = (float)data.w;
-
- return f4Data;
-}
-
-
-ushort4 convert_float4_to_ushort4(float4 data)
-{
- ushort4 u4Data;
-
- u4Data.x = (float)data.x;
- u4Data.y = (float)data.y;
- u4Data.z = (float)data.z;
- u4Data.w = (float)data.w;
-
- return u4Data;
-}
-
-
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
int srcRows,int dstRows,int srcCols,int dstCols,
int srcOffset,int dstOffset,int srcStep,int dstStep)
srcy = abs(srcy);
srcy = min(srcRows -1 ,srcy);
- s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]);
+ s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]);
}
barrier(CLK_LOCAL_MEM_FENCE);
float4 sum = (float4)(0,0,0,0);
- const int evenFlag = (int)((get_local_id(0) & 1) == 0);
- const int oddFlag = (int)((get_local_id(0) & 1) != 0);
+ const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0);
+ const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
if (eveny)
{
- sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
- sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
+ sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)];
+ sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)];
- sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
- sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
+ sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)];
+ sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum;
if ((x < dstCols) && (y < dstRows))
{
- dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum);
+ dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum);
}
}
float4 sum = (float4)(0,0,0,0);
- const int evenFlag = (int)((tidx & 1) == 0);
- const int oddFlag = (int)((tidx & 1) != 0);
+ const float4 evenFlag = (float4)((tidx & 1) == 0);
+ const float4 oddFlag = (float4)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
if (eveny)
{
- sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
- sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
+ sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
+ sum = sum + (oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
- sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
- sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
+ sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
+ sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[tidy][tidx] = sum;
{
dst[x + y * dstStep] = 4.0f * sum;
}
-}
\ No newline at end of file
+}
//#pragma OPENCL EXTENSION cl_amd_printf : enable
-__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
-{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- if (y < rows && x < cols * cn)
- {
- const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x];
- const uchar src_val1 = (src + y * srcStep)[x];
- const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x];
-
- ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
- ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
- }
-}
-
-__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
-{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- if (y < rows && x < cols * cn)
- {
- const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x];
- const uchar src_val1 = (src + y * srcStep)[x];
- const uchar src_val2 = (src + (y < rows - 1 ? y + 1 : rows - 2) * srcStep)[x];
-
- ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
- ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
- }
-}
-
-__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
-{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- const int colsn = cols * cn;
-
- if (y < rows && x < colsn)
- {
- __global const short* dx_buf_row = dx_buf + y * dx_bufStep;
- __global const short* dy_buf_row = dy_buf + y * dy_bufStep;
-
- const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
- const int xl = x - cn >= 0 ? x - cn : cn + x;
-
- ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
- ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
- }
-}
-
-__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
-{
- const int x = get_global_id(0);
- const int y = get_global_id(1);
-
- const int colsn = cols * cn;
-
- if (y < rows && x < colsn)
- {
- __global const short* dx_buf_row = dx_buf + y * dx_bufStep;
- __global const short* dy_buf_row = dy_buf + y * dy_bufStep;
-
- const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn;
- const int xl = x - cn >= 0 ? x - cn : cn + x;
-
- ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl];
- ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10;
- }
-}
-
-#define W_BITS 14
-#define W_BITS1 14
-
-#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
-
-int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y)
-{
- int2 ipt;
- ipt.x = convert_int_sat_rtn(pt.x);
- ipt.y = convert_int_sat_rtn(pt.y);
-
- float a = pt.x - ipt.x;
- float b = pt.y - ipt.y;
-
- int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
- int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
- int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
- int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
- __global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
- __global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
-
- return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5);
-}
-
-int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y)
-{
- int2 ipt;
- ipt.x = convert_int_sat_rtn(pt.x);
- ipt.y = convert_int_sat_rtn(pt.y);
-
- float a = pt.x - ipt.x;
- float b = pt.y - ipt.y;
-
- int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS));
- int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS));
- int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS));
- int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
- __global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn;
- __global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn;
-
- return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1);
-}
-
-float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y)
-{
- int2 ipt;
- ipt.x = convert_int_sat_rtn(pt.x);
- ipt.y = convert_int_sat_rtn(pt.y);
-
- float a = pt.x - ipt.x;
- float b = pt.y - ipt.y;
-
- float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS));
- float iw01 = (a * (1.0f - b) * (1 << W_BITS));
- float iw10 = ((1.0f - a) * b * (1 << W_BITS));
- float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
- __global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn;
- __global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn;
-
- return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5;
-}
-
#define BUFFER 64
-
+#ifndef WAVE_SIZE
+#define WAVE_SIZE 1
+#endif
#ifdef CPU
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
{
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
- {
- smem1[tid] = val1 += smem1[tid + 128];
- smem2[tid] = val2 += smem2[tid + 128];
- smem3[tid] = val3 += smem3[tid + 128];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = val1 += smem1[tid + 64];
- smem2[tid] = val2 += smem2[tid + 64];
- smem3[tid] = val3 += smem3[tid + 64];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
if (tid < 32)
{
- smem1[tid] = val1 += smem1[tid + 32];
- smem2[tid] = val2 += smem2[tid + 32];
- smem3[tid] = val3 += smem3[tid + 32];
+ smem1[tid] += smem1[tid + 32];
+ smem2[tid] += smem2[tid + 32];
+ smem3[tid] += smem3[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
- smem1[tid] = val1 += smem1[tid + 16];
- smem2[tid] = val2 += smem2[tid + 16];
- smem3[tid] = val3 += smem3[tid + 16];
+ smem1[tid] += smem1[tid + 16];
+ smem2[tid] += smem2[tid + 16];
+ smem3[tid] += smem3[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
- smem1[tid] = val1 += smem1[tid + 8];
- smem2[tid] = val2 += smem2[tid + 8];
- smem3[tid] = val3 += smem3[tid + 8];
+ smem1[tid] += smem1[tid + 8];
+ smem2[tid] += smem2[tid + 8];
+ smem3[tid] += smem3[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
- smem1[tid] = val1 += smem1[tid + 4];
- smem2[tid] = val2 += smem2[tid + 4];
- smem3[tid] = val3 += smem3[tid + 4];
+ smem1[tid] += smem1[tid + 4];
+ smem2[tid] += smem2[tid + 4];
+ smem3[tid] += smem3[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
- smem1[tid] = val1 += smem1[tid + 2];
- smem2[tid] = val2 += smem2[tid + 2];
- smem3[tid] = val3 += smem3[tid + 2];
+ smem1[tid] += smem1[tid + 2];
+ smem2[tid] += smem2[tid + 2];
+ smem3[tid] += smem3[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
- smem1[BUFFER] = val1 += smem1[tid + 1];
- smem2[BUFFER] = val2 += smem2[tid + 1];
- smem3[BUFFER] = val3 += smem3[tid + 1];
+ smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
+ smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
+ smem3[BUFFER] = smem3[tid] + smem3[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
- {
- smem1[tid] = (val1 += smem1[tid + 128]);
- smem2[tid] = (val2 += smem2[tid + 128]);
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = (val1 += smem1[tid + 64]);
- smem2[tid] = (val2 += smem2[tid + 64]);
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
if (tid < 32)
{
- smem1[tid] = (val1 += smem1[tid + 32]);
- smem2[tid] = (val2 += smem2[tid + 32]);
+ smem1[tid] += smem1[tid + 32];
+ smem2[tid] += smem2[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
- smem1[tid] = (val1 += smem1[tid + 16]);
- smem2[tid] = (val2 += smem2[tid + 16]);
+ smem1[tid] += smem1[tid + 16];
+ smem2[tid] += smem2[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
- smem1[tid] = (val1 += smem1[tid + 8]);
- smem2[tid] = (val2 += smem2[tid + 8]);
+ smem1[tid] += smem1[tid + 8];
+ smem2[tid] += smem2[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
- smem1[tid] = (val1 += smem1[tid + 4]);
- smem2[tid] = (val2 += smem2[tid + 4]);
+ smem1[tid] += smem1[tid + 4];
+ smem2[tid] += smem2[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
- smem1[tid] = (val1 += smem1[tid + 2]);
- smem2[tid] = (val2 += smem2[tid + 2]);
+ smem1[tid] += smem1[tid + 2];
+ smem2[tid] += smem2[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
- smem1[BUFFER] = (val1 += smem1[tid + 1]);
- smem2[BUFFER] = (val2 += smem2[tid + 1]);
+ smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
+ smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
- {
- smem1[tid] = (val1 += smem1[tid + 128]);
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = (val1 += smem1[tid + 64]);
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
if (tid < 32)
{
- smem1[tid] = (val1 += smem1[tid + 32]);
+ smem1[tid] += smem1[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
- smem1[tid] = (val1 += smem1[tid + 16]);
+ smem1[tid] += smem1[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
- smem1[tid] = (val1 += smem1[tid + 8]);
+ smem1[tid] += smem1[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
- smem1[tid] = (val1 += smem1[tid + 4]);
+ smem1[tid] += smem1[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
- smem1[tid] = (val1 += smem1[tid + 2]);
+ smem1[tid] += smem1[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
- smem1[BUFFER] = (val1 += smem1[tid + 1]);
+ smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
#else
-void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
+void reduce3(float val1, float val2, float val3,
+__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
smem3[tid] = val3;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
+ if (tid < 32)
{
- smem1[tid] = val1 += smem1[tid + 128];
- smem2[tid] = val2 += smem2[tid + 128];
- smem3[tid] = val3 += smem3[tid + 128];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 32];
+ smem2[tid] += smem2[tid + 32];
+ smem3[tid] += smem3[tid + 32];
+#if WAVE_SIZE < 32
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 16) {
#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = val1 += smem1[tid + 64];
- smem2[tid] = val2 += smem2[tid + 64];
- smem3[tid] = val3 += smem3[tid + 64];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 16];
+ smem2[tid] += smem2[tid + 16];
+ smem3[tid] += smem3[tid + 16];
+#if WAVE_SIZE <16
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 8) {
#endif
+ smem1[tid] += smem1[tid + 8];
+ smem2[tid] += smem2[tid + 8];
+ smem3[tid] += smem3[tid + 8];
- if (tid < 32)
- {
- volatile __local float* vmem1 = smem1;
- volatile __local float* vmem2 = smem2;
- volatile __local float* vmem3 = smem3;
-
- vmem1[tid] = val1 += vmem1[tid + 32];
- vmem2[tid] = val2 += vmem2[tid + 32];
- vmem3[tid] = val3 += vmem3[tid + 32];
-
- vmem1[tid] = val1 += vmem1[tid + 16];
- vmem2[tid] = val2 += vmem2[tid + 16];
- vmem3[tid] = val3 += vmem3[tid + 16];
-
- vmem1[tid] = val1 += vmem1[tid + 8];
- vmem2[tid] = val2 += vmem2[tid + 8];
- vmem3[tid] = val3 += vmem3[tid + 8];
+ smem1[tid] += smem1[tid + 4];
+ smem2[tid] += smem2[tid + 4];
+ smem3[tid] += smem3[tid + 4];
- vmem1[tid] = val1 += vmem1[tid + 4];
- vmem2[tid] = val2 += vmem2[tid + 4];
- vmem3[tid] = val3 += vmem3[tid + 4];
+ smem1[tid] += smem1[tid + 2];
+ smem2[tid] += smem2[tid + 2];
+ smem3[tid] += smem3[tid + 2];
- vmem1[tid] = val1 += vmem1[tid + 2];
- vmem2[tid] = val2 += vmem2[tid + 2];
- vmem3[tid] = val3 += vmem3[tid + 2];
-
- vmem1[tid] = val1 += vmem1[tid + 1];
- vmem2[tid] = val2 += vmem2[tid + 1];
- vmem3[tid] = val3 += vmem3[tid + 1];
+ smem1[tid] += smem1[tid + 1];
+ smem2[tid] += smem2[tid + 1];
+ smem3[tid] += smem3[tid + 1];
}
}
-void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
+void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
{
smem1[tid] = val1;
smem2[tid] = val2;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
+ if (tid < 32)
{
- smem1[tid] = val1 += smem1[tid + 128];
- smem2[tid] = val2 += smem2[tid + 128];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 32];
+ smem2[tid] += smem2[tid + 32];
+#if WAVE_SIZE < 32
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 16) {
#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = val1 += smem1[tid + 64];
- smem2[tid] = val2 += smem2[tid + 64];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 16];
+ smem2[tid] += smem2[tid + 16];
+#if WAVE_SIZE <16
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 8) {
#endif
+ smem1[tid] += smem1[tid + 8];
+ smem2[tid] += smem2[tid + 8];
- if (tid < 32)
- {
- volatile __local float* vmem1 = smem1;
- volatile __local float* vmem2 = smem2;
-
- vmem1[tid] = val1 += vmem1[tid + 32];
- vmem2[tid] = val2 += vmem2[tid + 32];
-
- vmem1[tid] = val1 += vmem1[tid + 16];
- vmem2[tid] = val2 += vmem2[tid + 16];
+ smem1[tid] += smem1[tid + 4];
+ smem2[tid] += smem2[tid + 4];
- vmem1[tid] = val1 += vmem1[tid + 8];
- vmem2[tid] = val2 += vmem2[tid + 8];
+ smem1[tid] += smem1[tid + 2];
+ smem2[tid] += smem2[tid + 2];
- vmem1[tid] = val1 += vmem1[tid + 4];
- vmem2[tid] = val2 += vmem2[tid + 4];
-
- vmem1[tid] = val1 += vmem1[tid + 2];
- vmem2[tid] = val2 += vmem2[tid + 2];
-
- vmem1[tid] = val1 += vmem1[tid + 1];
- vmem2[tid] = val2 += vmem2[tid + 1];
+ smem1[tid] += smem1[tid + 1];
+ smem2[tid] += smem2[tid + 1];
}
}
-void reduce1(float val1, __local float* smem1, int tid)
+void reduce1(float val1, __local volatile float* smem1, int tid)
{
smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE);
-#if BUFFER > 128
- if (tid < 128)
+ if (tid < 32)
{
- smem1[tid] = val1 += smem1[tid + 128];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 32];
+#if WAVE_SIZE < 32
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 16) {
#endif
-
-#if BUFFER > 64
- if (tid < 64)
- {
- smem1[tid] = val1 += smem1[tid + 64];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
+ smem1[tid] += smem1[tid + 16];
+#if WAVE_SIZE <16
+ } barrier(CLK_LOCAL_MEM_FENCE);
+ if (tid < 8) {
#endif
-
- if (tid < 32)
- {
- volatile __local float* vmem1 = smem1;
-
- vmem1[tid] = val1 += vmem1[tid + 32];
- vmem1[tid] = val1 += vmem1[tid + 16];
- vmem1[tid] = val1 += vmem1[tid + 8];
- vmem1[tid] = val1 += vmem1[tid + 4];
- vmem1[tid] = val1 += vmem1[tid + 2];
- vmem1[tid] = val1 += vmem1[tid + 1];
+ smem1[tid] += smem1[tid + 8];
+ smem1[tid] += smem1[tid + 4];
+ smem1[tid] += smem1[tid + 2];
+ smem1[tid] += smem1[tid + 1];
}
}
#endif
#define SCALE (1.0f / (1 << 20))
#define THRESHOLD 0.01f
-#define DIMENSION 21
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Jin Ma jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// Computes centered-difference gradients of a single-channel float image.
+//
+// src              input image, indexed as src[row * src_step + col]
+// src_col, src_row image width and height in pixels
+// src_step         row stride of src, in float elements
+// dx, dy           output x- and y-derivative images
+// dx_step          row stride, in float elements, used for BOTH dx and dy
+//
+// Neighbour coordinates are clamped to the image, so border pixels use a
+// one-sided difference that is still scaled by 0.5.
+__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
+__global float* dx, __global float* dy, int dx_step)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    // Work-items that fall outside the image have nothing to write.
+    if (x >= src_col || y >= src_row)
+        return;
+
+    // Horizontal neighbours, clamped to [0, src_col - 1].
+    const int right = min(x + 1, src_col - 1);
+    const int left = max(x - 1, 0);
+    dx[y * dx_step + x] = 0.5f * (src[y * src_step + right] - src[y * src_step + left]);
+
+    // Vertical neighbours, clamped to [0, src_row - 1].
+    const int below = min(y + 1, src_row - 1);
+    const int above = max(y - 1, 0);
+    dy[y * dx_step + x] = 0.5f * (src[below * src_step + x] - src[above * src_step + x]);
+}
+
+// Piecewise-cubic interpolation weight for a sample at signed distance x_.
+// Returns 1 at distance 0, 0 at distance 1, and 0 for |x_| >= 2.
+float bicubicCoeff(float x_)
+{
+    const float dist = fabs(x_);
+
+    // Inner lobe: |x| <= 1.
+    if (dist <= 1.0f)
+        return dist * dist * (1.5f * dist - 2.5f) + 1.0f;
+
+    // Outer lobe: 1 < |x| < 2.
+    if (dist < 2.0f)
+        return dist * (dist * (-0.5f * dist + 2.5f) - 4.0f) + 2.0f;
+
+    // Outside the support (also reached when dist is NaN).
+    return 0.0f;
+}
+
+// Warps I1 and its gradients by the flow (u1, u2) using bicubic weights and
+// derives the per-pixel quantities written to grad and rho.
+//
+// For every pixel (x, y) with x < I0_col and y < I0_row the kernel:
+//   1. reads the flow u1/u2 (each with its own step and x/y offset),
+//   2. accumulates bicubicCoeff-weighted samples of tex_I1, tex_I1x and
+//      tex_I1y over the integer coordinates within 2 pixels of (x+u1, y+u2),
+//   3. divides by the sum of weights and stores the results in I1w, I1wx, I1wy,
+//   4. stores I1wx^2 + I1wy^2 in grad,
+//   5. stores I1w - I1wx*u1 - I1wy*u2 - I0 in rho.
+//
+// I1w, I1wx, I1wy, grad and rho are all indexed with I1w_step; I0 uses I0_step.
+// All steps are applied directly to float pointers, i.e. they are element strides.
+__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
+ image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
+ __global const float* u1, int u1_step,
+ __global const float* u2,
+ __global float* I1w,
+ __global float* I1wx, /*int I1wx_step,*/
+ __global float* I1wy, /*int I1wy_step,*/
+ __global float* grad, /*int grad_step,*/
+ __global float* rho,
+ int I1w_step,
+ int u2_step,
+ int u1_offset_x,
+ int u1_offset_y,
+ int u2_offset_x,
+ int u2_offset_y)
+{
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+
+ if(x < I0_col&&y < I0_row)
+ {
+ //const float u1Val = u1(y, x);
+ const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ //const float u2Val = u2(y, x);
+ const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+ // Position in I1 that the flow maps this pixel to.
+ const float wx = x + u1Val;
+ const float wy = y + u2Val;
+
+ // Integer sample coordinates lying within +/-2 of the warped position.
+ const int xmin = ceil(wx - 2.0f);
+ const int xmax = floor(wx + 2.0f);
+
+ const int ymin = ceil(wy - 2.0f);
+ const int ymax = floor(wy + 2.0f);
+
+ float sum = 0.0f;
+ float sumx = 0.0f;
+ float sumy = 0.0f;
+ float wsum = 0.0f;
+ // Unnormalized integer coordinates, clamped to the image edge, no filtering:
+ // the bicubic weighting is done explicitly below.
+ sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
+
+ for (int cy = ymin; cy <= ymax; ++cy)
+ {
+ for (int cx = xmin; cx <= xmax; ++cx)
+ {
+ // Separable weight: product of the 1-D weights in x and y.
+ const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+
+ //sum += w * tex2D(tex_I1 , cx, cy);
+ int2 cood = (int2)(cx, cy);
+ sum += w * read_imagef(tex_I1, sampleri, cood).x;
+ //sumx += w * tex2D(tex_I1x, cx, cy);
+ sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
+ //sumy += w * tex2D(tex_I1y, cx, cy);
+ sumy += w * read_imagef(tex_I1y, sampleri, cood).x;
+
+ wsum += w;
+ }
+ }
+
+ // Normalize by the accumulated weight.
+ const float coeff = 1.0f / wsum;
+
+ const float I1wVal = sum * coeff;
+ const float I1wxVal = sumx * coeff;
+ const float I1wyVal = sumy * coeff;
+
+ I1w[y * I1w_step + x] = I1wVal;
+ I1wx[y * I1w_step + x] = I1wxVal;
+ I1wy[y * I1w_step + x] = I1wyVal;
+
+ const float Ix2 = I1wxVal * I1wxVal;
+ const float Iy2 = I1wyVal * I1wyVal;
+
+ // store the |Grad(I1)|^2
+ grad[y * I1w_step + x] = Ix2 + Iy2;
+
+ // compute the constant part of the rho function
+ const float I0Val = I0[y * I0_step + x];
+ rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
+ }
+
+}
+
+// Reads one pixel from a float image stored in a plain buffer, clamping the
+// coordinates to the image so out-of-range reads return the nearest edge pixel.
+//
+// image          buffer holding the image
+// x, y           column and row to read (may lie outside the image)
+// rows, cols     image height and width -- note the order: rows first
+// elemCntPerRow  row stride of the buffer, in float elements
+//
+// Returns image[clamp(y) * elemCntPerRow + clamp(x)].
+float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow)
+{
+    const int i0 = clamp(x, 0, cols - 1);
+    const int j0 = clamp(y, 0, rows - 1);
+
+    return image[j0 * elemCntPerRow + i0];
+}
+
+// Warps I1 and its gradients by the flow (u1, u2) using bicubic weights, reading
+// the source images from plain global buffers instead of image2d_t objects.
+//
+// For every pixel (x, y) with x < I0_col and y < I0_row the kernel:
+//   1. reads the flow u1[y, x] and u2[y, x],
+//   2. accumulates bicubicCoeff-weighted samples of tex_I1, tex_I1x and tex_I1y
+//      over the integer coordinates within 2 pixels of (x + u1, y + u2),
+//      clamping out-of-range coordinates to the image edge via readImage,
+//   3. divides by the sum of weights and stores the results in I1w, I1wx, I1wy,
+//   4. stores I1wx^2 + I1wy^2 in grad,
+//   5. stores I1w - I1wx*u1 - I1wy*u2 - I0 in rho.
+//
+// I0_col / I0_row are the image width / height. I1w, I1wx, I1wy, grad and rho
+// are all indexed with I1w_step. All steps are applied directly to float
+// pointers, i.e. they are element strides.
+__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
+    __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
+    __global const float* u1, int u1_step,
+    __global const float* u2,
+    __global float* I1w,
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int I1_step,
+    int I1x_step)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    if(x < I0_col&&y < I0_row)
+    {
+        const float u1Val = u1[y * u1_step + x];
+        const float u2Val = u2[y * u2_step + x];
+
+        // Position in I1 that the flow maps this pixel to.
+        const float wx = x + u1Val;
+        const float wy = y + u2Val;
+
+        // Integer sample coordinates lying within +/-2 of the warped position.
+        const int xmin = ceil(wx - 2.0f);
+        const int xmax = floor(wx + 2.0f);
+
+        const int ymin = ceil(wy - 2.0f);
+        const int ymax = floor(wy + 2.0f);
+
+        float sum = 0.0f;
+        float sumx = 0.0f;
+        float sumy = 0.0f;
+        float wsum = 0.0f;
+
+        for (int cy = ymin; cy <= ymax; ++cy)
+        {
+            for (int cx = xmin; cx <= xmax; ++cx)
+            {
+                // Separable weight: product of the 1-D weights in x and y.
+                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+
+                // readImage expects (rows, cols) in that order, so the height
+                // (I0_row) must come before the width (I0_col); passing them the
+                // other way round clamps x against the height and y against the
+                // width, which is wrong for non-square images.
+                // NOTE(review): tex_I1y is indexed with I1x_step because there is
+                // no separate step parameter for it -- presumably both gradient
+                // buffers share a stride; confirm against the host code.
+                sum += w * readImage(tex_I1, cx, cy, I0_row, I0_col, I1_step);
+                sumx += w * readImage(tex_I1x, cx, cy, I0_row, I0_col, I1x_step);
+                sumy += w * readImage(tex_I1y, cx, cy, I0_row, I0_col, I1x_step);
+                wsum += w;
+            }
+        }
+
+        // Normalize by the accumulated weight.
+        const float coeff = 1.0f / wsum;
+
+        const float I1wVal = sum * coeff;
+        const float I1wxVal = sumx * coeff;
+        const float I1wyVal = sumy * coeff;
+
+        I1w[y * I1w_step + x] = I1wVal;
+        I1wx[y * I1w_step + x] = I1wxVal;
+        I1wy[y * I1w_step + x] = I1wyVal;
+
+        const float Ix2 = I1wxVal * I1wxVal;
+        const float Iy2 = I1wyVal * I1wyVal;
+
+        // store the |Grad(I1)|^2
+        grad[y * I1w_step + x] = Ix2 + Iy2;
+
+        // compute the constant part of the rho function
+        const float I0Val = I0[y * I0_step + x];
+        rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
+    }
+
+}
+
+
+// Updates the four dual-variable images p11, p12, p21, p22 in place from the
+// forward differences of the flow fields u1 and u2.
+//
+// u1, u2           flow components, read with their own step and x/y offsets
+// u1_col, u1_row   width and height of the processed region
+// p11..p22         dual variables, all indexed with p11_step
+// taut             scale applied to the gradients in the update
+//
+// For each pixel: p <- (p + taut * gradient) / (1 + taut * |gradient|), where
+// (p11, p12) use the gradient of u1 and (p21, p22) use the gradient of u2.
+// The forward neighbour is clamped to the last column/row, so the difference
+// is zero on the right and bottom borders.
+__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
+    __global const float* u2,
+    __global float* p11, int p11_step,
+    __global float* p12,
+    __global float* p21,
+    __global float* p22,
+    const float taut,
+    int u2_step,
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
+{
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+
+    // Work-items outside the region have nothing to update.
+    if (x >= u1_col || y >= u1_row)
+        return;
+
+    // Forward neighbours, clamped to the last column / row.
+    const int xNext = min(x + 1, u1_col - 1);
+    const int yNext = min(y + 1, u1_row - 1);
+
+    // Forward differences of u1.
+    const float u1Here = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+    const float u1x = u1[(y + u1_offset_y) * u1_step + xNext + u1_offset_x] - u1Here;
+    const float u1y = u1[(yNext + u1_offset_y) * u1_step + x + u1_offset_x] - u1Here;
+
+    // Forward differences of u2.
+    const float u2Here = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+    const float u2x = u2[(y + u2_offset_y) * u2_step + xNext + u2_offset_x] - u2Here;
+    const float u2y = u2[(yNext + u2_offset_y) * u2_step + x + u2_offset_x] - u2Here;
+
+    // Gradient magnitudes and the matching normalizers.
+    const float ng1 = 1.0f + taut * hypot(u1x, u1y);
+    const float ng2 = 1.0f + taut * hypot(u2x, u2y);
+
+    // All four dual images share p11_step.
+    const int pIdx = y * p11_step + x;
+    p11[pIdx] = (p11[pIdx] + taut * u1x) / ng1;
+    p12[pIdx] = (p12[pIdx] + taut * u1y) / ng1;
+    p21[pIdx] = (p21[pIdx] + taut * u2x) / ng2;
+    p22[pIdx] = (p22[pIdx] + taut * u2y) / ng2;
+}
+
+// Backward-difference divergence of the vector field (v1, v2) at pixel (x, y).
+//
+// v1, v2            field components, indexed as v[row * step + col]
+// y, x              row and column of the pixel (row first)
+// v1_step, v2_step  row strides in float elements
+//
+// In the interior the result is (v1[y,x] - v1[y,x-1]) + (v2[y,x] - v2[y-1,x]).
+// On the first column the v1 backward term is dropped, on the first row the
+// v2 backward term is dropped, and at the corner both are dropped.
+float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+{
+    const bool hasLeft = x > 0;
+    const bool hasUp = y > 0;
+
+    // Interior pixel: both backward differences are available.
+    if (hasLeft && hasUp)
+    {
+        const float dv1 = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
+        const float dv2 = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
+        return dv1 + dv2;
+    }
+
+    // First column (rows below the top): read column 0, no left neighbour.
+    if (hasUp)
+        return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0];
+
+    // First row (columns right of the corner): read row 0, no upper neighbour.
+    if (hasLeft)
+        return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x];
+
+    // Top-left corner.
+    return v1[0 * v1_step + 0] + v2[0 * v2_step + 0];
+}
+
+// Updates the flow (u1, u2) in place and records the squared change per pixel.
+//
+// For every pixel (x, y) with x < I1wx_col and y < I1wx_row the kernel:
+//   1. evaluates rho = rho_c + I1wx*u1 + I1wy*u2 at the current flow,
+//   2. picks a step (d1, d2) along (I1wx, I1wy) by comparing rho against
+//      +/- l_t * grad (thresholding), giving v = u + d,
+//   3. adds theta times the divergence of the dual variables (p11, p12) and
+//      (p21, p22) to obtain the new flow,
+//   4. writes the new flow back to u1/u2 and stores
+//      (u1_old - u1_new)^2 + (u2_old - u2_new)^2 in error.
+//
+// I1wx, I1wy, grad, rho_c, p11..p22 and error are all indexed with I1wx_step;
+// u1 and u2 use their own step and x/y offsets. All steps are applied directly
+// to float pointers, i.e. they are element strides.
+__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
+ __global const float* I1wy, /*int I1wy_step,*/
+ __global const float* grad, /*int grad_step,*/
+ __global const float* rho_c, /*int rho_c_step,*/
+ __global const float* p11, /*int p11_step,*/
+ __global const float* p12, /*int p12_step,*/
+ __global const float* p21, /*int p21_step,*/
+ __global const float* p22, /*int p22_step,*/
+ __global float* u1, int u1_step,
+ __global float* u2,
+ __global float* error, const float l_t, const float theta, int u2_step,
+ int u1_offset_x,
+ int u1_offset_y,
+ int u2_offset_x,
+ int u2_offset_y)
+{
+
+ //const int x = blockIdx.x * blockDim.x + threadIdx.x;
+ //const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+
+
+ if(x < I1wx_col && y < I1wx_row)
+ {
+ const float I1wxVal = I1wx[y * I1wx_step + x];
+ const float I1wyVal = I1wy[y * I1wx_step + x];
+ const float gradVal = grad[y * I1wx_step + x];
+ const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+ const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+
+ // Full rho: the flow-independent part from rho_c plus the part linear in the flow.
+ const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
+
+ // estimate the values of the variable (v1, v2) (thresholding operator TH)
+
+ // (d1, d2) stays zero when none of the three cases below applies.
+ float d1 = 0.0f;
+ float d2 = 0.0f;
+
+ if (rho < -l_t * gradVal)
+ {
+ d1 = l_t * I1wxVal;
+ d2 = l_t * I1wyVal;
+ }
+ else if (rho > l_t * gradVal)
+ {
+ d1 = -l_t * I1wxVal;
+ d2 = -l_t * I1wyVal;
+ }
+ // 1.192092896e-07f is 2^-23 (single-precision machine epsilon); the test
+ // avoids dividing by a vanishing gradVal.
+ else if (gradVal > 1.192092896e-07f)
+ {
+ const float fi = -rho / gradVal;
+ d1 = fi * I1wxVal;
+ d2 = fi * I1wyVal;
+ }
+
+ const float v1 = u1OldVal + d1;
+ const float v2 = u2OldVal + d2;
+
+ // compute the divergence of the dual variable (p1, p2)
+
+ const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
+ const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
+
+ // estimate the values of the optical flow (u1, u2)
+
+ const float u1NewVal = v1 + theta * div_p1;
+ const float u2NewVal = v2 + theta * div_p2;
+
+ u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
+ u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
+
+ // Squared magnitude of the flow change at this pixel.
+ const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+ const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+ error[y * I1wx_step + x] = n1 + n2;
+ }
+
+}
#if defined (HAVE_OPENCL)
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "opencv2/ocl/private/util.hpp"
#include "safe_call.hpp"
// Third party copyrights are property of their respective owners.
//
// @Authors
-// Dachuan Zhao, dachuan@multicorewareinc.com
-// Yao Wang, bitwangyaoyao@gmail.com
+// Dachuan Zhao, dachuan@multicorewareinc.com
+// Yao Wang, bitwangyaoyao@gmail.com
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
{
namespace ocl
{
-///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *pyrlk;
extern const char *pyrlk_no_image;
-extern const char *arithm_mul;
}
}
-
struct dim3
{
unsigned int x, y, z;
};
-struct float2
-{
- float x, y;
-};
-
-struct int2
-{
- int x, y;
-};
-
-namespace
-{
-void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
+static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
{
winSize.width *= cn;
block.z = patch.z = 1;
}
-}
-
-static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
-{
- if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
- {
- CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
- return;
- }
-
- CV_Assert(src1.cols == dst.cols &&
- src1.rows == dst.rows);
-
- CV_Assert(src1.type() == dst.type());
- CV_Assert(src1.depth() != CV_8S);
-
- Context *clCxt = src1.clCxt;
-
- size_t localThreads[3] = { 16, 16, 1 };
- size_t globalThreads[3] = { src1.cols,
- src1.rows,
- 1
- };
-
- int dst_step1 = dst.cols * dst.elemSize();
- vector<pair<size_t , const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- args.push_back( make_pair( sizeof(float), (float *)&scalar ));
-
- openCLExecuteKernel(clCxt, &arithm_mul, "arithm_muls", globalThreads, localThreads, args, -1, src1.depth());
-}
static void lkSparse_run(oclMat &I, oclMat &J,
const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 };
size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
int cn = I.oclchannels();
- char calcErr;
- if (level == 0)
- {
- calcErr = 1;
- }
- else
- {
- calcErr = 0;
- }
+ char calcErr = level==0?1:0;
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
- bool is_cpu;
- queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
+ bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
if (is_cpu)
{
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
{
if(isImageSupported)
{
- openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
+ stringstream idxStr;
+ idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
+ cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
+ int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
+ openCLSafeCall(clReleaseKernel(kernel));
+
+ static char opt[32] = {0};
+ sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+
+ openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
+ args, I.oclchannels(), I.depth(), opt);
releaseTexture(ITex);
releaseTexture(JTex);
}
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
oclMat temp2 = nextPts.reshape(1);
- multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
- //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
+ multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1));
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
// build the image pyramids.
-
prevPyr_.resize(maxLevel + 1);
nextPyr_.resize(maxLevel + 1);
}
// dI/dx ~ Ix, dI/dy ~ Iy
-
for (int level = maxLevel; level >= 0; level--)
{
lkSparse_run(prevPyr_[level], nextPyr_[level],
#define __OPENCV_OPENCL_SAFE_CALL_HPP__
#if defined __APPLE__
-#include <OpenCL/OpenCL.h>
+#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+// Jin Ma, jin@multicorewareinc.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#include "precomp.hpp"
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+namespace cv
+{
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char* tvl1flow;
+ }
+}
+
+// Constructs the solver with its default parameter set.
+cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL()
+{
+    // Pyramid and loop configuration.
+    nscales        = 5;      // pyramid levels requested by operator()
+    warps          = 5;      // outer (warping) loop count in procOneScale
+    iterations     = 300;    // upper bound for the inner loop in procOneScale
+    useInitialFlow = false;  // flowx/flowy are allocated by operator() rather than read
+
+    // Numerical parameters; procOneScale combines them as lambda * theta and tau / theta.
+    tau     = 0.25;
+    lambda  = 0.15;
+    theta   = 0.3;
+    epsilon = 0.01;          // squared and scaled by the image area to form the stop threshold
+}
+
+// Computes dense TV-L1 optical flow between two frames on a coarse-to-fine pyramid.
+//
+// I0, I1       - input frames; identical size and type, either CV_8UC1 or CV_32FC1.
+// flowx, flowy - flow components (CV_32FC1, same size as I0). When useInitialFlow is
+//                true they must already hold the initial estimate; otherwise they
+//                are allocated here.
+//
+// Side effect: nscales is lowered in place when a pyramid level would become
+// smaller than 16 pixels in either dimension.
+void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy)
+{
+    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
+    CV_Assert( I0.size() == I1.size() );
+    CV_Assert( I0.type() == I1.type() );
+    CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) );
+    CV_Assert( nscales > 0 );
+
+    // allocate memory for the pyramid structure
+    I0s.resize(nscales);
+    I1s.resize(nscales);
+    u1s.resize(nscales);
+    u2s.resize(nscales);
+
+    // Level 0 holds the inputs as 32-bit float; non-8-bit input is multiplied by 255.
+    // I0s_step == I1s_step
+    I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
+    I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
+
+    if (!useInitialFlow)
+    {
+        flowx.create(I0.size(), CV_32FC1);
+        flowy.create(I0.size(), CV_32FC1);
+    }
+    // Level 0 of the flow pyramid refers to the caller's matrices, so the final
+    // result lands in flowx/flowy.
+    // u1s_step != u2s_step
+    u1s[0] = flowx;
+    u2s[0] = flowy;
+
+    // Scratch buffers are sized for the finest level; procOneScale takes ROIs of them.
+    I1x_buf.create(I0.size(), CV_32FC1);
+    I1y_buf.create(I0.size(), CV_32FC1);
+
+    I1w_buf.create(I0.size(), CV_32FC1);
+    I1wx_buf.create(I0.size(), CV_32FC1);
+    I1wy_buf.create(I0.size(), CV_32FC1);
+
+    grad_buf.create(I0.size(), CV_32FC1);
+    rho_c_buf.create(I0.size(), CV_32FC1);
+
+    p11_buf.create(I0.size(), CV_32FC1);
+    p12_buf.create(I0.size(), CV_32FC1);
+    p21_buf.create(I0.size(), CV_32FC1);
+    p22_buf.create(I0.size(), CV_32FC1);
+
+    diff_buf.create(I0.size(), CV_32FC1);
+
+    // create the scales
+    for (int s = 1; s < nscales; ++s)
+    {
+        ocl::pyrDown(I0s[s - 1], I0s[s]);
+        ocl::pyrDown(I1s[s - 1], I1s[s]);
+
+        // Stop building the pyramid once a level gets too small to be useful.
+        if (I0s[s].cols < 16 || I0s[s].rows < 16)
+        {
+            nscales = s;
+            break;
+        }
+
+        if (useInitialFlow)
+        {
+            ocl::pyrDown(u1s[s - 1], u1s[s]);
+            ocl::pyrDown(u2s[s - 1], u2s[s]);
+
+            // Halving the resolution halves the displacement, so scale each
+            // component by 0.5. Each component must be scaled from itself: the
+            // previous code wrote 0.5 * u1s[s] into u2s[s], discarding the
+            // downsampled vertical flow.
+            multiply(0.5, u1s[s], u1s[s]);
+            multiply(0.5, u2s[s], u2s[s]);
+        }
+    }
+
+    // pyramidal structure for computing the optical flow
+    for (int s = nscales - 1; s >= 0; --s)
+    {
+        // compute the optical flow at the current scale
+        procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
+
+        // if this was the last scale, finish now
+        if (s == 0)
+            break;
+
+        // otherwise, upsample the optical flow
+
+        // zoom the optical flow for the next finer scale
+        ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
+        ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
+
+        // scale the optical flow with the appropriate zoom factor
+        multiply(2, u1s[s - 1], u1s[s - 1]);
+        multiply(2, u2s[s - 1], u2s[s - 1]);
+    }
+}
+
+// Forward declarations of the host-side kernel launchers used by procOneScale.
+// Each one is defined later in this file and enqueues a kernel from the
+// tvl1flow program.
+namespace ocl_tvl1flow
+{
+ // Launches "centeredGradientKernel" on src, writing into dx and dy.
+ void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy);
+
+ // Launches "warpBackwardKernel"; I1, I1x and I1y are bound as textures for the call.
+ void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y,
+ oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy,
+ oclMat &grad, oclMat &rho);
+
+ // Launches "estimateUKernel"; procOneScale sums the `error` buffer to test convergence.
+ void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
+ oclMat &rho_c, oclMat &p11, oclMat &p12,
+ oclMat &p21, oclMat &p22, oclMat &u1,
+ oclMat &u2, oclMat &error, float l_t, float theta);
+
+ // Launches "estimateDualVariablesKernel" on the dual fields p11..p22.
+ void estimateDualVariables(oclMat &u1, oclMat &u2,
+ oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
+}
+
+// Runs the solver on a single pyramid level and refines u1/u2 in place.
+//
+// I0, I1 - the two frames at this level (same size and type).
+// u1, u2 - flow components; when u1 is empty both are allocated and zero-filled.
+void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2)
+{
+    using namespace ocl_tvl1flow;
+
+    CV_DbgAssert( I1.size() == I0.size() );
+    CV_DbgAssert( I1.type() == I0.type() );
+    CV_DbgAssert( u1.empty() || u1.size() == I0.size() );
+    CV_DbgAssert( u2.size() == u1.size() );
+
+    // Convergence threshold for the summed per-pixel error of this level.
+    const double scaledEpsilon = epsilon * epsilon * I0.size().area();
+
+    if (u1.empty())
+    {
+        u1.create(I0.size(), CV_32FC1);
+        u1.setTo(Scalar::all(0));
+
+        u2.create(I0.size(), CV_32FC1);
+        u2.setTo(Scalar::all(0));
+    }
+
+    // Every scratch buffer was allocated for the finest level; work on the
+    // top-left window matching the current level's size.
+    const Rect levelRoi(0, 0, I0.cols, I0.rows);
+
+    oclMat I1x = I1x_buf(levelRoi);
+    oclMat I1y = I1y_buf(levelRoi);
+
+    centeredGradient(I1, I1x, I1y);
+
+    oclMat I1w  = I1w_buf(levelRoi);
+    oclMat I1wx = I1wx_buf(levelRoi);
+    oclMat I1wy = I1wy_buf(levelRoi);
+
+    oclMat grad  = grad_buf(levelRoi);
+    oclMat rho_c = rho_c_buf(levelRoi);
+
+    // The dual variables start from zero on every level.
+    oclMat p11 = p11_buf(levelRoi);
+    oclMat p12 = p12_buf(levelRoi);
+    oclMat p21 = p21_buf(levelRoi);
+    oclMat p22 = p22_buf(levelRoi);
+    p11.setTo(Scalar::all(0));
+    p12.setTo(Scalar::all(0));
+    p21.setTo(Scalar::all(0));
+    p22.setTo(Scalar::all(0));
+
+    oclMat diff = diff_buf(levelRoi);
+
+    const float l_t  = static_cast<float>(lambda * theta);
+    const float taut = static_cast<float>(tau / theta);
+
+    for (int warpings = 0; warpings < warps; ++warpings)
+    {
+        warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
+
+        // Iterate until the summed error drops to the threshold or the
+        // iteration budget is exhausted.
+        double residual = numeric_limits<double>::max();
+        int n = 0;
+        while (residual > scaledEpsilon && n < iterations)
+        {
+            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
+                      u1, u2, diff, l_t, static_cast<float>(theta));
+
+            residual = ocl::sum(diff)[0];
+
+            estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
+
+            ++n;
+        }
+    }
+}
+
+// Drops the cached pyramids and releases every scratch buffer held by the solver.
+void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
+{
+    // Image and flow pyramids.
+    I0s.clear();
+    I1s.clear();
+    u1s.clear();
+    u2s.clear();
+
+    // Per-level scratch buffers, released in declaration order of the table.
+    oclMat * const scratch[] =
+    {
+        &I1x_buf, &I1y_buf,
+        &I1w_buf, &I1wx_buf, &I1wy_buf,
+        &grad_buf, &rho_c_buf,
+        &p11_buf, &p12_buf, &p21_buf, &p22_buf,
+        &diff_buf
+    };
+    const size_t scratchCount = sizeof(scratch) / sizeof(scratch[0]);
+    for (size_t i = 0; i < scratchCount; ++i)
+        scratch[i]->release();
+
+    // Released directly, last, as before.
+    norm_buf.release();
+}
+
+// Enqueues "centeredGradientKernel" over src, with dx and dy as output buffers.
+//
+// Only dx's step is forwarded to the kernel.
+// NOTE(review): dy is presumably expected to share dx's step - confirm against the kernel.
+void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
+{
+    typedef pair<size_t, const void *> KernelArg;
+
+    Context *clCxt = src.clCxt;
+
+    // One work-item per pixel, in 32x8 work-groups.
+    size_t localThreads[3]  = {32, 8, 1};
+    size_t globalThreads[3] = {static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1};
+
+    // Steps are handed to the kernel in elements rather than bytes.
+    int srcStep = static_cast<int>(src.step / src.elemSize());
+    int dstStep = static_cast<int>(dx.step / dx.elemSize());
+
+    vector<KernelArg> args;
+    args.push_back(KernelArg(sizeof(cl_mem), &src.data));
+    args.push_back(KernelArg(sizeof(cl_int), &src.cols));
+    args.push_back(KernelArg(sizeof(cl_int), &src.rows));
+    args.push_back(KernelArg(sizeof(cl_int), &srcStep));
+    args.push_back(KernelArg(sizeof(cl_mem), &dx.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &dy.data));
+    args.push_back(KernelArg(sizeof(cl_int), &dstStep));
+
+    const string kernelName = "centeredGradientKernel";
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
+// Enqueues "estimateDualVariablesKernel" for one update of the dual fields.
+//
+// u1, u2   - flow components; their byte offset is split into row/column
+//            element offsets and forwarded to the kernel.
+// p11..p22 - dual variables; a single step (taken from p11) is passed for all four.
+// taut     - scalar forwarded unchanged to the kernel.
+void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
+{
+    typedef pair<size_t, const void *> KernelArg;
+
+    Context *clCxt = u1.clCxt;
+
+    // One work-item per pixel, in 32x8 work-groups.
+    size_t localThread[]  = {32, 8, 1};
+    size_t globalThread[] = {static_cast<size_t>(u1.cols), static_cast<size_t>(u1.rows), 1};
+
+    // Steps in elements rather than bytes.
+    int u1_step  = static_cast<int>(u1.step / u1.elemSize());
+    int u2_step  = static_cast<int>(u2.step / u2.elemSize());
+    int p11_step = static_cast<int>(p11.step / p11.elemSize());
+
+    // Offset of the first element: row index, then column index in elements.
+    int u1_offset_y = u1.offset / u1.step;
+    int u1_offset_x = static_cast<int>(u1.offset % u1.step) / u1.elemSize();
+
+    int u2_offset_y = u2.offset / u2.step;
+    int u2_offset_x = static_cast<int>(u2.offset % u2.step) / u2.elemSize();
+
+    // Argument order must match the kernel signature.
+    vector<KernelArg> args;
+    args.push_back(KernelArg(sizeof(cl_mem),   &u1.data));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1.cols));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1.rows));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_step));
+    args.push_back(KernelArg(sizeof(cl_mem),   &u2.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p11.data));
+    args.push_back(KernelArg(sizeof(cl_int),   &p11_step));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p12.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p21.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p22.data));
+    args.push_back(KernelArg(sizeof(cl_float), &taut));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_step));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_offset_y));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_offset_y));
+
+    const string kernelName = "estimateDualVariablesKernel";
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+}
+
+// Enqueues "estimateUKernel" for one update of the flow components u1/u2.
+//
+// I1wx, I1wy, grad, rho_c - buffers produced by warpBackward for the current warp.
+// p11..p22                - dual variables.
+// u1, u2                  - flow components; their byte offset is split into
+//                           row/column element offsets and forwarded to the kernel.
+// error                   - buffer handed to the kernel; the caller sums it
+//                           to decide convergence.
+// l_t, theta              - scalars forwarded unchanged to the kernel.
+void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
+                             oclMat &rho_c, oclMat &p11, oclMat &p12,
+                             oclMat &p21, oclMat &p22, oclMat &u1,
+                             oclMat &u2, oclMat &error, float l_t, float theta)
+{
+    typedef pair<size_t, const void *> KernelArg;
+
+    Context* clCxt = I1wx.clCxt;
+
+    // One work-item per pixel, in 32x8 work-groups.
+    size_t localThread[]  = {32, 8, 1};
+    size_t globalThread[] = {static_cast<size_t>(I1wx.cols), static_cast<size_t>(I1wx.rows), 1};
+
+    // Steps in elements rather than bytes.
+    int I1wx_step = static_cast<int>(I1wx.step / I1wx.elemSize());
+    int u1_step   = static_cast<int>(u1.step / u1.elemSize());
+    int u2_step   = static_cast<int>(u2.step / u2.elemSize());
+
+    // Offset of the first element: row index, then column index in elements.
+    int u1_offset_y = u1.offset / u1.step;
+    int u1_offset_x = static_cast<int>(u1.offset % u1.step) / u1.elemSize();
+
+    int u2_offset_y = u2.offset / u2.step;
+    int u2_offset_x = static_cast<int>(u2.offset % u2.step) / u2.elemSize();
+
+    // Argument order must match the kernel signature.
+    vector<KernelArg> args;
+    args.push_back(KernelArg(sizeof(cl_mem),   &I1wx.data));
+    args.push_back(KernelArg(sizeof(cl_int),   &I1wx.cols));
+    args.push_back(KernelArg(sizeof(cl_int),   &I1wx.rows));
+    args.push_back(KernelArg(sizeof(cl_int),   &I1wx_step));
+    args.push_back(KernelArg(sizeof(cl_mem),   &I1wy.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &grad.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &rho_c.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p11.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p12.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p21.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &p22.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &u1.data));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_step));
+    args.push_back(KernelArg(sizeof(cl_mem),   &u2.data));
+    args.push_back(KernelArg(sizeof(cl_mem),   &error.data));
+    args.push_back(KernelArg(sizeof(cl_float), &l_t));
+    args.push_back(KernelArg(sizeof(cl_float), &theta));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_step));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int),   &u1_offset_y));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int),   &u2_offset_y));
+
+    const string kernelName = "estimateUKernel";
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+}
+
+// Enqueues "warpBackwardKernel" for the current flow estimate.
+//
+// I1, I1x and I1y are bound as textures for the duration of the call, so the
+// device must support image2d (asserted below). I1w, I1wx, I1wy, grad and rho
+// are passed to the kernel as plain buffers; u1/u2 are forwarded together with
+// their row/column element offsets.
+void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
+{
+    typedef pair<size_t, const void *> KernelArg;
+
+    Context* clCxt = I0.clCxt;
+    const bool isImgSupported = support_image2d(clCxt);
+
+    CV_Assert(isImgSupported);
+
+    // Steps in elements rather than bytes.
+    int u1Step   = static_cast<int>(u1.step / u1.elemSize());
+    int u2Step   = static_cast<int>(u2.step / u2.elemSize());
+    int I0Step   = static_cast<int>(I0.step / I0.elemSize());
+    int I1w_step = static_cast<int>(I1w.step / I1w.elemSize());
+
+    // Offset of the first element: row index, then column index in elements.
+    int u1_offset_y = u1.offset / u1.step;
+    int u1_offset_x = static_cast<int>(u1.offset % u1.step) / u1.elemSize();
+
+    int u2_offset_y = u2.offset / u2.step;
+    int u2_offset_x = static_cast<int>(u2.offset % u2.step) / u2.elemSize();
+
+    // One work-item per pixel, in 32x8 work-groups.
+    size_t localThread[]  = {32, 8, 1};
+    size_t globalThread[] = {static_cast<size_t>(I0.cols), static_cast<size_t>(I0.rows), 1};
+
+    // Texture views of the second frame and its gradients; released after the launch.
+    cl_mem I1_tex  = bindTexture(I1);
+    cl_mem I1x_tex = bindTexture(I1x);
+    cl_mem I1y_tex = bindTexture(I1y);
+
+    // Argument order must match the kernel signature.
+    vector<KernelArg> args;
+    args.push_back(KernelArg(sizeof(cl_mem), &I0.data));
+    args.push_back(KernelArg(sizeof(cl_int), &I0Step));
+    args.push_back(KernelArg(sizeof(cl_int), &I0.cols));
+    args.push_back(KernelArg(sizeof(cl_int), &I0.rows));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1_tex));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1x_tex));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1y_tex));
+    args.push_back(KernelArg(sizeof(cl_mem), &u1.data));
+    args.push_back(KernelArg(sizeof(cl_int), &u1Step));
+    args.push_back(KernelArg(sizeof(cl_mem), &u2.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1w.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1wx.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &I1wy.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &grad.data));
+    args.push_back(KernelArg(sizeof(cl_mem), &rho.data));
+    args.push_back(KernelArg(sizeof(cl_int), &I1w_step));
+    args.push_back(KernelArg(sizeof(cl_int), &u2Step));
+    args.push_back(KernelArg(sizeof(cl_int), &u1_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), &u1_offset_y));
+    args.push_back(KernelArg(sizeof(cl_int), &u2_offset_x));
+    args.push_back(KernelArg(sizeof(cl_int), &u2_offset_y));
+
+    const string kernelName = "warpBackwardKernel";
+    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
+
+    releaseTexture(I1_tex);
+    releaseTexture(I1x_tex);
+    releaseTexture(I1y_tex);
+}
\ No newline at end of file
#include "precomp.hpp"
#ifdef HAVE_OPENCL
-#define SHOW_RESULT 0
////////////////////////////////////////////////////////
// Canny
bool useL2gradient;
cv::Mat edges_gold;
- //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
}
};
double low_thresh = 50.0;
double high_thresh = 100.0;
- cv::resize(img, img, cv::Size(512, 384));
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
cv::ocl::oclMat edges;
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
- char filename [100];
- sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
-
cv::Mat edges_gold;
cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
-#if SHOW_RESULT
- cv::Mat edges_x2, ocl_edges(edges);
- edges_x2.create(edges.rows, edges.cols * 2, edges.type());
- edges_x2.setTo(0);
- cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
- cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
- cv::namedWindow("Canny result (left: cpu, right: ocl)");
- cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
- cv::waitKey();
-#endif //OUTPUT_RESULT
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif
\ No newline at end of file
cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
- EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+ EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4);
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
using namespace std;
using namespace cv;
extern string workdir;
+
+namespace
+{
+IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
+CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
+CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
return e.rect;
}
};
+}
-PARAM_TEST_CASE(Haar, double, int)
+PARAM_TEST_CASE(Haar, double, int, CascadeName)
{
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
- cv::ocl::OclCascadeClassifierBuf cascadebuf;
cv::CascadeClassifier cpucascade, cpunestedCascade;
double scale;
int flags;
+ std::string cascadeName;
virtual void SetUp()
{
scale = GET_PARAM(0);
flags = GET_PARAM(1);
- string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
+ cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2));
- if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName )))
+ if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
{
cout << "ERROR: Could not load classifier cascade" << endl;
return;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-
+
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
vector<Rect> faces, oclfaces;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
- MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
cv::ocl::oclMat image;
image.upload(smallImg);
+ cv::ocl::OclCascadeClassifierBuf cascadebuf;
+ if( !cascadebuf.load( cascadeName ) )
+ {
+ cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
+ return;
+ }
cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
- cascadebuf.release();
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
flags,
Size(30, 30), Size(0, 0) );
EXPECT_EQ(faces.size(), oclfaces.size());
+
+ // intentionally run ocl facedetect again and check if it still works after the first run
+ cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
+ flags,
+ Size(30, 30));
+ cascadebuf.release();
+ EXPECT_EQ(faces.size(), oclfaces.size());
}
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
Combine(Values(1.0),
- Values(CV_HAAR_SCALE_IMAGE, 0)));
+ Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
#endif // HAVE_OPENCL
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
+// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0);
}
}
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// CLAHE
+namespace
+{
+    // Parameter wrapper type carrying the clip limit as a double.
+    IMPLEMENT_PARAM_CLASS(ClipLimit, double)
+}
+
+// Fixture for the CLAHE tests, parameterised by image size and clip limit.
+PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit)
+{
+    // Test parameters.
+    cv::Size size;
+    double clipLimit;
+
+    // Host-side input and reference output.
+    cv::Mat src;
+    cv::Mat dst_gold;
+
+    // Device-side input and output.
+    cv::ocl::oclMat g_src;
+    cv::ocl::oclMat g_dst;
+
+    virtual void SetUp()
+    {
+        size = GET_PARAM(0);
+        clipLimit = GET_PARAM(1);
+
+        // Random CV_8UC1 image generated with bounds 0 and 256, then uploaded.
+        src = randomMat(TS::ptr()->get_rng(), size, CV_8UC1, 0, 256, false);
+        g_src.upload(src);
+    }
+};
+
+// The OpenCL CLAHE result must match the CPU result to within 1.0.
+TEST_P(CLAHE, Accuracy)
+{
+    // Device path.
+    cv::Ptr<cv::ocl::CLAHE> oclClahe = cv::ocl::createCLAHE(clipLimit);
+    oclClahe->apply(g_src, g_dst);
+    cv::Mat dst(g_dst);
+
+    // Reference path.
+    cv::Ptr<cv::CLAHE> cpuClahe = cv::createCLAHE(clipLimit);
+    cpuClahe->apply(src, dst_gold);
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+}
///////////////////////////Convolve//////////////////////////////////
PARAM_TEST_CASE(ConvolveTestBase, MatType, bool)
ONE_TYPE(CV_32SC1) //no use
));
+INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine(
+ Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)),
+ Values(0.0, 40.0)));
+
//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
// Values(CV_32FC1, CV_32FC1),
// Values(false))); // Values(false) is the reserved parameter
-/*M///////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// copy or use the software.
//
//
-// Intel License Agreement
+// License Agreement
// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
+// @Authors
+//
+//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
+// and/or other materials provided with the distribution.
//
-// * The name of Intel Corporation may not be used to endorse or promote products
+// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
extern string workdir;
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack
+namespace
+{
+    // Parameter wrapper type carrying the minimum corner distance as a double.
+    IMPLEMENT_PARAM_CLASS(MinDistance, double)
+}
+
+// Fixture for the GoodFeaturesToTrack tests, parameterised by minimum distance.
+PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
+{
+    double minDistance;
+
+    virtual void SetUp()
+    {
+        // The only parameter is forwarded to the detector by each test.
+        minDistance = GET_PARAM(0);
+    }
+};
+
+// Compares the OpenCL detector with cv::goodFeaturesToTrack on a real image:
+// both must return the same number of corners, and at most 1% of them may
+// differ after conversion to integer coordinates.
+TEST_P(GoodFeaturesToTrack, Accuracy)
+{
+    cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame.empty());
+
+    int maxCorners = 1000;
+    double qualityLevel = 0.01;
+
+    // Device path.
+    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+
+    cv::ocl::oclMat d_pts;
+    detector(oclMat(frame), d_pts);
+
+    ASSERT_FALSE(d_pts.empty());
+
+    std::vector<cv::Point2f> pts(d_pts.cols);
+    detector.downloadPoints(d_pts, pts);
+
+    // Reference path.
+    std::vector<cv::Point2f> pts_gold;
+    cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
+
+    ASSERT_EQ(pts_gold.size(), pts.size());
+
+    // Count the positions at which the two corner lists disagree.
+    size_t mismatches = 0;
+    for (size_t i = 0; i < pts.size(); ++i)
+    {
+        const cv::Point2i expected = pts_gold[i];
+        const cv::Point2i actual = pts[i];
+
+        if (std::abs(expected.x - actual.x) >= 1 || std::abs(expected.y - actual.y) >= 1)
+            ++mismatches;
+    }
+
+    const double bad_ratio = static_cast<double>(mismatches) / pts.size();
+
+    ASSERT_LE(bad_ratio, 0.01);
+}
+
+// An all-zero image must leave the corners matrix empty, even though the
+// matrix was pre-allocated before the call.
+TEST_P(GoodFeaturesToTrack, EmptyCorners)
+{
+    const int maxCorners = 1000;
+    const double qualityLevel = 0.01;
+
+    // All-zero input and a pre-allocated output.
+    cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
+    cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
+
+    cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
+    detector(src, corners);
+
+    ASSERT_TRUE(corners.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
+ testing::Values(MinDistance(0.0), MinDistance(3.0)));
+
+//////////////////////////////////////////////////////////////////////////
+// Fixture for the TV-L1 optical flow tests, parameterised by ROI usage.
+PARAM_TEST_CASE(TVL1, bool)
+{
+    // Passed to randomMat when the flow matrices are created.
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        useRoi = GET_PARAM(0);
+    }
+};
+
+// Compares the OpenCL TV-L1 flow with the CPU DualTVL1 implementation on a
+// pair of real frames; each flow component must be similar to within 3e-3.
+TEST_P(TVL1, Accuracy)
+{
+    cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    // Device path: the flow matrices are created through randomMat so that
+    // useRoi is honoured.
+    cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
+    cv::RNG &rng = TS::ptr()->get_rng();
+    cv::Mat flowx = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi);
+    cv::Mat flowy = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi);
+    cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy);
+    d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy);
+
+    // Reference path: the CPU result is split into its two planes.
+    cv::Ptr<cv::DenseOpticalFlow> cpuAlg = cv::createOptFlow_DualTVL1();
+    cv::Mat flow;
+    cpuAlg->calc(frame0, frame1, flow);
+    cv::Mat gold[2];
+    cv::split(flow, gold);
+
+    EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3);
+    EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3);
+}
+INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false));
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// PyrLKOpticalFlow
+
PARAM_TEST_CASE(Sparse, bool, bool)
{
bool useGray;
virtual void SetUp()
{
UseSmart = GET_PARAM(0);
- useGray = GET_PARAM(0);
+ useGray = GET_PARAM(1);
}
};
}
-INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
- Values(false, true),
- Values(false)));
+INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine(
+ Values(false, true),
+ Values(false, true)));
#endif // HAVE_OPENCL
switch (src.type()) {
case CV_8U:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<uchar>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec2b>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
- parallel_for(cv::BlockedRange(0, src.rows),
+ parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec3b>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
switch (srcImgs[0].type()) {
case CV_8U:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
- parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
+ parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
using namespace cv;
template <typename T>
-struct FastNlMeansDenoisingInvoker {
+struct FastNlMeansDenoisingInvoker : ParallelLoopBody {
public:
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
int template_window_size, int search_window_size, const float h);
- void operator() (const BlockedRange& range) const;
+ void operator() (const Range& range) const;
private:
void operator= (const FastNlMeansDenoisingInvoker&);
}
template <class T>
-void FastNlMeansDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
- int row_from = range.begin();
- int row_to = range.end() - 1;
+void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
+ int row_from = range.start;
+ int row_to = range.end - 1;
Array2d<int> dist_sums(search_window_size_, search_window_size_);
using namespace cv;
template <typename T>
-struct FastNlMeansMultiDenoisingInvoker {
+struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody {
public:
FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, int temporalWindowSize,
Mat& dst, int template_window_size, int search_window_size, const float h);
- void operator() (const BlockedRange& range) const;
+ void operator() (const Range& range) const;
private:
void operator= (const FastNlMeansMultiDenoisingInvoker&);
}
template <class T>
-void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
- int row_from = range.begin();
- int row_to = range.end() - 1;
+void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const {
+ int row_from = range.start;
+ int row_to = range.end - 1;
Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
};
-struct MatchPairsBody
+struct MatchPairsBody : ParallelLoopBody
{
- MatchPairsBody(const MatchPairsBody& other)
- : matcher(other.matcher), features(other.features),
- pairwise_matches(other.pairwise_matches), near_pairs(other.near_pairs) {}
-
MatchPairsBody(FeaturesMatcher &_matcher, const vector<ImageFeatures> &_features,
vector<MatchesInfo> &_pairwise_matches, vector<pair<int,int> > &_near_pairs)
: matcher(_matcher), features(_features),
pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {}
- void operator ()(const BlockedRange &r) const
+ void operator ()(const Range &r) const
{
const int num_images = static_cast<int>(features.size());
- for (int i = r.begin(); i < r.end(); ++i)
+ for (int i = r.start; i < r.end; ++i)
{
int from = near_pairs[i].first;
int to = near_pairs[i].second;
MatchPairsBody body(*this, features, pairwise_matches, near_pairs);
if (is_thread_safe_)
- parallel_for(BlockedRange(0, static_cast<int>(near_pairs.size())), body);
+ parallel_for_(Range(0, static_cast<int>(near_pairs.size())), body);
else
- body(BlockedRange(0, static_cast<int>(near_pairs.size())));
+ body(Range(0, static_cast<int>(near_pairs.size())));
LOGLN_CHAT("");
}
K_from(0,0) = cameras[edge.from].focal;
K_from(1,1) = cameras[edge.from].focal * cameras[edge.from].aspect;
K_from(0,2) = cameras[edge.from].ppx;
- K_from(0,2) = cameras[edge.from].ppy;
+ K_from(1,2) = cameras[edge.from].ppy;
Mat_<double> K_to = Mat::eye(3, 3, CV_64F);
K_to(0,0) = cameras[edge.to].focal;
K_to(1,1) = cameras[edge.to].focal * cameras[edge.to].aspect;
K_to(0,2) = cameras[edge.to].ppx;
- K_to(0,2) = cameras[edge.to].ppy;
+ K_to(1,2) = cameras[edge.to].ppy;
Mat R = K_from.inv() * pairwise_matches[pair_idx].H.inv() * K_to;
cameras[edge.to].R = cameras[edge.from].R * R;
if self.adb:
# construct name for aapt tool
self.aapt = [os.path.join(os.path.dirname(self.adb[0]), ("aapt","aapt.exe")[hostos == 'nt'])]
+ if not os.path.isfile(self.aapt[0]):
+ # it's moved in SDK r22
+ sdk_dir = os.path.dirname( os.path.dirname(self.adb[0]) )
+ aapt_fn = ("aapt", "aapt.exe")[hostos == 'nt']
+ for r, ds, fs in os.walk( os.path.join(sdk_dir, 'build-tools') ):
+ if aapt_fn in fs:
+ self.aapt = [ os.path.join(r, aapt_fn) ]
+ break
+ else:
+ self.error = "Can't find '%s' tool!" % aapt_fn
# fix has_perf_tests param
self.has_perf_tests = self.has_perf_tests == "ON"
//IEEE Trans. on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004
//http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf
-struct MOG2Invoker
+struct MOG2Invoker : ParallelLoopBody
{
MOG2Invoker(const Mat& _src, Mat& _dst,
GMM* _gmm, float* _mean,
cvtfunc = src->depth() != CV_32F ? getConvertFunc(src->depth(), CV_32F) : 0;
}
- void operator()(const BlockedRange& range) const
+ void operator()(const Range& range) const
{
- int y0 = range.begin(), y1 = range.end();
+ int y0 = range.start, y1 = range.end;
int ncols = src->cols, nchannels = src->channels();
AutoBuffer<float> buf(src->cols*nchannels);
float alpha1 = 1.f - alphaT;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( 2*nframes, history );
CV_Assert(learningRate >= 0);
- parallel_for(BlockedRange(0, image.rows),
- MOG2Invoker(image, fgmask,
- (GMM*)bgmodel.data,
- (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
- bgmodelUsedModes.data, nmixtures, (float)learningRate,
- (float)varThreshold,
- backgroundRatio, varThresholdGen,
- fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
- bShadowDetection, nShadowDetection));
+ parallel_for_(Range(0, image.rows),
+ MOG2Invoker(image, fgmask,
+ (GMM*)bgmodel.data,
+ (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
+ bgmodelUsedModes.data, nmixtures, (float)learningRate,
+ (float)varThreshold,
+ backgroundRatio, varThresholdGen,
+ fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
+ bShadowDetection, nShadowDetection));
}
void BackgroundSubtractorMOG2::getBackgroundImage(OutputArray backgroundImage) const
minEigThreshold = _minEigThreshold;
}
-void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const
+void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
{
Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
const Mat& I = *prevImg;
Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf);
Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn);
- for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ )
+ for( int ptidx = range.start; ptidx < range.end; ptidx++ )
{
Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
Point2f nextPt;
typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
#endif
- parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
- nextPyr[level * lvlStep2], prevPts, nextPts,
- status, err,
- winSize, criteria, level, maxLevel,
- flags, (float)minEigThreshold));
+ parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
+ nextPyr[level * lvlStep2], prevPts, nextPts,
+ status, err,
+ winSize, criteria, level, maxLevel,
+ flags, (float)minEigThreshold));
}
}
typedef short deriv_type;
- struct LKTrackerInvoker
+ struct LKTrackerInvoker : ParallelLoopBody
{
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
const Point2f* _prevPts, Point2f* _nextPts,
Size _winSize, TermCriteria _criteria,
int _level, int _maxLevel, int _flags, float _minEigThreshold );
- void operator()(const BlockedRange& range) const;
+ void operator()(const Range& range) const;
const Mat* prevImg;
const Mat* nextImg;
obj.info()->addParam(obj, "history", obj.history);
obj.info()->addParam(obj, "nmixtures", obj.nmixtures);
obj.info()->addParam(obj, "varThreshold", obj.varThreshold);
- obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection));
+ obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection);
+ obj.info()->addParam(obj, "backgroundRatio", obj.backgroundRatio);
+ obj.info()->addParam(obj, "varThresholdGen", obj.varThresholdGen);
+ obj.info()->addParam(obj, "fVarInit", obj.fVarInit);
+ obj.info()->addParam(obj, "fVarMin", obj.fVarMin);
+ obj.info()->addParam(obj, "fVarMax", obj.fVarMax);
+ obj.info()->addParam(obj, "fCT", obj.fCT);
+ obj.info()->addParam(obj, "nShadowDetection", obj.nShadowDetection);
+ obj.info()->addParam(obj, "fTau", obj.fTau));
///////////////////////////////////////////////////////////////////////////////////////////////////////////
estimateGlobMotionLeastSquaresAffine };
const int npoints = static_cast<int>(points0.size());
+ if (npoints < params.size)
+ return Mat::eye(3, 3, CV_32F);
+
const int niters = static_cast<int>(ceil(log(1 - params.prob) /
log(1 - pow(1 - params.eps, params.size))));
Mat PyrLkRobustMotionEstimator::estimate(const Mat &frame0, const Mat &frame1)
{
detector_->detect(frame0, keypointsPrev_);
+ if (keypointsPrev_.empty())
+ return Mat::eye(3, 3, CV_32F);
pointsPrev_.resize(keypointsPrev_.size());
for (size_t i = 0; i < keypointsPrev_.size(); ++i)
--- /dev/null
+# Copyright (c) 2010-2011, Ethan Rublee
+# Copyright (c) 2011-2013, Andrey Kamaev
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. The name of the copyright holders may be used to endorse or promote
+# products derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# ------------------------------------------------------------------------------
+# Android CMake toolchain file, for use with the Android NDK r5-r8
+# Requires cmake 2.6.3 or newer (2.8.5 or newer is recommended).
+# See home page: https://github.com/taka-no-me/android-cmake
+#
+# The file is maintained by the OpenCV project. The latest version can be found at
+# http://code.opencv.org/projects/opencv/repository/revisions/master/changes/android/android.toolchain.cmake
+#
+# Usage Linux:
+# $ export ANDROID_NDK=/absolute/path/to/the/android-ndk
+# $ mkdir build && cd build
+# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+# $ make -j8
+#
+# Usage Linux (using standalone toolchain):
+# $ export ANDROID_STANDALONE_TOOLCHAIN=/absolute/path/to/android-toolchain
+# $ mkdir build && cd build
+# $ cmake -DCMAKE_TOOLCHAIN_FILE=path/to/the/android.toolchain.cmake ..
+# $ make -j8
+#
+# Usage Windows:
+# You need a native port of make to build your project.
+# Android NDK r7 (or newer) already has make.exe on board.
+# For older NDK you have to install it separately.
+# For example, this one: http://gnuwin32.sourceforge.net/packages/make.htm
+#
+# $ SET ANDROID_NDK=C:\absolute\path\to\the\android-ndk
+# $ mkdir build && cd build
+# $ cmake.exe -G"MinGW Makefiles"
+# -DCMAKE_TOOLCHAIN_FILE=path\to\the\android.toolchain.cmake
+# -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%\prebuilt\windows\bin\make.exe" ..
+# $ cmake.exe --build .
+#
+#
+# Options (can be set as cmake parameters: -D<option_name>=<value>):
+# ANDROID_NDK=/opt/android-ndk - path to the NDK root.
+# Can be set as environment variable. Can be set only at first cmake run.
+#
+# ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain - path to the
+# standalone toolchain. This option is not used if full NDK is found
+# (ignored if ANDROID_NDK is set).
+# Can be set as environment variable. Can be set only at first cmake run.
+#
+# ANDROID_ABI=armeabi-v7a - specifies the target Application Binary
+# Interface (ABI). This option nearly matches to the APP_ABI variable
+# used by ndk-build tool from Android NDK.
+#
+# Possible targets are:
+# "armeabi" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "armeabi-v7a" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "armeabi-v7a with NEON" - same as armeabi-v7a, but
+# sets NEON as floating-point unit
+# "armeabi-v7a with VFPV3" - same as armeabi-v7a, but
+# sets VFPV3 as floating-point unit (has 32 registers instead of 16).
+# "armeabi-v6 with VFP" - tuned for ARMv6 processors having VFP.
+# "x86" - matches to the NDK ABI with the same name.
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+# "mips" - matches to the NDK ABI with the same name
+# (It is not tested on real devices by the authors of this toolchain)
+# See ${ANDROID_NDK}/docs/CPU-ARCH-ABIS.html for the documentation.
+#
+# ANDROID_NATIVE_API_LEVEL=android-8 - level of the Android API to compile for.
+# Option is read-only when standalone toolchain is used.
+#
+# ANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.6 - the name of compiler
+# toolchain to be used. The list of possible values depends on the NDK
+# version. For NDK r8c the possible values are:
+#
+# * arm-linux-androideabi-4.4.3
+# * arm-linux-androideabi-4.6
+# * arm-linux-androideabi-clang3.1
+# * mipsel-linux-android-4.4.3
+# * mipsel-linux-android-4.6
+# * mipsel-linux-android-clang3.1
+# * x86-4.4.3
+# * x86-4.6
+# * x86-clang3.1
+#
+# ANDROID_FORCE_ARM_BUILD=OFF - set ON to generate 32-bit ARM instructions
+# instead of Thumb. Is not available for "x86" (inapplicable) and
+# "armeabi-v6 with VFP" (is forced to be ON) ABIs.
+#
+# ANDROID_NO_UNDEFINED=ON - set ON to show all undefined symbols as linker
+# errors even if they are not used.
+#
+# ANDROID_SO_UNDEFINED=OFF - set ON to allow undefined symbols in shared
+# libraries. Automatically turned on for NDK r5x and r6x due to GLESv2
+# problems.
+#
+# LIBRARY_OUTPUT_PATH_ROOT=${CMAKE_SOURCE_DIR} - where to output binary
+# files. See additional details below.
+#
+# ANDROID_SET_OBSOLETE_VARIABLES=ON - if set, then toolchain defines some
+# obsolete variables which were used by previous versions of this file for
+# backward compatibility.
+#
+# ANDROID_STL=gnustl_static - specify the runtime to use.
+#
+# Possible values are:
+# none -> Do not configure the runtime.
+# system -> Use the default minimal system C++ runtime library.
+# Implies -fno-rtti -fno-exceptions.
+# Is not available for standalone toolchain.
+# system_re -> Use the default minimal system C++ runtime library.
+# Implies -frtti -fexceptions.
+# Is not available for standalone toolchain.
+# gabi++_static -> Use the GAbi++ runtime as a static library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# gabi++_shared -> Use the GAbi++ runtime as a shared library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# stlport_static -> Use the STLport runtime as a static library.
+# Implies -fno-rtti -fno-exceptions for NDK before r7.
+# Implies -frtti -fno-exceptions for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# stlport_shared -> Use the STLport runtime as a shared library.
+# Implies -fno-rtti -fno-exceptions for NDK before r7.
+# Implies -frtti -fno-exceptions for NDK r7 and newer.
+# Is not available for standalone toolchain.
+# gnustl_static -> Use the GNU STL as a static library.
+# Implies -frtti -fexceptions.
+# gnustl_shared -> Use the GNU STL as a shared library.
+# Implies -frtti -fno-exceptions.
+# Available for NDK r7b and newer.
+# Silently degrades to gnustl_static if not available.
+#
+# ANDROID_STL_FORCE_FEATURES=ON - turn rtti and exceptions support based on
+# chosen runtime. If disabled, then the user is responsible for settings
+# these options.
+#
+# What?:
+# android-cmake toolchain searches for NDK/toolchain in the following order:
+# ANDROID_NDK - cmake parameter
+# ANDROID_NDK - environment variable
+# ANDROID_STANDALONE_TOOLCHAIN - cmake parameter
+# ANDROID_STANDALONE_TOOLCHAIN - environment variable
+# ANDROID_NDK - default locations
+# ANDROID_STANDALONE_TOOLCHAIN - default locations
+#
+# Make sure to do the following in your scripts:
+# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${my_cxx_flags}" )
+# SET( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${my_cxx_flags}" )
+# The flags will be prepopulated with critical flags, so don't lose them.
+# Also be aware that toolchain also sets configuration-specific compiler
+# flags and linker flags.
+#
+# ANDROID and BUILD_ANDROID will be set to true, you may test any of these
+# variables to make necessary Android-specific configuration changes.
+#
+# Also ARMEABI or ARMEABI_V7A or X86 or MIPS will be set true, mutually
+# exclusive. NEON option will be set true if VFP is set to NEON.
+#
+# LIBRARY_OUTPUT_PATH_ROOT should be set in cache to determine where Android
+# libraries will be installed.
+# Default is ${CMAKE_SOURCE_DIR}, and the android libs will always be
+# under the ${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}
+# (depending on the target ABI). This is convenient for Android packaging.
+#
+# Change Log:
+# - initial version December 2010
+# - April 2011
+# [+] added possibility to build with NDK (without standalone toolchain)
+# [+] support cross-compilation on Windows (native, no cygwin support)
+# [+] added compiler option to force "char" type to be signed
+# [+] added toolchain option to compile to 32-bit ARM instructions
+# [+] added toolchain option to disable SWIG search
+# [+] added platform "armeabi-v7a with VFPV3"
+# [~] ARM_TARGETS renamed to ARM_TARGET
+# [+] EXECUTABLE_OUTPUT_PATH is set by toolchain (required on Windows)
+# [~] Fixed bug with ANDROID_API_LEVEL variable
+# [~] turn off SWIG search if it is not found first time
+# - May 2011
+# [~] ANDROID_LEVEL is renamed to ANDROID_API_LEVEL
+# [+] ANDROID_API_LEVEL is detected by toolchain if not specified
+# [~] added guard to prevent changing of output directories on the first
+# cmake pass
+# [~] toolchain exits with error if ARM_TARGET is not recognized
+# - June 2011
+# [~] default NDK path is updated for version r5c
+# [+] variable CMAKE_SYSTEM_PROCESSOR is set based on ARM_TARGET
+# [~] toolchain install directory is added to linker paths
+# [-] removed SWIG-related stuff from toolchain
+# [+] added macro find_host_package, find_host_program to search
+# packages/programs on the host system
+# [~] fixed path to STL library
+# - July 2011
+# [~] fixed options caching
+# [~] search for all supported NDK versions
+# [~] allowed spaces in NDK path
+# - September 2011
+# [~] updated for NDK r6b
+# - November 2011
+# [*] rewritten for NDK r7
+# [+] x86 toolchain support (experimental)
+# [+] added "armeabi-v6 with VFP" ABI for ARMv6 processors.
+# [~] improved compiler and linker flags management
+# [+] support different build flags for Release and Debug configurations
+# [~] by default compiler flags the same as used by ndk-build (but only
+# where reasonable)
+# [~] ANDROID_NDK_TOOLCHAIN_ROOT is split into ANDROID_STANDALONE_TOOLCHAIN
+# and ANDROID_TOOLCHAIN_ROOT
+# [~] ARM_TARGET is renamed to ANDROID_ABI
+# [~] ARMEABI_NDK_NAME is renamed to ANDROID_NDK_ABI_NAME
+# [~] ANDROID_API_LEVEL is renamed to ANDROID_NATIVE_API_LEVEL
+# - January 2012
+# [+] added stlport_static support (experimental)
+# [+] added special check for cygwin
+# [+] filtered out hidden files (starting with .) while globbing inside NDK
+# [+] automatically applied GLESv2 linkage fix for NDK revisions 5-6
+# [+] added ANDROID_GET_ABI_RAWNAME to get NDK ABI names by CMake flags
+# - February 2012
+# [+] updated for NDK r7b
+# [~] fixed cmake try_compile() command
+# [~] Fix for missing install_name_tool on OS X
+# - March 2012
+# [~] fixed incorrect C compiler flags
+# [~] fixed CMAKE_SYSTEM_PROCESSOR change on ANDROID_ABI change
+# [+] improved toolchain loading speed
+# [+] added assembler language support (.S)
+# [+] allowed preset search paths and extra search suffixes
+# - April 2012
+# [+] updated for NDK r7c
+# [~] fixed most of problems with compiler/linker flags and caching
+# [+] added option ANDROID_FUNCTION_LEVEL_LINKING
+# - May 2012
+# [+] updated for NDK r8
+# [+] added mips architecture support
+# - August 2012
+# [+] updated for NDK r8b
+# [~] all intermediate files generated by toolchain are moved to CMakeFiles
+# [~] libstdc++ and libsupc are removed from explicit link libraries
+# [+] added CCache support (via NDK_CCACHE environment or cmake variable)
+# [+] added gold linker support for NDK r8b
+# [~] fixed mips linker flags for NDK r8b
+# - September 2012
+# [+] added NDK release name detection (see ANDROID_NDK_RELEASE)
+# [+] added support for all C++ runtimes from NDK
+# (system, gabi++, stlport, gnustl)
+# [+] improved warnings on known issues of NDKs
+# [~] use gold linker as default if available (NDK r8b)
+# [~] globally turned off rpath
+# [~] compiler options are aligned with NDK r8b
+# - October 2012
+# [~] fixed C++ linking: explicitly link with math library (OpenCV #2426)
+# - November 2012
+# [+] updated for NDK r8c
+# [+] added support for clang compiler
+# - December 2012
+# [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable
+# [+] adjust API level to closest compatible as NDK does
+# [~] fixed ccache full path search
+# [+] updated for NDK r8d
+# [~] compiler options are aligned with NDK r8d
+# - March 2013
+# [+] updated for NDK r8e (x86 version)
+# [+] support x86_64 version of NDK
+# - April 2013
+# [+] support non-release NDK layouts (from Linaro git and Android git)
+# [~] automatically detect if explicit link to crtbegin_*.o is needed
+# ------------------------------------------------------------------------------
+
+cmake_minimum_required( VERSION 2.6.3 )
+
+if( DEFINED CMAKE_CROSSCOMPILING )
+ # subsequent toolchain loading is not really needed
+ return()
+endif()
+
+if( CMAKE_TOOLCHAIN_FILE )
+ # touch toolchain variable only to suppress "unused variable" warning
+endif()
+
+# remember whether this file is being loaded from inside a try_compile() project;
+# if so, also load the optional config file expected one directory above that project
+get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE )
+if( _CMAKE_IN_TRY_COMPILE )
+ include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL )
+endif()
+
+# this one is important
+set( CMAKE_SYSTEM_NAME Linux )
+# this one not so much
+set( CMAKE_SYSTEM_VERSION 1 )
+
+# rpath makes little sense for Android
+set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." )
+
+# directory-name suffixes appended to every NDK search path when probing the default
+# locations (the trailing "" also tries the bare path); ANDROID_EXTRA_NDK_VERSIONS,
+# if set by the caller, is tried first
+set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" )
+# default NDK locations; skipped entirely when the caller predefines ANDROID_NDK_SEARCH_PATHS
+if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS)
+ if( CMAKE_HOST_WIN32 )
+ file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS )
+ set( ANDROID_NDK_SEARCH_PATHS "${ANDROID_NDK_SEARCH_PATHS}/android-ndk" "$ENV{SystemDrive}/NVPACK/android-ndk" )
+ else()
+ file( TO_CMAKE_PATH "$ENV{HOME}" ANDROID_NDK_SEARCH_PATHS )
+ set( ANDROID_NDK_SEARCH_PATHS /opt/android-ndk "${ANDROID_NDK_SEARCH_PATHS}/NVPACK/android-ndk" )
+ endif()
+endif()
+# default standalone toolchain location, likewise overridable by the caller
+if(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH)
+ set( ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH /opt/android-toolchain )
+endif()
+
+# ABI names grouped by toolchain architecture (arm, x86, mipsel)
+set( ANDROID_SUPPORTED_ABIS_arm "armeabi-v7a;armeabi;armeabi-v7a with NEON;armeabi-v7a with VFPV3;armeabi-v6 with VFP" )
+set( ANDROID_SUPPORTED_ABIS_x86 "x86" )
+set( ANDROID_SUPPORTED_ABIS_mipsel "mips" )
+
+# default native API levels; x86 and mips carry their own defaults
+set( ANDROID_DEFAULT_NDK_API_LEVEL 8 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_x86 9 )
+set( ANDROID_DEFAULT_NDK_API_LEVEL_mips 9 )
+
+
+# __LIST_FILTER( <listvar> <regex> )
+# Removes from the list variable named <listvar> every item that matches <regex>.
+# <listvar> - NAME of the list variable (not its value); it is modified in place.
+# <regex> - regular expression tested against each item with if( ... MATCHES ... ).
+# Nothing is done when if( <listvar> ) evaluates to false (e.g. the list is empty).
+macro( __LIST_FILTER listvar regex )
+ if( ${listvar} )
+ # the loop walks a snapshot of the list, so removing items while iterating is safe
+ foreach( __val ${${listvar}} )
+ if( __val MATCHES "${regex}" )
+ list( REMOVE_ITEM ${listvar} "${__val}" )
+ endif()
+ endforeach()
+ endif()
+endmacro()
+
+# __INIT_VARIABLE( <var_name> [PATH] [VALUES] <candidate>... )
+# Gives <var_name> an initial value, but only if it is currently empty.
+# Candidates are examined in the order given and the first usable one wins:
+# ENV_<name> - the value of environment variable <name>
+# <name> - the value of variable <name>, if such a variable is defined
+# anything else - taken literally, but only when it appears after the VALUES
+# keyword; before VALUES such a token yields an empty value
+# OBSOLETE_<candidate> - the "OBSOLETE_" text is stripped and the rest is handled as
+# above (so OBSOLETE_ENV_<name> works); if that candidate ends
+# up being used, a warning is printed (not during try_compile)
+# PATH keyword (anywhere in the argument list):
+# - a current <var_name> value that does not exist on disk is dropped from the cache
+# - a candidate is usable only if it exists on disk
+# - the resulting value is normalized with file( TO_CMAKE_PATH )
+# Without PATH, the first non-empty candidate value is used as is.
+macro( __INIT_VARIABLE var_name )
+ # first pass over the arguments: is the PATH keyword present?
+ set( __test_path 0 )
+ foreach( __var ${ARGN} )
+ if( __var STREQUAL "PATH" )
+ set( __test_path 1 )
+ break()
+ endif()
+ endforeach()
+ if( __test_path AND NOT EXISTS "${${var_name}}" )
+ unset( ${var_name} CACHE )
+ endif()
+ if( "${${var_name}}" STREQUAL "" )
+ # second pass: resolve each candidate to a value until one is accepted
+ set( __values 0 )
+ foreach( __var ${ARGN} )
+ if( __var STREQUAL "VALUES" )
+ set( __values 1 )
+ elseif( NOT __var STREQUAL "PATH" )
+ set( __obsolete 0 )
+ if( __var MATCHES "^OBSOLETE_.*$" )
+ string( REPLACE "OBSOLETE_" "" __var "${__var}" )
+ set( __obsolete 1 )
+ endif()
+ if( __var MATCHES "^ENV_.*$" )
+ string( REPLACE "ENV_" "" __var "${__var}" )
+ set( __value "$ENV{${__var}}" )
+ elseif( DEFINED ${__var} )
+ # a defined variable name takes precedence over literal use, even after VALUES
+ set( __value "${${__var}}" )
+ else()
+ if( __values )
+ set( __value "${__var}" )
+ else()
+ set( __value "" )
+ endif()
+ endif()
+ if( NOT "${__value}" STREQUAL "" )
+ if( __test_path )
+ # in PATH mode a non-existent candidate is skipped and the search continues
+ if( EXISTS "${__value}" )
+ file( TO_CMAKE_PATH "${__value}" ${var_name} )
+ if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+ endif()
+ break()
+ endif()
+ else()
+ set( ${var_name} "${__value}" )
+ if( __obsolete AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Using value of obsolete variable ${__var} as initial value for ${var_name}. Please note, that ${__var} can be completely removed in future versions of the toolchain." )
+ endif()
+ break()
+ endif()
+ endif()
+ endif()
+ endforeach()
+ unset( __value )
+ unset( __values )
+ unset( __obsolete )
+ elseif( __test_path )
+ # the variable already has a value: only normalize its path separators
+ file( TO_CMAKE_PATH "${${var_name}}" ${var_name} )
+ endif()
+ unset( __test_path )
+endmacro()
+
+# __DETECT_NATIVE_API_LEVEL( <_var> <_path> )
+# Extracts the Android native API level from the header file <_path>.
+# <_var> - name of the variable that receives the number
+# <_path> - file expected to contain a line of the form "#define __ANDROID_API__ <number>"
+# Reports SEND_ERROR when no such line is found in the file.
+macro( __DETECT_NATIVE_API_LEVEL _var _path )
+ SET( __ndkApiLevelRegex "^[\t ]*#define[\t ]+__ANDROID_API__[\t ]+([0-9]+)[\t ]*$" )
+ # keep only the line(s) of the file that match the #define pattern
+ FILE( STRINGS ${_path} __apiFileContent REGEX "${__ndkApiLevelRegex}" )
+ if( NOT __apiFileContent )
+ message( SEND_ERROR "Could not get Android native API level. Probably you have specified invalid level value, or your copy of NDK/toolchain is broken." )
+ endif()
+ # replace the matched line by its first capture group, i.e. the number itself
+ string( REGEX REPLACE "${__ndkApiLevelRegex}" "\\1" ${_var} "${__apiFileContent}" )
+ unset( __apiFileContent )
+ unset( __ndkApiLevelRegex )
+endmacro()
+
+# __DETECT_TOOLCHAIN_MACHINE_NAME( <_var> <_root> )
+# Derives the toolchain machine name from the gcc executable found in <_root>/bin.
+# <_var> - name of the variable that receives the machine name
+# <_root> - toolchain root directory
+# The name is the file name of "<_root>/bin/*-gcc${TOOL_OS_SUFFIX}" without its
+# extension and with "-gcc" removed. <_var> is set to "" when <_root> does not exist,
+# or when the number of matching executables is not exactly one (a warning is printed
+# in that case; inside try_compile the check is bypassed and no warning is issued).
+macro( __DETECT_TOOLCHAIN_MACHINE_NAME _var _root )
+ if( EXISTS "${_root}" )
+ file( GLOB __gccExePath RELATIVE "${_root}/bin/" "${_root}/bin/*-gcc${TOOL_OS_SUFFIX}" )
+ # ignore entries whose name starts with a dot
+ __LIST_FILTER( __gccExePath "^[.].*" )
+ list( LENGTH __gccExePath __gccExePathsCount )
+ if( NOT __gccExePathsCount EQUAL 1 AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "Could not determine machine name for compiler from ${_root}" )
+ set( ${_var} "" )
+ else()
+ get_filename_component( __gccExeName "${__gccExePath}" NAME_WE )
+ string( REPLACE "-gcc" "" ${_var} "${__gccExeName}" )
+ endif()
+ unset( __gccExePath )
+ unset( __gccExePathsCount )
+ unset( __gccExeName )
+ else()
+ set( ${_var} "" )
+ endif()
+endmacro()
+
+
+# fight against cygwin
+# NOTE(review): the option name is spelled "SYGWIN"; it is a cache variable users may
+# already set, so the spelling is left as is
+set( ANDROID_FORBID_SYGWIN TRUE CACHE BOOL "Prevent cmake from working under cygwin and using cygwin tools")
+mark_as_advanced( ANDROID_FORBID_SYGWIN )
+if( ANDROID_FORBID_SYGWIN )
+ # refuse to run under a cygwin build of cmake
+ if( CYGWIN )
+ message( FATAL_ERROR "Android NDK and android-cmake toolchain are not welcome Cygwin. It is unlikely that this cmake toolchain will work under cygwin. But if you want to try then you can set cmake variable ANDROID_FORBID_SYGWIN to FALSE and rerun cmake." )
+ endif()
+
+ if( CMAKE_HOST_WIN32 )
+ # remove cygwin from PATH
+ # PATH is handled as a CMake list here: every entry matching "cygwin" is dropped
+ # and the result is written back to the environment of this cmake process
+ set( __new_path "$ENV{PATH}")
+ __LIST_FILTER( __new_path "cygwin" )
+ set(ENV{PATH} "${__new_path}")
+ unset(__new_path)
+ endif()
+endif()
+
+
+# detect current host platform
+# ANDROID_NDK_HOST_X64 defaults to ON on a 64-bit host processor unless the caller
+# has already defined it
+if( NOT DEFINED ANDROID_NDK_HOST_X64 AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64")
+ set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" )
+ mark_as_advanced( ANDROID_NDK_HOST_X64 )
+endif()
+
+# ANDROID_NDK_HOST_SYSTEM_NAME - host name with the x86_64 suffix
+# ANDROID_NDK_HOST_SYSTEM_NAME2 - the 32-bit host name
+# TOOL_OS_SUFFIX - executable suffix of the host tools (".exe" on Windows)
+set( TOOL_OS_SUFFIX "" )
+if( CMAKE_HOST_APPLE )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" )
+elseif( CMAKE_HOST_WIN32 )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" )
+ set( TOOL_OS_SUFFIX ".exe" )
+elseif( CMAKE_HOST_UNIX )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" )
+else()
+ message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" )
+endif()
+
+# when the 64-bit toolchain is not requested, the primary name falls back to the 32-bit one
+if( NOT ANDROID_NDK_HOST_X64 )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} )
+endif()
+
+# see if we have path to Android NDK
+# (an existing ANDROID_NDK value is kept; otherwise the ANDROID_NDK environment variable is tried)
+__INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK )
+if( NOT ANDROID_NDK )
+ # see if we have path to Android standalone toolchain
+ # (the obsolete ANDROID_NDK_TOOLCHAIN_ROOT cmake/environment variables are still honoured, with a warning)
+ __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ENV_ANDROID_STANDALONE_TOOLCHAIN OBSOLETE_ANDROID_NDK_TOOLCHAIN_ROOT OBSOLETE_ENV_ANDROID_NDK_TOOLCHAIN_ROOT )
+
+ if( NOT ANDROID_STANDALONE_TOOLCHAIN )
+ #try to find Android NDK in one of the default locations
+ # candidate list = every search path combined with every supported version suffix
+ set( __ndkSearchPaths )
+ foreach( __ndkSearchPath ${ANDROID_NDK_SEARCH_PATHS} )
+ foreach( suffix ${ANDROID_SUPPORTED_NDK_VERSIONS} )
+ list( APPEND __ndkSearchPaths "${__ndkSearchPath}${suffix}" )
+ endforeach()
+ endforeach()
+ __INIT_VARIABLE( ANDROID_NDK PATH VALUES ${__ndkSearchPaths} )
+ unset( __ndkSearchPaths )
+
+ if( ANDROID_NDK )
+ message( STATUS "Using default path for Android NDK: ${ANDROID_NDK}" )
+ message( STATUS " If you prefer to use a different location, please define a cmake or environment variable: ANDROID_NDK" )
+ else()
+ #try to find Android standalone toolchain in one of the default locations
+ __INIT_VARIABLE( ANDROID_STANDALONE_TOOLCHAIN PATH ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH )
+
+ if( ANDROID_STANDALONE_TOOLCHAIN )
+ message( STATUS "Using default path for standalone toolchain ${ANDROID_STANDALONE_TOOLCHAIN}" )
+ message( STATUS " If you prefer to use a different location, please define the variable: ANDROID_STANDALONE_TOOLCHAIN" )
+ endif( ANDROID_STANDALONE_TOOLCHAIN )
+ endif( ANDROID_NDK )
+ endif( NOT ANDROID_STANDALONE_TOOLCHAIN )
+endif( NOT ANDROID_NDK )
+
+# remember found paths
+# Exactly one of BUILD_WITH_ANDROID_NDK / BUILD_WITH_STANDALONE_TOOLCHAIN is set here;
+# the NDK takes priority. Configuration stops with FATAL_ERROR when neither was found.
+if( ANDROID_NDK )
+ get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE )
+ set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE )
+ set( BUILD_WITH_ANDROID_NDK True )
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" )
+ # the release name (letter r, digits, optional lowercase letter) is taken from the
+ # first line of RELEASE.TXT that contains it
+ file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? )
+ string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" )
+ else()
+ # no RELEASE.TXT: use placeholder release values
+ set( ANDROID_NDK_RELEASE "r1x" )
+ set( ANDROID_NDK_RELEASE_FULL "unreleased" )
+ endif()
+elseif( ANDROID_STANDALONE_TOOLCHAIN )
+ get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE )
+ # try to detect change
+ # the leading characters of CMAKE_AR must equal the toolchain path
+ # (presumably CMAKE_AR is the archiver path remembered from a previous run - confirm)
+ if( CMAKE_AR )
+ string( LENGTH "${ANDROID_STANDALONE_TOOLCHAIN}" __length )
+ string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidStandaloneToolchainPreviousPath )
+ if( NOT __androidStandaloneToolchainPreviousPath STREQUAL ANDROID_STANDALONE_TOOLCHAIN )
+ message( FATAL_ERROR "It is not possible to change path to the Android standalone toolchain on subsequent run." )
+ endif()
+ unset( __androidStandaloneToolchainPreviousPath )
+ unset( __length )
+ endif()
+ set( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" CACHE INTERNAL "Path of the Android standalone toolchain" FORCE )
+ set( BUILD_WITH_STANDALONE_TOOLCHAIN True )
+else()
+ # only the first NDK search path is shown in the hint below
+ list(GET ANDROID_NDK_SEARCH_PATHS 0 ANDROID_NDK_SEARCH_PATH)
+ message( FATAL_ERROR "Could not find neither Android NDK nor Android standalone toolchain.
+ You should either set an environment variable:
+ export ANDROID_NDK=~/my-android-ndk
+ or
+ export ANDROID_STANDALONE_TOOLCHAIN=~/my-android-toolchain
+ or put the toolchain or NDK in the default path:
+ sudo ln -s ~/my-android-ndk ${ANDROID_NDK_SEARCH_PATH}
+ sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" )
+endif()
+
+# android NDK layout
+# Determines ANDROID_NDK_LAYOUT (RELEASE, LINARO or ANDROID) and, from it, where the
+# compiler toolchains live:
+# ANDROID_NDK_TOOLCHAINS_PATH - directory holding the toolchains
+# ANDROID_NDK_TOOLCHAINS_SUBPATH - per-toolchain sub-directory for the selected host
+# ANDROID_NDK_TOOLCHAINS_SUBPATH2 - same, for the 32-bit host name
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT DEFINED ANDROID_NDK_LAYOUT )
+ # try to automatically detect the layout
+ if( EXISTS "${ANDROID_NDK}/RELEASE.TXT")
+ set( ANDROID_NDK_LAYOUT "RELEASE" )
+ elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" )
+ set( ANDROID_NDK_LAYOUT "LINARO" )
+ elseif( EXISTS "${ANDROID_NDK}/../../gcc/" )
+ set( ANDROID_NDK_LAYOUT "ANDROID" )
+ endif()
+ endif()
+ set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" )
+ mark_as_advanced( ANDROID_NDK_LAYOUT )
+ if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" )
+ set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" )
+ else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE"
+ # this branch is also taken when the layout could not be detected (empty value)
+ set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" )
+ endif()
+ get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE )
+
+ # try to detect change of NDK
+ # the leading characters of CMAKE_AR must equal the toolchains directory
+ # (presumably CMAKE_AR is the archiver path remembered from a previous run - confirm)
+ if( CMAKE_AR )
+ string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length )
+ string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath )
+ if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH )
+ message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first.
+ " )
+ endif()
+ unset( __androidNdkPreviousPath )
+ unset( __length )
+ endif()
+endif()
+
+
+# get all the details about standalone toolchain
+# Produces the index-aligned lists __availableToolchains, __availableToolchainMachines,
+# __availableToolchainArchs and __availableToolchainCompilerVersions for the standalone case.
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" )
+ set( ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ set( __availableToolchains "standalone" )
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __availableToolchainMachines "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ if( NOT __availableToolchainMachines )
+ message( FATAL_ERROR "Could not determine machine name of your toolchain. Probably your Android standalone toolchain is broken." )
+ endif()
+ # map the machine name onto an architecture name
+ # NOTE(review): no else() here, so an unrecognized machine leaves __availableToolchainArchs unset
+ if( __availableToolchainMachines MATCHES i686 )
+ set( __availableToolchainArchs "x86" )
+ elseif( __availableToolchainMachines MATCHES arm )
+ set( __availableToolchainArchs "arm" )
+ elseif( __availableToolchainMachines MATCHES mipsel )
+ set( __availableToolchainArchs "mipsel" )
+ endif()
+ # ask the toolchain's gcc for its version and keep only the numeric major.minor[.patch] part
+ execute_process( COMMAND "${ANDROID_STANDALONE_TOOLCHAIN}/bin/${__availableToolchainMachines}-gcc${TOOL_OS_SUFFIX}" -dumpversion
+ OUTPUT_VARIABLE __availableToolchainCompilerVersions OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?" __availableToolchainCompilerVersions "${__availableToolchainCompilerVersions}" )
+ # a clang binary in the same toolchain is offered as a second entry; machine, arch and
+ # version are duplicated so that all four lists keep the same length
+ if( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/bin/clang${TOOL_OS_SUFFIX}" )
+ list( APPEND __availableToolchains "standalone-clang" )
+ list( APPEND __availableToolchainMachines ${__availableToolchainMachines} )
+ list( APPEND __availableToolchainArchs ${__availableToolchainArchs} )
+ list( APPEND __availableToolchainCompilerVersions ${__availableToolchainCompilerVersions} )
+ endif()
+endif()
+
+# __GLOB_NDK_TOOLCHAINS( <resultListVar> <candidateListVar> <subpath> )
+# Checks every toolchain directory name stored in <candidateListVar> and, for each
+# one whose machine name can be detected, appends:
+#   - the toolchain name to <resultListVar>
+#   - machine, architecture and compiler version to __availableToolchainMachines,
+#     __availableToolchainArchs and __availableToolchainCompilerVersions
+# <subpath> is appended to "${ANDROID_NDK_TOOLCHAINS_PATH}/<toolchain>" when probing.
+# NOTE(review): __arch is not reset between iterations, so a machine matching none of
+# i686/arm/mipsel would reuse whatever value __arch already had.
+macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath )
+ foreach( __toolchain ${${__availableToolchainsLst}} )
+ # a "-clang3.N" entry without its own directory is probed through the matching "-4.6" gcc toolchain
+ if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" )
+ string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" )
+ else()
+ set( __gcc_toolchain "${__toolchain}" )
+ endif()
+ __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" )
+ if( __machine )
+ # the compiler version is the numeric suffix of the directory name (a trailing "x" component is allowed)
+ string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" )
+ if( __machine MATCHES i686 )
+ set( __arch "x86" )
+ elseif( __machine MATCHES arm )
+ set( __arch "arm" )
+ elseif( __machine MATCHES mipsel )
+ set( __arch "mipsel" )
+ endif()
+ list( APPEND __availableToolchainMachines "${__machine}" )
+ list( APPEND __availableToolchainArchs "${__arch}" )
+ list( APPEND __availableToolchainCompilerVersions "${__version}" )
+ list( APPEND ${__availableToolchainsVar} "${__toolchain}" )
+ endif()
+ unset( __gcc_toolchain )
+ endforeach()
+endmacro()
+
+# get all the details about NDK
+# Fills ANDROID_SUPPORTED_NATIVE_API_LEVELS from the directories under platforms/
+# and the index-aligned __availableToolchain* lists from the NDK toolchains.
+if( BUILD_WITH_ANDROID_NDK )
+ file( GLOB ANDROID_SUPPORTED_NATIVE_API_LEVELS RELATIVE "${ANDROID_NDK}/platforms" "${ANDROID_NDK}/platforms/android-*" )
+ string( REPLACE "android-" "" ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_SUPPORTED_NATIVE_API_LEVELS}" )
+ set( __availableToolchains "" )
+ set( __availableToolchainMachines "" )
+ set( __availableToolchainArchs "" )
+ set( __availableToolchainCompilerVersions "" )
+ if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" )
+ # do not go through all toolchains if we know the name
+ set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ # nothing under the primary sub-path: retry with the secondary one and adopt it on success
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
+ if( __availableToolchains )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
+ endif()
+ endif()
+ endif()
+ if( NOT __availableToolchains )
+ # no usable named toolchain: scan every directory under the toolchains path
+ file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" )
+ # guard on the globbed list itself; __availableToolchains is always empty at this point
+ if( __availableToolchainsLst )
+ list(SORT __availableToolchainsLst) # we need clang to go after gcc
+ endif()
+ __LIST_FILTER( __availableToolchainsLst "^[.]" )
+ __LIST_FILTER( __availableToolchainsLst "llvm" )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 )
+ __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" )
+ if( __availableToolchains )
+ set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} )
+ endif()
+ endif()
+ endif()
+ if( NOT __availableToolchains )
+ message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." )
+ endif()
+endif()
+
+# Collect the ABIs that can be targeted: for every distinct architecture a
+# toolchain was found for, add the ABIs listed in ANDROID_SUPPORTED_ABIS_<arch>.
+set( __sortedArchNames ${__availableToolchainArchs} )
+list( REMOVE_DUPLICATES __sortedArchNames )
+list( SORT __sortedArchNames )
+set( ANDROID_SUPPORTED_ABIS "" )
+foreach( __archName ${__sortedArchNames} )
+ list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__archName}} )
+endforeach()
+unset( __sortedArchNames )
+# without at least one ABI there is nothing to build for
+if( NOT ANDROID_SUPPORTED_ABIS )
+ message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." )
+endif()
+
+# choose target ABI
+__INIT_VARIABLE( ANDROID_ABI OBSOLETE_ARM_TARGET OBSOLETE_ARM_TARGETS VALUES ${ANDROID_SUPPORTED_ABIS} )
+# verify that target ABI is supported
+list( FIND ANDROID_SUPPORTED_ABIS "${ANDROID_ABI}" __androidAbiIdx )
+if( __androidAbiIdx EQUAL -1 )
+ # render the list as: "a", "b", "c" (the outer quotes come from the message below)
+ string( REPLACE ";" "\", \"" PRINTABLE_ANDROID_SUPPORTED_ABIS "${ANDROID_SUPPORTED_ABIS}" )
+ message( FATAL_ERROR "Specified ANDROID_ABI = \"${ANDROID_ABI}\" is not supported by this cmake toolchain or your NDK/toolchain.
+ Supported values are: \"${PRINTABLE_ANDROID_SUPPORTED_ABIS}\"
+ " )
+endif()
+unset( __androidAbiIdx )
+
+# Translate the selected ANDROID_ABI into architecture names, the NDK ABI
+# directory name, the LLVM target triple, CMAKE_SYSTEM_PROCESSOR and the
+# boolean feature switches (X86, MIPS, ARMEABI, ARMEABI_V6, ARMEABI_V7A, VFPV3, NEON).
+if( ANDROID_ABI STREQUAL "x86" )
+ set( ANDROID_ARCH_NAME "x86" )
+ set( ANDROID_ARCH_FULLNAME "x86" )
+ set( ANDROID_NDK_ABI_NAME "x86" )
+ set( ANDROID_LLVM_TRIPLE "i686-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "i686" )
+ set( X86 true )
+elseif( ANDROID_ABI STREQUAL "mips" )
+ set( ANDROID_ARCH_NAME "mips" )
+ set( ANDROID_ARCH_FULLNAME "mipsel" )
+ set( ANDROID_NDK_ABI_NAME "mips" )
+ set( ANDROID_LLVM_TRIPLE "mipsel-none-linux-android" )
+ set( CMAKE_SYSTEM_PROCESSOR "mips" )
+ set( MIPS true )
+elseif( ANDROID_ABI STREQUAL "armeabi" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv5te" )
+ set( ARMEABI true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v6 with VFP" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi" )
+ set( ANDROID_LLVM_TRIPLE "armv5te-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv6" )
+ set( ARMEABI_V6 true )
+ # need always fallback to older platform
+ set( ARMEABI true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a")
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with VFPV3" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+ set( VFPV3 true )
+elseif( ANDROID_ABI STREQUAL "armeabi-v7a with NEON" )
+ set( ANDROID_ARCH_NAME "arm" )
+ set( ANDROID_ARCH_FULLNAME "arm" )
+ set( ANDROID_NDK_ABI_NAME "armeabi-v7a" )
+ set( ANDROID_LLVM_TRIPLE "armv7-none-linux-androideabi" )
+ set( CMAKE_SYSTEM_PROCESSOR "armv7-a" )
+ set( ARMEABI_V7A true )
+ # the NEON variant also turns on VFPV3
+ set( VFPV3 true )
+ set( NEON true )
+else()
+ message( SEND_ERROR "Unknown ANDROID_ABI=\"${ANDROID_ABI}\" is specified." )
+endif()
+
+# If a CMakeSystem.cmake already exists in the build tree, append a SET() line
+# carrying the CMAKE_SYSTEM_PROCESSOR chosen above.
+# NOTE(review): presumably the appended line takes effect because it comes last in that file - confirm.
+if( CMAKE_BINARY_DIR AND EXISTS "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" )
+ # really dirty hack
+ # it is not possible to change CMAKE_SYSTEM_PROCESSOR after the first run...
+ file( APPEND "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeSystem.cmake" "SET(CMAKE_SYSTEM_PROCESSOR \"${CMAKE_SYSTEM_PROCESSOR}\")\n" )
+endif()
+
+# ANDROID_FORCE_ARM_BUILD is only offered for ARM targets other than armeabi-v6;
+# in every other case the cache entry is removed.
+if( ANDROID_ARCH_NAME STREQUAL "arm" AND NOT ARMEABI_V6 )
+ __INIT_VARIABLE( ANDROID_FORCE_ARM_BUILD OBSOLETE_FORCE_ARM VALUES OFF )
+ # write the resolved value back to the cache as an advanced BOOL option
+ set( ANDROID_FORCE_ARM_BUILD ${ANDROID_FORCE_ARM_BUILD} CACHE BOOL "Use 32-bit ARM instructions instead of Thumb-1" FORCE )
+ mark_as_advanced( ANDROID_FORCE_ARM_BUILD )
+else()
+ unset( ANDROID_FORCE_ARM_BUILD CACHE )
+endif()
+
+# choose toolchain
+# Result: ANDROID_TOOLCHAIN_NAME, ANDROID_TOOLCHAIN_MACHINE_NAME and ANDROID_COMPILER_VERSION,
+# all taken from the same index of the parallel __availableToolchain* lists.
+if( ANDROID_TOOLCHAIN_NAME )
+ # an explicitly requested toolchain must be among the detected ones ...
+ list( FIND __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" __toolchainIdx )
+ if( __toolchainIdx EQUAL -1 )
+ list( SORT __availableToolchains )
+ string( REPLACE ";" "\n * " toolchains_list "${__availableToolchains}" )
+ set( toolchains_list " * ${toolchains_list}")
+ message( FATAL_ERROR "Specified toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is missing in your NDK or broken. Please verify that your NDK is working or select another compiler toolchain.
+To configure the toolchain set CMake variable ANDROID_TOOLCHAIN_NAME to one of the following values:\n${toolchains_list}\n" )
+ endif()
+ # ... and must target the architecture of the selected ABI
+ list( GET __availableToolchainArchs ${__toolchainIdx} __toolchainArch )
+ if( NOT __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+ message( SEND_ERROR "Selected toolchain \"${ANDROID_TOOLCHAIN_NAME}\" is not able to compile binaries for the \"${ANDROID_ARCH_NAME}\" platform." )
+ endif()
+else()
+ # no toolchain requested: take the one with the highest compiler version
+ # among those targeting the selected architecture
+ set( __toolchainIdx -1 )
+ set( __applicableToolchains "" )
+ set( __toolchainMaxVersion "0.0.0" )
+ list( LENGTH __availableToolchains __availableToolchainsCount )
+ # foreach( RANGE n ) iterates 0..n inclusive, hence count-1
+ math( EXPR __availableToolchainsCount "${__availableToolchainsCount}-1" )
+ foreach( __idx RANGE ${__availableToolchainsCount} )
+ list( GET __availableToolchainArchs ${__idx} __toolchainArch )
+ if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME )
+ list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion )
+ # an "x" component is replaced by 99 so that VERSION_GREATER can compare it
+ string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}")
+ if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion )
+ set( __toolchainMaxVersion "${__toolchainVersion}" )
+ set( __toolchainIdx ${__idx} )
+ endif()
+ endif()
+ endforeach()
+ unset( __availableToolchainsCount )
+ unset( __toolchainMaxVersion )
+ unset( __toolchainVersion )
+endif()
+unset( __toolchainArch )
+if( __toolchainIdx EQUAL -1 )
+ message( FATAL_ERROR "No one of available compiler toolchains is able to compile for ${ANDROID_ARCH_NAME} platform." )
+endif()
+list( GET __availableToolchains ${__toolchainIdx} ANDROID_TOOLCHAIN_NAME )
+list( GET __availableToolchainMachines ${__toolchainIdx} ANDROID_TOOLCHAIN_MACHINE_NAME )
+list( GET __availableToolchainCompilerVersions ${__toolchainIdx} ANDROID_COMPILER_VERSION )
+
+# the temporary detection lists are not needed past this point
+unset( __toolchainIdx )
+unset( __availableToolchains )
+unset( __availableToolchainMachines )
+unset( __availableToolchainArchs )
+unset( __availableToolchainCompilerVersions )
+
+# choose native API level
+__INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL )
+# keep only the first run of digits of the given value
+string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" )
+# adjust API level
+# start from the per-architecture default and move up to the highest supported
+# level that does not exceed the requested one
+set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} )
+foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ if( NOT __level GREATER ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level )
+ set( __real_api_level ${__level} )
+ endif()
+endforeach()
+if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level )
+ message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'")
+ set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} )
+endif()
+unset(__real_api_level)
+# validate
+list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx )
+if( __levelIdx EQUAL -1 )
+ message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." )
+else()
+ if( BUILD_WITH_ANDROID_NDK )
+ # cross-check the directory name against the level read from the platform's api-level.h
+ __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" )
+ if( NOT __realApiLevel EQUAL ANDROID_NATIVE_API_LEVEL )
+ message( SEND_ERROR "Specified Android API level (${ANDROID_NATIVE_API_LEVEL}) does not match to the level found (${__realApiLevel}). Probably your copy of NDK is broken." )
+ endif()
+ unset( __realApiLevel )
+ endif()
+ # store the final level in the cache and offer the supported levels as choices
+ set( ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android API level for native code" FORCE )
+ if( CMAKE_VERSION VERSION_GREATER "2.8" )
+ list( SORT ANDROID_SUPPORTED_NATIVE_API_LEVELS )
+ set_property( CACHE ANDROID_NATIVE_API_LEVEL PROPERTY STRINGS ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} )
+ endif()
+endif()
+unset( __levelIdx )
+
+
+# remember target ABI
+# The validated ABI is forced into the cache; on CMake newer than 2.8 the ABIs of
+# the selected architecture are attached as the STRINGS choices of that cache entry.
+set( ANDROID_ABI "${ANDROID_ABI}" CACHE STRING "The target ABI for Android. If arm, then armeabi-v7a is recommended for hardware floating point." FORCE )
+if( CMAKE_VERSION VERSION_GREATER "2.8" )
+ list( SORT ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME} )
+ set_property( CACHE ANDROID_ABI PROPERTY STRINGS ${ANDROID_SUPPORTED_ABIS_${ANDROID_ARCH_FULLNAME}} )
+endif()
+
+
+# Select the C++ runtime (STL, rtti, exceptions).
+# Precedence: an explicit ANDROID_STL, then the legacy ANDROID_USE_STLPORT
+# switch, then the gnustl_static default.
+if( NOT ANDROID_STL AND DEFINED ANDROID_USE_STLPORT )
+ # the legacy switch still works but is reported as obsolete
+ message( WARNING "You are using an obsolete variable ANDROID_USE_STLPORT to select the STL variant. Use -DANDROID_STL=stlport_static instead." )
+ if( ANDROID_USE_STLPORT )
+ set( ANDROID_STL stlport_static )
+ endif()
+endif()
+if( NOT ANDROID_STL )
+ set( ANDROID_STL gnustl_static )
+endif()
+# expose both settings as advanced cache entries
+set( ANDROID_STL "${ANDROID_STL}" CACHE STRING "C++ runtime" )
+mark_as_advanced( ANDROID_STL )
+set( ANDROID_STL_FORCE_FEATURES ON CACHE BOOL "automatically configure rtti and exceptions support based on C++ runtime" )
+mark_as_advanced( ANDROID_STL_FORCE_FEATURES )
+
+# Reject ANDROID_STL values that the current build mode does not accept:
+# the NDK mode accepts nine runtimes, the standalone toolchain mode only
+# none and the two gnustl variants.
+if( BUILD_WITH_ANDROID_NDK )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|system|system_re|gabi\\+\\+_static|gabi\\+\\+_shared|stlport_static|stlport_shared|gnustl_static|gnustl_shared)$")
+ message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+ none -> Do not configure the runtime.
+ system -> Use the default minimal system C++ runtime library.
+ system_re -> Same as system but with rtti and exceptions.
+ gabi++_static -> Use the GAbi++ runtime as a static library.
+ gabi++_shared -> Use the GAbi++ runtime as a shared library.
+ stlport_static -> Use the STLport runtime as a static library.
+ stlport_shared -> Use the STLport runtime as a shared library.
+ gnustl_static -> (default) Use the GNU STL as a static library.
+ gnustl_shared -> Use the GNU STL as a shared library.
+" )
+ endif()
+elseif( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ if( NOT "${ANDROID_STL}" MATCHES "^(none|gnustl_static|gnustl_shared)$")
+ message( FATAL_ERROR "ANDROID_STL is set to invalid value \"${ANDROID_STL}\".
+The possible values are:
+ none -> Do not configure the runtime.
+ gnustl_static -> (default) Use the GNU STL as a static library.
+ gnustl_shared -> Use the GNU STL as a shared library.
+" )
+ endif()
+endif()
+
+# clear the variables that the runtime setup below assigns
+unset( ANDROID_RTTI )
+unset( ANDROID_EXCEPTIONS )
+unset( ANDROID_STL_INCLUDE_DIRS )
+unset( __libstl )
+unset( __libsupcxx )
+
+# Known-problem warnings for specific NDK releases; both are suppressed while
+# _CMAKE_IN_TRY_COMPILE is set.
+# r7b: GNU STL with armeabi-v7a when VFPV3 is not enabled
+if( NOT _CMAKE_IN_TRY_COMPILE AND ANDROID_NDK_RELEASE STREQUAL "r7b" AND ARMEABI_V7A AND NOT VFPV3 AND ANDROID_STL MATCHES "gnustl" )
+ message( WARNING "The GNU STL armeabi-v7a binaries from NDK r7b can crash non-NEON devices. The files provided with NDK r7b were not configured properly, resulting in crashes on Tegra2-based devices and others when trying to use certain floating-point functions (e.g., cosf, sinf, expf).
+You are strongly recommended to switch to another NDK release.
+" )
+endif()
+
+# r6: GNU STL on x86 (the message embeds the patch for the system header)
+if( NOT _CMAKE_IN_TRY_COMPILE AND X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" )
+ message( WARNING "The x86 system header file from NDK r6 has incorrect definition for ptrdiff_t. You are recommended to upgrade to a newer NDK release or manually patch the header:
+See https://android.googlesource.com/platform/development.git f907f4f9d4e56ccc8093df6fee54454b8bcab6c2
+ diff --git a/ndk/platforms/android-9/arch-x86/include/machine/_types.h b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ index 5e28c64..65892a1 100644
+ --- a/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ +++ b/ndk/platforms/android-9/arch-x86/include/machine/_types.h
+ @@ -51,7 +51,11 @@ typedef long int ssize_t;
+ #endif
+ #ifndef _PTRDIFF_T
+ #define _PTRDIFF_T
+ -typedef long ptrdiff_t;
+ +# ifdef __ANDROID__
+ + typedef int ptrdiff_t;
+ +# else
+ + typedef long ptrdiff_t;
+ +# endif
+ #endif
+" )
+endif()
+
+
+# setup paths and STL for standalone toolchain
+# Sets ANDROID_TOOLCHAIN_ROOT, ANDROID_CLANG_TOOLCHAIN_ROOT and ANDROID_SYSROOT and,
+# unless ANDROID_STL is "none", the GNU STL include directories plus the
+# __libstl / __libsupcxx library paths.
+if( BUILD_WITH_STANDALONE_TOOLCHAIN )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_STANDALONE_TOOLCHAIN}" )
+ set( ANDROID_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot" )
+
+ if( NOT ANDROID_STL STREQUAL "none" )
+ set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/include/c++/${ANDROID_COMPILER_VERSION}" )
+ # add the most specific target-dependent header directory that exists
+ if( ARMEABI_V7A AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}/bits" )
+ list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/${CMAKE_SYSTEM_PROCESSOR}" )
+ elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb/bits" )
+ list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/thumb" )
+ else()
+ list( APPEND ANDROID_STL_INCLUDE_DIRS "${ANDROID_STL_INCLUDE_DIRS}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" )
+ endif()
+ # always search static GNU STL to get the location of libsupc++.a
+ if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libstdc++.a" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb" )
+ elseif( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libstdc++.a" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}" )
+ elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libstdc++.a" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb" )
+ elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libstdc++.a" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib" )
+ endif()
+ # __libstl holds a directory so far; derive both library file paths from it
+ if( __libstl )
+ set( __libsupcxx "${__libstl}/libsupc++.a" )
+ set( __libstl "${__libstl}/libstdc++.a" )
+ endif()
+ # the file checked here is libsupc++.a, so the message names that file
+ if( NOT EXISTS "${__libsupcxx}" )
+ message( FATAL_ERROR "The required libsupc++.a is missing in your standalone toolchain.
+ Usually it happens because of bug in make-standalone-toolchain.sh script from NDK r7, r7b and r7c.
+ You need to either upgrade to newer NDK or manually copy
+ $ANDROID_NDK/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a
+ to
+ ${__libsupcxx}
+ " )
+ endif()
+ # for the shared runtime, point __libstl at libgnustl_shared.so when one is found
+ if( ANDROID_STL STREQUAL "gnustl_shared" )
+ if( ARMEABI_V7A AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libgnustl_shared.so" )
+ elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD AND EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libgnustl_shared.so" )
+ elseif( EXISTS "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+ set( __libstl "${ANDROID_STANDALONE_TOOLCHAIN}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libgnustl_shared.so" )
+ endif()
+ endif()
+ endif()
+endif()
+
+# clang
+# Decides whether the selected toolchain is a Clang one, determines the Clang
+# version and root directory, and derives the gcc toolchain name used for binutils.
+if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" )
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ # ask the clang binary itself and keep the first major.minor found in its output
+ execute_process( COMMAND "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/clang${TOOL_OS_SUFFIX}" --version OUTPUT_VARIABLE ANDROID_CLANG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE )
+ string( REGEX MATCH "[0-9]+[.][0-9]+" ANDROID_CLANG_VERSION "${ANDROID_CLANG_VERSION}")
+elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" )
+ # NDK clang toolchain: the version is the name suffix, the paired gcc toolchain is the "-4.6" one
+ # NOTE(review): the condition allows a missing last digit, the MATCH below requires it
+ string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}")
+ string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" )
+ message( FATAL_ERROR "Could not find the Clang compiler driver" )
+ endif()
+ set( ANDROID_COMPILER_IS_CLANG 1 )
+ set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+else()
+ # plain gcc toolchain
+ set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" )
+ unset( ANDROID_COMPILER_IS_CLANG CACHE )
+endif()
+
+# prefer a version-suffixed driver name (the version with its dot removed), otherwise plain "clang"
+string( REPLACE "." "" _clang_name "clang${ANDROID_CLANG_VERSION}" )
+if( NOT EXISTS "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" )
+ set( _clang_name "clang" )
+endif()
+
+
+# setup paths and STL for NDK
+# Sets ANDROID_TOOLCHAIN_ROOT and ANDROID_SYSROOT, then, depending on ANDROID_STL,
+# ANDROID_RTTI / ANDROID_EXCEPTIONS, ANDROID_STL_INCLUDE_DIRS and the library
+# paths __libstl and __libsupcxx.
+if( BUILD_WITH_ANDROID_NDK )
+ set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" )
+ set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" )
+
+ if( ANDROID_STL STREQUAL "none" )
+ # do nothing
+ elseif( ANDROID_STL STREQUAL "system" )
+ set( ANDROID_RTTI OFF )
+ set( ANDROID_EXCEPTIONS OFF )
+ set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL STREQUAL "system_re" )
+ set( ANDROID_RTTI ON )
+ set( ANDROID_EXCEPTIONS ON )
+ set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/system/include" )
+ elseif( ANDROID_STL MATCHES "gabi" )
+ if( ANDROID_NDK_RELEASE STRLESS "r7" )
+ message( FATAL_ERROR "gabi++ is not awailable in your NDK. You have to upgrade to NDK r7 or newer to use gabi++.")
+ endif()
+ set( ANDROID_RTTI ON )
+ set( ANDROID_EXCEPTIONS OFF )
+ set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/gabi++/include" )
+ set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gabi++/libs/${ANDROID_NDK_ABI_NAME}/libgabi++_static.a" )
+ elseif( ANDROID_STL MATCHES "stlport" )
+ # exceptions are enabled from r8d on, rtti from r7 on
+ if( NOT ANDROID_NDK_RELEASE STRLESS "r8d" )
+ set( ANDROID_EXCEPTIONS ON )
+ else()
+ set( ANDROID_EXCEPTIONS OFF )
+ endif()
+ if( ANDROID_NDK_RELEASE STRLESS "r7" )
+ set( ANDROID_RTTI OFF )
+ else()
+ set( ANDROID_RTTI ON )
+ endif()
+ set( ANDROID_STL_INCLUDE_DIRS "${ANDROID_NDK}/sources/cxx-stl/stlport/stlport" )
+ set( __libstl "${ANDROID_NDK}/sources/cxx-stl/stlport/libs/${ANDROID_NDK_ABI_NAME}/libstlport_static.a" )
+ elseif( ANDROID_STL MATCHES "gnustl" )
+ set( ANDROID_EXCEPTIONS ON )
+ set( ANDROID_RTTI ON )
+ # use the per-compiler-version directory when the NDK has one, otherwise the unversioned one
+ if( EXISTS "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+ if( ARMEABI_V7A AND ANDROID_COMPILER_VERSION VERSION_EQUAL "4.7" AND ANDROID_NDK_RELEASE STREQUAL "r8d" )
+ # gnustl binary for 4.7 compiler is buggy :(
+ # TODO: look for right fix
+ set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/4.6" )
+ else()
+ set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}" )
+ endif()
+ else()
+ set( __libstl "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++" )
+ endif()
+ # __libstl is still the STL root directory here; it becomes the library file path below
+ set( ANDROID_STL_INCLUDE_DIRS "${__libstl}/include" "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/include" )
+ if( EXISTS "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+ set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libgnustl_static.a" )
+ else()
+ set( __libstl "${__libstl}/libs/${ANDROID_NDK_ABI_NAME}/libstdc++.a" )
+ endif()
+ else()
+ message( FATAL_ERROR "Unknown runtime: ${ANDROID_STL}" )
+ endif()
+ # find libsupc++.a - rtti & exceptions
+ if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" )
+ set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer
+ if( NOT EXISTS "${__libsupcxx}" )
+ set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8
+ endif()
+ if( NOT EXISTS "${__libsupcxx}" ) # before r7
+ if( ARMEABI_V7A )
+ if( ANDROID_FORCE_ARM_BUILD )
+ set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" )
+ else()
+ set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/thumb/libsupc++.a" )
+ endif()
+ elseif( ARMEABI AND NOT ANDROID_FORCE_ARM_BUILD )
+ set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/thumb/libsupc++.a" )
+ else()
+ set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/libsupc++.a" )
+ endif()
+ endif()
+ # SEND_ERROR is a real message mode, so the failure is actually reported as an error
+ if( NOT EXISTS "${__libsupcxx}")
+ message( SEND_ERROR "Could not find libsupc++.a for a chosen platform. Either your NDK is not supported or is broken.")
+ endif()
+ endif()
+endif()
+
+
+# case of shared STL linkage
+# When a shared runtime was requested, switch __libstl from the "_static.a"
+# file name to "_shared.so" and copy that library into LIBRARY_OUTPUT_PATH.
+if( ANDROID_STL MATCHES "shared" AND DEFINED __libstl )
+ string( REPLACE "_static.a" "_shared.so" __libstl "${__libstl}" )
+ # the copy is skipped while _CMAKE_IN_TRY_COMPILE is set and when the name did not end up as a .so
+ if( NOT _CMAKE_IN_TRY_COMPILE AND __libstl MATCHES "[.]so$" )
+ get_filename_component( __libstlname "${__libstl}" NAME )
+ execute_process( COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__libstl}" "${LIBRARY_OUTPUT_PATH}/${__libstlname}" RESULT_VARIABLE __fileCopyProcess )
+ # report both a failing copy command and a missing destination file
+ if( NOT __fileCopyProcess EQUAL 0 OR NOT EXISTS "${LIBRARY_OUTPUT_PATH}/${__libstlname}")
+ message( SEND_ERROR "Failed copying of ${__libstl} to the ${LIBRARY_OUTPUT_PATH}/${__libstlname}" )
+ endif()
+ unset( __fileCopyProcess )
+ unset( __libstlname )
+ endif()
+endif()
+
+
+# ccache support
+# NDK_CCACHE (CMake variable or environment) names the ccache program to use;
+# it is resolved to a full path with find_program and kept in the cache.
+__INIT_VARIABLE( _ndk_ccache NDK_CCACHE ENV_NDK_CCACHE )
+if( _ndk_ccache )
+ # drop a cached value that does not point to an existing file so that
+ # find_program searches again; EXISTS needs the expanded path, not the variable name
+ if( DEFINED NDK_CCACHE AND NOT EXISTS "${NDK_CCACHE}" )
+ unset( NDK_CCACHE CACHE )
+ endif()
+ find_program( NDK_CCACHE "${_ndk_ccache}" DOC "The path to ccache binary")
+else()
+ unset( NDK_CCACHE CACHE )
+endif()
+unset( _ndk_ccache )
+
+
+# setup the cross-compiler
+# Runs only while CMAKE_C_COMPILER is not set; all tool paths are stored in the cache.
+if( NOT CMAKE_C_COMPILER )
+ # ccache is used only when the sysroot path has no space, semicolon or quote characters
+ if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ # ccache becomes the compiler; the real compiler is passed through CMAKE_<LANG>_COMPILER_ARG1
+ set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" )
+ set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" )
+ if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_C_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+ set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+ else()
+ set( CMAKE_C_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+ set( CMAKE_CXX_COMPILER_ARG1 "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+ endif()
+ else()
+ # no ccache: call clang or gcc directly
+ if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_C_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}${TOOL_OS_SUFFIX}" CACHE PATH "C compiler")
+ set( CMAKE_CXX_COMPILER "${ANDROID_CLANG_TOOLCHAIN_ROOT}/bin/${_clang_name}++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler")
+ else()
+ set( CMAKE_C_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "C compiler" )
+ set( CMAKE_CXX_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-g++${TOOL_OS_SUFFIX}" CACHE PATH "C++ compiler" )
+ endif()
+ endif()
+ # assembler and binutils always come from the gcc toolchain root, also for clang builds
+ set( CMAKE_ASM_COMPILER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-gcc${TOOL_OS_SUFFIX}" CACHE PATH "assembler" )
+ set( CMAKE_STRIP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-strip${TOOL_OS_SUFFIX}" CACHE PATH "strip" )
+ set( CMAKE_AR "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ar${TOOL_OS_SUFFIX}" CACHE PATH "archive" )
+ set( CMAKE_LINKER "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ld${TOOL_OS_SUFFIX}" CACHE PATH "linker" )
+ set( CMAKE_NM "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-nm${TOOL_OS_SUFFIX}" CACHE PATH "nm" )
+ set( CMAKE_OBJCOPY "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objcopy${TOOL_OS_SUFFIX}" CACHE PATH "objcopy" )
+ set( CMAKE_OBJDUMP "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-objdump${TOOL_OS_SUFFIX}" CACHE PATH "objdump" )
+ set( CMAKE_RANLIB "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_MACHINE_NAME}-ranlib${TOOL_OS_SUFFIX}" CACHE PATH "ranlib" )
+endif()
+
+# tool name prefix of the selected toolchain ("<machine>-")
+set( _CMAKE_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_MACHINE_NAME}-" )
+# CMake older than 2.8.5 gets "-c" as the first assembler argument
+if( CMAKE_VERSION VERSION_LESS 2.8.5 )
+ set( CMAKE_ASM_COMPILER_ARG1 "-c" )
+endif()
+# when APPLE is set, install_name_tool must be available; a missing tool is a fatal error
+if( APPLE )
+ find_program( CMAKE_INSTALL_NAME_TOOL NAMES install_name_tool )
+ if( NOT CMAKE_INSTALL_NAME_TOOL )
+ message( FATAL_ERROR "Could not find install_name_tool, please check your installation." )
+ endif()
+ mark_as_advanced( CMAKE_INSTALL_NAME_TOOL )
+endif()
+
+# Force set compilers because standard identification works badly for us
+# Both languages are first forced as GNU; the compiler ID is then overridden to
+# Clang when a Clang toolchain was selected. Platform id, pointer size, isysroot
+# support and ABI are set by hand to fixed values.
+include( CMakeForceCompiler )
+CMAKE_FORCE_C_COMPILER( "${CMAKE_C_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_C_COMPILER_ID Clang)
+endif()
+set( CMAKE_C_PLATFORM_ID Linux )
+set( CMAKE_C_SIZEOF_DATA_PTR 4 )
+set( CMAKE_C_HAS_ISYSROOT 1 )
+set( CMAKE_C_COMPILER_ABI ELF )
+CMAKE_FORCE_CXX_COMPILER( "${CMAKE_CXX_COMPILER}" GNU )
+if( ANDROID_COMPILER_IS_CLANG )
+ set( CMAKE_CXX_COMPILER_ID Clang)
+endif()
+set( CMAKE_CXX_PLATFORM_ID Linux )
+set( CMAKE_CXX_SIZEOF_DATA_PTR 4 )
+set( CMAKE_CXX_HAS_ISYSROOT 1 )
+set( CMAKE_CXX_COMPILER_ABI ELF )
+# file extensions treated as C++ sources
+set( CMAKE_CXX_SOURCE_FILE_EXTENSIONS cc cp cxx cpp CPP c++ C )
+# force ASM compiler (required for CMake < 2.8.5)
+set( CMAKE_ASM_COMPILER_ID_RUN TRUE )
+set( CMAKE_ASM_COMPILER_ID GNU )
+set( CMAKE_ASM_COMPILER_WORKS TRUE )
+set( CMAKE_ASM_COMPILER_FORCED TRUE )
+set( CMAKE_COMPILER_IS_GNUASM 1)
+# file extensions treated as assembler sources
+set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm )
+
+# flags and definitions
+# remove first, then add; presumably so -DANDROID is not listed twice when this file is processed again
+remove_definitions( -DANDROID )
+add_definitions( -DANDROID )
+
+# Start ANDROID_CXX_FLAGS with the --sysroot option. A sysroot path containing a
+# space, semicolon or quote needs special treatment.
+if( ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ if( CMAKE_HOST_WIN32 )
+ # try to convert path to 8.3 form
+ # a one-line batch file echoes the short form (%~s1) of its first argument
+ file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" )
+ execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}"
+ OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE
+ RESULT_VARIABLE __result ERROR_QUIET )
+ if( __result EQUAL 0 )
+ # conversion worked: use the short path without quoting
+ file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT )
+ set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+ else()
+ # conversion failed: fall back to a double-quoted path
+ set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" )
+ endif()
+ else()
+ # other hosts: wrap the whole option in single quotes
+ set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" )
+ endif()
+ if( NOT _CMAKE_IN_TRY_COMPILE )
+ # quotes can break try_compile and compiler identification
+ message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n")
+ endif()
+else()
+ set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" )
+endif()
+
+# NDK flags
+# Per-architecture code generation flags. ANDROID_CXX_FLAGS collects the flags
+# used by every configuration; ANDROID_CXX_FLAGS_RELEASE and ANDROID_CXX_FLAGS_DEBUG
+# hold the per-configuration ones. Some options are only added when the compiler
+# is not clang.
+if( ARMEABI OR ARMEABI_V7A )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" )
+ if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 )
+  # default ARM setup: thumb code for Release, arm code for Debug
+  set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" )
+  endif()
+ else()
+  # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI
+  set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" )
+  set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" )
+  if( NOT ANDROID_COMPILER_IS_CLANG )
+   set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+  endif()
+ endif()
+elseif( X86 )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" )
+ else()
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" )
+ endif()
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer -fno-strict-aliasing" )
+elseif( MIPS )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" )
+ set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer" )
+ if( NOT ANDROID_COMPILER_IS_CLANG )
+  set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" )
+  set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -funswitch-loops -finline-limit=300" )
+ endif()
+else()
+ # Any other architecture: define the per-configuration flags as empty strings.
+ # (This branch must be else(); an elseif() without a condition is never taken.)
+ set( ANDROID_CXX_FLAGS_RELEASE "" )
+ set( ANDROID_CXX_FLAGS_DEBUG "" )
+endif()
+
+# -fsigned-char is appended unconditionally
+set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries
+
+# -Wno-psabi is prepended unless the target is x86 or the compiler is clang
+if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" )
+endif()
+
+# -no-canonical-prefixes is appended unless the compiler version is below 4.6
+if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/
+endif()
+
+# ABI-specific flags
+# Appends the -march / float ABI / FPU options that match the selected ARM ABI
+# variant; nothing is appended for non-ARM targets.
+if( ARMEABI_V7A )
+ # choose the FPU first (NEON wins over VFPV3, vfpv3-d16 is the fallback),
+ # then append all armv7-a options in one step
+ if( NEON )
+  set( __android_fpu "neon" )
+ elseif( VFPV3 )
+  set( __android_fpu "vfpv3" )
+ else()
+  set( __android_fpu "vfpv3-d16" )
+ endif()
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv7-a -mfloat-abi=softfp -mfpu=${__android_fpu}" )
+ unset( __android_fpu )
+elseif( ARMEABI_V6 )
+ # vfp == vfpv2
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv6 -mfloat-abi=softfp -mfpu=vfp" )
+elseif( ARMEABI )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" )
+endif()
+
+# Link rule templates for C++ targets.
+# With a gnustl runtime whose STL or supc++ library file exists, the rules invoke
+# the C compiler driver (<CMAKE_C_COMPILER>); in every other case they invoke the
+# C++ compiler driver. Apart from the driver the two sets of rules are identical.
+if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+else()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+endif()
+
+# STL
+# The STL and supc++ library files, when they exist, are appended (in double
+# quotes) to the end of the link rules, i.e. after <LINK_LIBRARIES>.
+if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" )
+ if( EXISTS "${__libstl}" )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libstl}\"" )
+ endif()
+ if( EXISTS "${__libsupcxx}" )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+ # C objects:
+ # the C link rules are defined from scratch here and also receive the supc++ library
+ set( CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_C_CREATE_SHARED_MODULE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" )
+ set( CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>" )
+ set( CMAKE_C_CREATE_SHARED_LIBRARY "${CMAKE_C_CREATE_SHARED_LIBRARY} \"${__libsupcxx}\"" )
+ set( CMAKE_C_CREATE_SHARED_MODULE "${CMAKE_C_CREATE_SHARED_MODULE} \"${__libsupcxx}\"" )
+ set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" )
+ endif()
+ if( ANDROID_STL MATCHES "gnustl" )
+ # gnustl: the math library goes last; -lm is used when ANDROID_LIBM_PATH
+ # does not point to an existing file
+ if( NOT EXISTS "${ANDROID_LIBM_PATH}" )
+ set( ANDROID_LIBM_PATH -lm )
+ endif()
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" )
+ set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" )
+ endif()
+endif()
+
+# variables controlling optional build flags
+# Each option is initialised through __INIT_VARIABLE and then published as an
+# advanced BOOL cache entry whose description states what the option does.
+# NOTE(review): STRLESS is a string comparison, so a two-digit release such as
+# "r10" would also sort before "r7" - confirm before supporting newer NDKs.
+if (ANDROID_NDK_RELEASE STRLESS "r7")
+ # libGLESv2.so in NDK's prior to r7 refers to missing external symbols.
+ # So this flag option is required for all projects using OpenGL from native.
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES ON )
+else()
+ __INIT_VARIABLE( ANDROID_SO_UNDEFINED VALUES OFF )
+endif()
+__INIT_VARIABLE( ANDROID_NO_UNDEFINED OBSOLETE_NO_UNDEFINED VALUES ON )
+__INIT_VARIABLE( ANDROID_FUNCTION_LEVEL_LINKING VALUES ON )
+__INIT_VARIABLE( ANDROID_GOLD_LINKER VALUES ON )
+__INIT_VARIABLE( ANDROID_NOEXECSTACK VALUES ON )
+__INIT_VARIABLE( ANDROID_RELRO VALUES ON )
+
+set( ANDROID_NO_UNDEFINED ${ANDROID_NO_UNDEFINED} CACHE BOOL "Show all undefined symbols as linker errors" )
+set( ANDROID_SO_UNDEFINED ${ANDROID_SO_UNDEFINED} CACHE BOOL "Allows or disallows undefined symbols in shared libraries" )
+set( ANDROID_FUNCTION_LEVEL_LINKING ${ANDROID_FUNCTION_LEVEL_LINKING} CACHE BOOL "Put each function and data item in a separate section and enable garbage collection of unused sections at link time" )
+set( ANDROID_GOLD_LINKER ${ANDROID_GOLD_LINKER} CACHE BOOL "Enables gold linker (only available for NDK r8b for ARM and x86 architectures on linux-86 and darwin-x86 hosts)" )
+set( ANDROID_NOEXECSTACK ${ANDROID_NOEXECSTACK} CACHE BOOL "Marks the stack as non-executable (passes noexecstack to the assembler and the linker)" )
+set( ANDROID_RELRO ${ANDROID_RELRO} CACHE BOOL "Enables RELRO - a memory corruption mitigation technique" )
+mark_as_advanced( ANDROID_NO_UNDEFINED ANDROID_SO_UNDEFINED ANDROID_FUNCTION_LEVEL_LINKING ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO )
+
+# linker flags
+# ANDROID_LINKER_FLAGS is built up from empty, one option group at a time.
+set( ANDROID_LINKER_FLAGS "" )
+
+if( ARMEABI_V7A )
+ # this is *required* to use the following linker flags that routes around
+ # a CPU bug in some Cortex-A8 implementations:
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--fix-cortex-a8" )
+endif()
+
+# ANDROID_NO_UNDEFINED: report undefined symbols at link time
+if( ANDROID_NO_UNDEFINED )
+ if( MIPS )
+ # there is some sysroot-related problem in mips linker...
+ # (the option is skipped entirely when the sysroot path contains a space,
+ # a semicolon or a double quote)
+ if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" )
+ endif()
+ else()
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" )
+ endif()
+endif()
+
+# ANDROID_SO_UNDEFINED: tolerate undefined symbols inside shared libraries
+if( ANDROID_SO_UNDEFINED )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-allow-shlib-undefined" )
+endif()
+
+# ANDROID_FUNCTION_LEVEL_LINKING: separate sections at compile time plus
+# --gc-sections at link time
+if( ANDROID_FUNCTION_LEVEL_LINKING )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fdata-sections -ffunction-sections" )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--gc-sections" )
+endif()
+
+# Linker selection; only evaluated for compiler version 4.6.
+# - gold is requested when ANDROID_GOLD_LINKER is on, the host is UNIX or the NDK
+#   release string sorts after "r8b", and the target is ARM or x86
+# - otherwise bfd is requested explicitly for NDK releases sorting after "r8b"
+# - otherwise, for armeabi on NDK r8b, a warning about the bfd linker is printed
+#   (not inside try_compile)
+if( ANDROID_COMPILER_VERSION VERSION_EQUAL "4.6" )
+ if( ANDROID_GOLD_LINKER AND (CMAKE_HOST_UNIX OR ANDROID_NDK_RELEASE STRGREATER "r8b") AND (ARMEABI OR ARMEABI_V7A OR X86) )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=gold" )
+ elseif( ANDROID_NDK_RELEASE STRGREATER "r8b")
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -fuse-ld=bfd" )
+ elseif( ANDROID_NDK_RELEASE STREQUAL "r8b" AND ARMEABI AND NOT _CMAKE_IN_TRY_COMPILE )
+ message( WARNING "The default bfd linker from arm GCC 4.6 toolchain can fail with 'unresolvable R_ARM_THM_CALL relocation' error message. See https://code.google.com/p/android/issues/detail?id=35342
+ On Linux and OS X host platform you can workaround this problem using gold linker (default).
+ Rerun cmake with -DANDROID_GOLD_LINKER=ON option in case of problems.
+" )
+ endif()
+endif() # version 4.6
+
+# ANDROID_NOEXECSTACK: the compile-time option differs between clang and
+# other compilers; the linker option is the same for both
+if( ANDROID_NOEXECSTACK )
+ if( ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Xclang -mnoexecstack" )
+ else()
+ set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -Wa,--noexecstack" )
+ endif()
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,noexecstack" )
+endif()
+
+# ANDROID_RELRO: pass "-z relro" and "-z now" to the linker
+if( ANDROID_RELRO )
+ set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,-z,relro -Wl,-z,now" )
+endif()
+
+# clang-only options; all of them are prepended to the flags collected so far
+if( ANDROID_COMPILER_IS_CLANG )
+ set( ANDROID_CXX_FLAGS "-Qunused-arguments ${ANDROID_CXX_FLAGS}" )
+ if( ARMEABI_V7A AND NOT ANDROID_FORCE_ARM_BUILD )
+ # armeabi-v7a without a forced arm build: Release gets a thumbv7 target triple,
+ # Debug gets ANDROID_LLVM_TRIPLE
+ set( ANDROID_CXX_FLAGS_RELEASE "-target thumbv7-none-linux-androideabi ${ANDROID_CXX_FLAGS_RELEASE}" )
+ set( ANDROID_CXX_FLAGS_DEBUG "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS_DEBUG}" )
+ else()
+ # everything else: one target triple for all configurations
+ set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" )
+ endif()
+ if( BUILD_WITH_ANDROID_NDK )
+ # NDK builds additionally pass the toolchain root via -gcc-toolchain
+ set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" )
+ endif()
+endif()
+
+# cache flags
+# Cache entries for the standard CMake flag variables with their default values.
+set( CMAKE_CXX_FLAGS "" CACHE STRING "c++ flags" )
+set( CMAKE_C_FLAGS "" CACHE STRING "c flags" )
+set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c++ Release flags" )
+set( CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "c Release flags" )
+set( CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c++ Debug flags" )
+set( CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG -D_DEBUG" CACHE STRING "c Debug flags" )
+set( CMAKE_SHARED_LINKER_FLAGS "" CACHE STRING "shared linker flags" )
+set( CMAKE_MODULE_LINKER_FLAGS "" CACHE STRING "module linker flags" )
+set( CMAKE_EXE_LINKER_FLAGS "-Wl,-z,nocopyreloc" CACHE STRING "executable linker flags" )
+
+# put flags to cache (for debug purpose only)
+set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" CACHE INTERNAL "Android specific c/c++ flags" )
+set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" )
+set( ANDROID_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG}" CACHE INTERNAL "Android specific c/c++ Debug flags" )
+set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}" CACHE INTERNAL "Android specific c/c++ linker flags" )
+
+# finish flags
+# The Android-specific flags are put in front of the current values as normal
+# (non-cache) variables. The C flags are built from the same ANDROID_CXX_FLAGS*
+# variables as the C++ flags.
+set( CMAKE_CXX_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" )
+set( CMAKE_C_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" )
+set( CMAKE_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}" )
+set( CMAKE_C_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}" )
+set( CMAKE_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}" )
+set( CMAKE_C_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}" )
+set( CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" )
+set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}" )
+set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" )
+
+# MIPS with NDK r8 only: an explicit linker script from the GCC toolchain directory
+# is passed to the linker (mipself.xsc for shared libraries and modules,
+# mipself.x for executables)
+if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" )
+ set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" )
+ set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" )
+ set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" )
+endif()
+
+# configure rtti and exceptions
+# Both features are only forced when ANDROID_STL_FORCE_FEATURES is on and the
+# corresponding ANDROID_RTTI / ANDROID_EXCEPTIONS variable is defined. The chosen
+# option is put in front of the existing flags.
+if( ANDROID_STL_FORCE_FEATURES )
+ # rtti: C++ flags only
+ if( DEFINED ANDROID_RTTI )
+  if( ANDROID_RTTI )
+   set( __android_rtti_flag "-frtti" )
+  else()
+   set( __android_rtti_flag "-fno-rtti" )
+  endif()
+  set( CMAKE_CXX_FLAGS "${__android_rtti_flag} ${CMAKE_CXX_FLAGS}" )
+  unset( __android_rtti_flag )
+ endif()
+
+ # exceptions: C++ and C flags
+ if( DEFINED ANDROID_EXCEPTIONS )
+  if( ANDROID_EXCEPTIONS )
+   set( __android_exceptions_flag "-fexceptions" )
+  else()
+   set( __android_exceptions_flag "-fno-exceptions" )
+  endif()
+  set( CMAKE_CXX_FLAGS "${__android_exceptions_flag} ${CMAKE_CXX_FLAGS}" )
+  set( CMAKE_C_FLAGS "${__android_exceptions_flag} ${CMAKE_C_FLAGS}" )
+  unset( __android_exceptions_flag )
+ endif()
+endif()
+
+# global includes and link directories
+include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} )
+link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" )
+
+# detect if need link crtbegin_so.o explicitly
+# The shared-library link rule is turned into a real command line by replacing
+# every <PLACEHOLDER> by hand, with crtbegin_so.o from the sysroot as the only
+# object file. The command is then executed; ANDROID_EXPLICIT_CRT_LINK becomes ON
+# when it exits with status 0 and OFF otherwise. The probe is skipped when the
+# variable is already defined.
+if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK )
+ set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" )
+ string( REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_C_COMPILER>" "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CXX_FLAGS>" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<LANGUAGE_COMPILE_FLAGS>" "" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_FLAGS>" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS>" "-shared" __cmd "${__cmd}" )
+ string( REPLACE "<CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET_SONAME>" "" __cmd "${__cmd}" )
+ string( REPLACE "<TARGET>" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" )
+ string( REPLACE "<OBJECTS>" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" )
+ string( REPLACE "<LINK_LIBRARIES>" "" __cmd "${__cmd}" )
+ # split the command line into a list of arguments
+ separate_arguments( __cmd )
+ # the split above also breaks up NDK/toolchain paths that contain spaces;
+ # each such path is split the same way and the split form is replaced by the original
+ foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN )
+ if( ${__var} )
+ set( __tmp "${${__var}}" )
+ separate_arguments( __tmp )
+ string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}")
+ endif()
+ endforeach()
+ # strip all single and double quotes before running the command
+ string( REPLACE "'" "" __cmd "${__cmd}" )
+ string( REPLACE "\"" "" __cmd "${__cmd}" )
+ execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET )
+ if( __cmd_result EQUAL 0 )
+ set( ANDROID_EXPLICIT_CRT_LINK ON )
+ else()
+ set( ANDROID_EXPLICIT_CRT_LINK OFF )
+ endif()
+endif()
+
+# when required, crtbegin_so.o is appended to the shared library and module link rules
+if( ANDROID_EXPLICIT_CRT_LINK )
+ set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+ set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" )
+endif()
+
+# setup output directories
+# LIBRARY_OUTPUT_PATH_ROOT defaults to the top-level source directory and
+# CMAKE_INSTALL_PREFIX to the "user" directory inside the toolchain root.
+set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" )
+set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" )
+
+# output paths are not set inside try_compile() projects
+if(NOT _CMAKE_IN_TRY_COMPILE)
+ # executables go to bin/<abi> when the source tree has jni/CMakeLists.txt, to bin otherwise
+ if( EXISTS "${CMAKE_SOURCE_DIR}/jni/CMakeLists.txt" )
+ set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin/${ANDROID_NDK_ABI_NAME}" CACHE PATH "Output directory for applications" )
+ else()
+ set( EXECUTABLE_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/bin" CACHE PATH "Output directory for applications" )
+ endif()
+ # libraries always go to libs/<abi>
+ set( LIBRARY_OUTPUT_PATH "${LIBRARY_OUTPUT_PATH_ROOT}/libs/${ANDROID_NDK_ABI_NAME}" CACHE PATH "path for android libs" )
+endif()
+
+# set these global flags for cmake client scripts to change behavior
+set( ANDROID True )
+set( BUILD_ANDROID True )
+
+# where is the target environment
+set( CMAKE_FIND_ROOT_PATH "${ANDROID_TOOLCHAIN_ROOT}/bin" "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}" "${ANDROID_SYSROOT}" "${CMAKE_INSTALL_PREFIX}" "${CMAKE_INSTALL_PREFIX}/share" )
+
+# only search for libraries and includes in the ndk toolchain
+# (the program search mode is set to ONLY as well)
+set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+
+
+# find_host_package( <find_package arguments...> )
+# Runs find_package() against the host system instead of the target environment.
+# For the duration of the call WIN32/APPLE/UNIX are adjusted according to
+# CMAKE_HOST_WIN32 / CMAKE_HOST_APPLE and the CMAKE_FIND_ROOT_PATH_MODE_*
+# variables are set to NEVER. Afterwards WIN32 and APPLE are cleared, UNIX is
+# set to 1 and the search modes are set to ONLY.
+macro( find_host_package )
+ # pretend to be the host platform
+ if( CMAKE_HOST_WIN32 )
+  set( WIN32 1 )
+  unset( UNIX )
+ elseif( CMAKE_HOST_APPLE )
+  set( APPLE 1 )
+  unset( UNIX )
+ endif()
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+
+ find_package( ${ARGN} )
+
+ # back to the target settings
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ unset( WIN32 )
+ unset( APPLE )
+ set( UNIX 1 )
+endmacro()
+
+
+# find_host_program( <find_program arguments...> )
+# Runs find_program() against the host system instead of the target environment.
+# For the duration of the call WIN32/APPLE/UNIX are adjusted according to
+# CMAKE_HOST_WIN32 / CMAKE_HOST_APPLE and the CMAKE_FIND_ROOT_PATH_MODE_*
+# variables are set to NEVER. Afterwards WIN32 and APPLE are cleared, UNIX is
+# set to 1 and the search modes are set to ONLY.
+macro( find_host_program )
+ # pretend to be the host platform
+ if( CMAKE_HOST_WIN32 )
+  set( WIN32 1 )
+  unset( UNIX )
+ elseif( CMAKE_HOST_APPLE )
+  set( APPLE 1 )
+  unset( UNIX )
+ endif()
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER )
+
+ find_program( ${ARGN} )
+
+ # back to the target settings
+ set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY )
+ set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY )
+ unset( WIN32 )
+ unset( APPLE )
+ set( UNIX 1 )
+endmacro()
+
+
+# ANDROID_GET_ABI_RAWNAME( <toolchain flag> <output variable> )
+# Stores in <output variable> the ABI name that belongs to a toolchain flag:
+#   ARMEABI -> armeabi, ARMEABI_V7A -> armeabi-v7a, X86 -> x86, MIPS -> mips.
+# Any other flag yields "unknown".
+macro( ANDROID_GET_ABI_RAWNAME TOOLCHAIN_FLAG VAR )
+ # start from the fallback and overwrite it when the flag is recognised
+ set( ${VAR} "unknown" )
+ if( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI" )
+  set( ${VAR} "armeabi" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "ARMEABI_V7A" )
+  set( ${VAR} "armeabi-v7a" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "X86" )
+  set( ${VAR} "x86" )
+ elseif( "${TOOLCHAIN_FLAG}" STREQUAL "MIPS" )
+  set( ${VAR} "mips" )
+ endif()
+endmacro()
+
+
+# export toolchain settings for the try_compile() command
+# Every variable of the list below that is defined is written, as a
+# `set( <name> <value> CACHE INTERNAL "" )` line, to android.toolchain.config.cmake
+# in the CMake files directory of the build tree. Nothing is written when the
+# current project is a try_compile() project.
+if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" )
+ set( __toolchain_config "")
+ foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES
+  ANDROID_NDK_HOST_X64
+  ANDROID_NDK
+  ANDROID_NDK_LAYOUT
+  ANDROID_STANDALONE_TOOLCHAIN
+  ANDROID_TOOLCHAIN_NAME
+  ANDROID_ABI
+  ANDROID_NATIVE_API_LEVEL
+  ANDROID_STL
+  ANDROID_STL_FORCE_FEATURES
+  ANDROID_FORCE_ARM_BUILD
+  ANDROID_NO_UNDEFINED
+  ANDROID_SO_UNDEFINED
+  ANDROID_FUNCTION_LEVEL_LINKING
+  ANDROID_GOLD_LINKER
+  ANDROID_NOEXECSTACK
+  ANDROID_RELRO
+  ANDROID_LIBM_PATH
+  ANDROID_EXPLICIT_CRT_LINK
+  )
+  if( DEFINED ${__var} )
+   # Quote the value when it contains a space so that it stays a single argument
+   # of the generated set() command. The test has to look at the VALUE: ${__var}
+   # expands to the variable name, which if() then dereferences. (Testing the
+   # quoted name itself can never match, because names contain no spaces.)
+   if( ${__var} MATCHES " " )
+    set( __toolchain_config "${__toolchain_config}set( ${__var} \"${${__var}}\" CACHE INTERNAL \"\" )\n" )
+   else()
+    set( __toolchain_config "${__toolchain_config}set( ${__var} ${${__var}} CACHE INTERNAL \"\" )\n" )
+   endif()
+  endif()
+ endforeach()
+ file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/android.toolchain.config.cmake" "${__toolchain_config}" )
+ unset( __toolchain_config )
+endif()
+
+
+# set some obsolete variables for backward compatibility
+# ANDROID_SET_OBSOLETE_VARIABLES (advanced cache option, ON by default) controls
+# whether the old variable names are defined as copies of their replacements.
+set( ANDROID_SET_OBSOLETE_VARIABLES ON CACHE BOOL "Define obsolete Android-specific cmake variables" )
+mark_as_advanced( ANDROID_SET_OBSOLETE_VARIABLES )
+if( ANDROID_SET_OBSOLETE_VARIABLES )
+ set( ANDROID_API_LEVEL ${ANDROID_NATIVE_API_LEVEL} )
+ set( ARM_TARGET "${ANDROID_ABI}" )
+ set( ARMEABI_NDK_NAME "${ANDROID_NDK_ABI_NAME}" )
+endif()
+
+
+# Variables controlling behavior or set by cmake toolchain:
+# ANDROID_ABI : "armeabi-v7a" (default), "armeabi", "armeabi-v7a with NEON", "armeabi-v7a with VFPV3", "armeabi-v6 with VFP", "x86", "mips"
+# ANDROID_NATIVE_API_LEVEL : 3,4,5,8,9,14 (depends on NDK version)
+# ANDROID_STL : gnustl_static/gnustl_shared/stlport_static/stlport_shared/gabi++_static/gabi++_shared/system_re/system/none
+# ANDROID_FORBID_SYGWIN : ON/OFF
+# ANDROID_NO_UNDEFINED : ON/OFF
+# ANDROID_SO_UNDEFINED : OFF/ON (default depends on NDK version)
+# ANDROID_FUNCTION_LEVEL_LINKING : ON/OFF
+# ANDROID_GOLD_LINKER : ON/OFF
+# ANDROID_NOEXECSTACK : ON/OFF
+# ANDROID_RELRO : ON/OFF
+# ANDROID_FORCE_ARM_BUILD : ON/OFF
+# ANDROID_STL_FORCE_FEATURES : ON/OFF
+# ANDROID_SET_OBSOLETE_VARIABLES : ON/OFF
+# Can be set only at the first run:
+# ANDROID_NDK
+# ANDROID_STANDALONE_TOOLCHAIN
+# ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain
+# ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems)
+# ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID)
+# LIBRARY_OUTPUT_PATH_ROOT : <any valid path>
+# NDK_CCACHE : <path to your ccache executable>
+# Obsolete:
+# ANDROID_API_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL
+# ARM_TARGET : superseded by ANDROID_ABI
+# ARM_TARGETS : superseded by ANDROID_ABI (can be set only)
+# ANDROID_NDK_TOOLCHAIN_ROOT : superseded by ANDROID_STANDALONE_TOOLCHAIN (can be set only)
+# ANDROID_USE_STLPORT : superseded by ANDROID_STL=stlport_static
+# ANDROID_LEVEL : superseded by ANDROID_NATIVE_API_LEVEL (completely removed)
+#
+# Primary read-only variables:
+# ANDROID : always TRUE
+# ARMEABI : TRUE for arm v6 and older devices
+# ARMEABI_V6 : TRUE for arm v6
+# ARMEABI_V7A : TRUE for arm v7a
+# NEON : TRUE if NEON unit is enabled
+# VFPV3 : TRUE if VFP version 3 is enabled
+# X86 : TRUE if configured for x86
+# MIPS : TRUE if configured for mips
+# BUILD_ANDROID : always TRUE
+# BUILD_WITH_ANDROID_NDK : TRUE if NDK is used
+# BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used
+# ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform
+# ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI
+# ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK
+# ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI
+# ANDROID_SYSROOT : path to the compiler sysroot
+# TOOL_OS_SUFFIX : "" or ".exe" depending on host platform
+# ANDROID_COMPILER_IS_CLANG : TRUE if clang compiler is used
+# Obsolete:
+# ARMEABI_NDK_NAME : superseded by ANDROID_NDK_ABI_NAME
+#
+# Secondary (less stable) read-only variables:
+# ANDROID_COMPILER_VERSION : GCC version used
+# ANDROID_CXX_FLAGS : C/C++ compiler flags required by Android platform
+# ANDROID_SUPPORTED_ABIS : list of currently allowed values for ANDROID_ABI
+# ANDROID_TOOLCHAIN_MACHINE_NAME : "arm-linux-androideabi", "arm-eabi" or "i686-android-linux"
+# ANDROID_TOOLCHAIN_ROOT : path to the top level of toolchain (standalone or placed inside NDK)
+# ANDROID_CLANG_TOOLCHAIN_ROOT : path to clang tools
+# ANDROID_SUPPORTED_NATIVE_API_LEVELS : list of native API levels found inside NDK
+# ANDROID_STL_INCLUDE_DIRS : stl include paths
+# ANDROID_RTTI : if rtti is enabled by the runtime
+# ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime
+# ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used
+# ANDROID_CLANG_VERSION : version of clang compiler if clang is used
+# ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product/<product_name>/obj/lib/libm.so) to workaround unresolved `sincos`
+#
+# Defaults:
+# ANDROID_DEFAULT_NDK_API_LEVEL
+# ANDROID_DEFAULT_NDK_API_LEVEL_${ARCH}
+# ANDROID_NDK_SEARCH_PATHS
+# ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH
+# ANDROID_SUPPORTED_ABIS_${ARCH}
+# ANDROID_SUPPORTED_NDK_VERSIONS
message(WARNING "Can not automatically determine the value for ANDROID_PLATFORM_VERSION_CODE")
endif()
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY)
link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}")
add_executable(opencv_test_engine ${engine_test_files} jni/Tests/gtest/gtest-all.cpp)
target_link_libraries(opencv_test_engine z binder log utils android_runtime ${engine} ${engine}_jni)
-
EXPECT_EQ(info1, info2);
}
#endif
-
// string path = pm.GetPackagePathByVersion("240", PLATFORM_TEGRA2, 0);
// EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra2/lib", path.c_str());
// }
-
-
os.system("adb %s shell mkdir -p \"%s\"" % (DEVICE_STR, DEVICE_LOG_PATH))
RunTestApp("OpenCVEngineTestApp")
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_hardfp
-cd build_hardfp
-
-cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
+++ /dev/null
-#!/bin/sh
-cd `dirname $0`/..
-
-mkdir -p build_softfp
-cd build_softfp
-
-cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../..
-
architecture = 'armeabi'
-excludedHeaders = set(['hdf5.h', 'cap_ios.h',
- 'eigen.hpp', 'cxeigen.hpp' #TOREMOVE
- ])
+excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE
systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \
'/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg
'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include']
if f == m:
moduleHeaders += GetHeaderFiles(os.path.join(cppHeadersFolder, f))
if m == 'flann':
- flann = os.path.join(cppHeadersFolder, f, 'flann.hpp')
+ flann = os.path.join(cppHeadersFolder, f, 'flann.hpp')
moduleHeaders.remove(flann)
moduleHeaders.insert(0, flann)
cppHeaders += moduleHeaders
os.chdir(BuildDir)
BuildLog = os.path.join(BuildDir, "build.log")
- CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../../ > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
+ CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../.. > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog)
MakeCmdLine = "make %s >> \"%s\" 2>&1" % (MakeTarget, BuildLog);
#print(CmakeCmdLine)
os.system(CmakeCmdLine)
#!/bin/sh
cd `dirname $0`/..
-mkdir -p build
-cd build
-
-cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
+# Create and enter the ARM build directory; stop if either step fails so that
+# cmake never runs in the wrong place.
+mkdir -p build_android_arm || exit 1
+cd build_android_arm || exit 1
+# "$@" forwards every extra argument unchanged, even if it contains spaces.
+cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake "$@" ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_mips
+cd build_android_mips
+
+cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_android_service
+cd build_android_service
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
--- /dev/null
+#!/bin/sh
+
+cd `dirname $0`/..
+
+mkdir -p build_android_x86
+cd build_android_x86
+
+cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_hardfp
+cd build_linux_arm_hardfp
+
+cmake -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
--- /dev/null
+#!/bin/sh
+cd `dirname $0`/..
+
+mkdir -p build_linux_arm_softfp
+cd build_linux_arm_softfp
+
+cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../..
else if (item == mItemFace20)
setMinFaceSize(0.2f);
else if (item == mItemType) {
- mDetectorType = (mDetectorType + 1) % mDetectorName.length;
- item.setTitle(mDetectorName[mDetectorType]);
- setDetectorType(mDetectorType);
+ int tmpDetectorType = (mDetectorType + 1) % mDetectorName.length;
+ item.setTitle(mDetectorName[tmpDetectorType]);
+ setDetectorType(tmpDetectorType);
}
return true;
}
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>\r
-<?fileVersion 4.0.0?>\r
-\r
-<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">\r
- <storageModule moduleId="org.eclipse.cdt.core.settings">\r
- <cconfiguration id="0.129633445">\r
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="0.129633445" moduleId="org.eclipse.cdt.core.settings" name="Default">\r
- <externalSettings/>\r
- <extensions>\r
- <extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
- <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
- <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>\r
- <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
- <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
- <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>\r
- </extensions>\r
- </storageModule>\r
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">\r
- <configuration artifactName="${ProjName}" buildProperties="" description="" id="0.129633445" name="Default" parent="org.eclipse.cdt.build.core.prefbase.cfg">\r
- <folderInfo id="0.129633445." name="/" resourcePath="">\r
- <toolChain id="org.eclipse.cdt.build.core.prefbase.toolchain.2006441180" name="No ToolChain" resourceTypeBasedDiscovery="false" superClass="org.eclipse.cdt.build.core.prefbase.toolchain">\r
- <targetPlatform id="org.eclipse.cdt.build.core.prefbase.toolchain.2006441180.527973180" name=""/>\r
- <builder autoBuildTarget="" command="${NDKROOT}/ndk-build.cmd" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.180541221" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>\r
- <tool id="org.eclipse.cdt.build.core.settings.holder.libs.791069665" name="holder for library settings" superClass="org.eclipse.cdt.build.core.settings.holder.libs"/>\r
- <tool id="org.eclipse.cdt.build.core.settings.holder.1894181736" name="Assembly" superClass="org.eclipse.cdt.build.core.settings.holder">\r
- <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.588929884" languageId="org.eclipse.cdt.core.assembly" languageName="Assembly" sourceContentType="org.eclipse.cdt.core.asmSource" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
- </tool>\r
- <tool id="org.eclipse.cdt.build.core.settings.holder.303359177" name="GNU C++" superClass="org.eclipse.cdt.build.core.settings.holder">\r
- <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.373249505" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/platforms/android-9/arch-arm/usr/include""/>\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include""/>\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include""/>\r
- <listOptionValue builtIn="false" value=""${ProjDirPath}/../../sdk/native/jni/include""/>\r
- </option>\r
- <option id="org.eclipse.cdt.build.core.settings.holder.symbols.1424359063" name="Symbols" superClass="org.eclipse.cdt.build.core.settings.holder.symbols" valueType="definedSymbols">\r
- <listOptionValue builtIn="false" value="ANDROID=1"/>\r
- </option>\r
- <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.360067880" languageId="org.eclipse.cdt.core.g++" languageName="GNU C++" sourceContentType="org.eclipse.cdt.core.cxxSource,org.eclipse.cdt.core.cxxHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
- </tool>\r
- <tool id="org.eclipse.cdt.build.core.settings.holder.1156172258" name="GNU C" superClass="org.eclipse.cdt.build.core.settings.holder">\r
- <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.149918263" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/platforms/android-9/arch-arm/usr/include""/>\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include""/>\r
- <listOptionValue builtIn="false" value=""${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include""/>\r
- <listOptionValue builtIn="false" value=""${ProjDirPath}/../../sdk/native/jni/include""/>\r
- </option>\r
- <option id="org.eclipse.cdt.build.core.settings.holder.symbols.719752707" name="Symbols" superClass="org.eclipse.cdt.build.core.settings.holder.symbols" valueType="definedSymbols">\r
- <listOptionValue builtIn="false" value="ANDROID=1"/>\r
- </option>\r
- <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.232493949" languageId="org.eclipse.cdt.core.gcc" languageName="GNU C" sourceContentType="org.eclipse.cdt.core.cSource,org.eclipse.cdt.core.cHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>\r
- </tool>\r
- </toolChain>\r
- </folderInfo>\r
- <sourceEntries>\r
- <entry flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name="jni"/>\r
- </sourceEntries>\r
- </configuration>\r
- </storageModule>\r
- <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>\r
- </cconfiguration>\r
- </storageModule>\r
- <storageModule moduleId="cdtBuildSystem" version="4.0.0">\r
- <project id="OpenCV Sample - face-detection.null.1639518055" name="OpenCV Sample - face-detection"/>\r
- </storageModule>\r
- <storageModule moduleId="scannerConfiguration">\r
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>\r
- <scannerConfigBuildInfo instanceId="0.129633445">\r
- <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>\r
- </scannerConfigBuildInfo>\r
- </storageModule>\r
- <storageModule moduleId="refreshScope" versionNumber="1">\r
- <resource resourceType="PROJECT" workspacePath="/OpenCV Sample - face-detection"/>\r
- </storageModule>\r
- <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>\r
-</cproject>\r
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?fileVersion 4.0.0?>
+
+<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
+ <storageModule moduleId="org.eclipse.cdt.core.settings">
+ <cconfiguration id="0.882924228">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="0.882924228" moduleId="org.eclipse.cdt.core.settings" name="Default">
+ <externalSettings/>
+ <extensions>
+ <extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+ </extensions>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <configuration artifactName="${ProjName}" buildProperties="" description="" id="0.882924228" name="Default" parent="org.eclipse.cdt.build.core.prefbase.cfg">
+ <folderInfo id="0.882924228." name="/" resourcePath="">
+ <toolChain id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868" name="No ToolChain" resourceTypeBasedDiscovery="false" superClass="org.eclipse.cdt.build.core.prefbase.toolchain">
+ <targetPlatform id="org.eclipse.cdt.build.core.prefbase.toolchain.1667980868.2108168132" name=""/>
+ <builder autoBuildTarget="" command="&quot;${NDKROOT}/ndk-build.cmd&quot;" enableAutoBuild="true" enableCleanBuild="false" id="org.eclipse.cdt.build.core.settings.default.builder.328915772" incrementalBuildTarget="" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="org.eclipse.cdt.build.core.settings.default.builder"/>
+ <tool id="org.eclipse.cdt.build.core.settings.holder.libs.630148311" name="holder for library settings" superClass="org.eclipse.cdt.build.core.settings.holder.libs"/>
+ <tool id="org.eclipse.cdt.build.core.settings.holder.525090327" name="Assembly" superClass="org.eclipse.cdt.build.core.settings.holder">
+ <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.1491216279" languageId="org.eclipse.cdt.core.assembly" languageName="Assembly" sourceContentType="org.eclipse.cdt.core.asmSource" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+ </tool>
+ <tool id="org.eclipse.cdt.build.core.settings.holder.1242729366" name="GNU C++" superClass="org.eclipse.cdt.build.core.settings.holder">
+ <option id="org.eclipse.cdt.build.core.settings.holder.incpaths.881377735" name="Include Paths" superClass="org.eclipse.cdt.build.core.settings.holder.incpaths" valueType="includePath">
+ <listOptionValue builtIn="false" value="&quot;${NDKROOT}/platforms/android-9/arch-arm/usr/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${ProjDirPath}/../../sdk/native/jni/include&quot;"/>
+ <listOptionValue builtIn="false" value="&quot;${NDKROOT}/sources/android/native_app_glue&quot;"/>
+ </option>
+ <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.273216997" languageId="org.eclipse.cdt.core.g++" languageName="GNU C++" sourceContentType="org.eclipse.cdt.core.cxxSource,org.eclipse.cdt.core.cxxHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+ </tool>
+ <tool id="org.eclipse.cdt.build.core.settings.holder.1779128177" name="GNU C" superClass="org.eclipse.cdt.build.core.settings.holder">
+ <inputType id="org.eclipse.cdt.build.core.settings.holder.inType.1778510041" languageId="org.eclipse.cdt.core.gcc" languageName="GNU C" sourceContentType="org.eclipse.cdt.core.cSource,org.eclipse.cdt.core.cHeader" superClass="org.eclipse.cdt.build.core.settings.holder.inType"/>
+ </tool>
+ </toolChain>
+ </folderInfo>
+ </configuration>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+ </cconfiguration>
+ </storageModule>
+ <storageModule moduleId="cdtBuildSystem" version="4.0.0">
+ <project id="CvNativeActivity.null.708321898" name="CvNativeActivity"/>
+ </storageModule>
+ <storageModule moduleId="scannerConfiguration">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ <scannerConfigBuildInfo instanceId="0.882924228">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
+ </scannerConfigBuildInfo>
+ </storageModule>
+ <storageModule moduleId="refreshScope" versionNumber="1">
+ <resource resourceType="PROJECT" workspacePath="/CvNativeActivity"/>
+ </storageModule>
+ <storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
+</cproject>
</projects>
<buildSpec>
<buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
+ <triggers>auto,full,incremental,</triggers>
+ <arguments>
+ <dictionary>
+ <key>?name?</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.append_environment</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.autoBuildTarget</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildArguments</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.buildCommand</key>
+ <value>"${NDKROOT}/ndk-build.cmd"</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
+ <value>clean</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.contents</key>
+ <value>org.eclipse.cdt.make.core.activeConfigSettings</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableAutoBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableCleanBuild</key>
+ <value>false</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.enableFullBuild</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.fullBuildTarget</key>
+ <value></value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.stopOnError</key>
+ <value>true</value>
+ </dictionary>
+ <dictionary>
+ <key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
+ <value>false</value>
+ </dictionary>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
<arguments>
</arguments>
<arguments>
</arguments>
</buildCommand>
+ <buildCommand>
+ <name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
+ <triggers>full,incremental,</triggers>
+ <arguments>
+ </arguments>
+ </buildCommand>
</buildSpec>
<natures>
<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.cdt.core.cnature</nature>
+ <nature>org.eclipse.cdt.core.ccnature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
+ <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
-#include <float.h>
#include <queue>
#include <opencv2/core/core.hpp>
}
}
- idx++; // to skip coma symbol
+ idx++; // to skip comma symbol
} while(supported[idx-1] != '\0');
for (int yy = top_indent; yy < std::min(frame.rows+top_indent, buffer.height); yy++)
{
- unsigned char* line = (unsigned char*)pixels;
- memcpy(line+left_indent*4*sizeof(unsigned char), frame.ptr<unsigned char>(yy),
- std::min(frame.cols, buffer.width)*4*sizeof(unsigned char));
+ unsigned char* line = (unsigned char*)pixels + left_indent*4*sizeof(unsigned char);
+ size_t line_size = std::min(frame.cols, buffer.width)*4*sizeof(unsigned char);
+ memcpy(line, frame.ptr<unsigned char>(yy), line_size);
// go to next line
pixels = (int32_t*)pixels + buffer.stride;
}
return;
}
- LOGI("Camera initialized at resoution %dx%d", camera_resolution.width, camera_resolution.height);
+ LOGI("Camera initialized at resolution %dx%d", camera_resolution.width, camera_resolution.height);
}
break;
case APP_CMD_TERM_WINDOW:
// Make sure glue isn't stripped.
app_dummy();
- memset(&engine, 0, sizeof(engine));
+ size_t engine_size = sizeof(engine); // for Eclipse CDT parser
+ memset((void*)&engine, 0, engine_size);
app->userData = &engine;
app->onAppCmd = engine_handle_cmd;
engine.app = app;
std::vector< DMatch > good_matches;
for( int i = 0; i < descriptors_1.rows; i++ )
- { if( matches[i].distance < 2*min_dist )
+ { if( matches[i].distance <= 2*min_dist )
{ good_matches.push_back( matches[i]); }
}
#include <iostream>
#include <string>
-#include "opencv2/opencv_modules.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
-#ifdef HAVE_OPENCV_NONFREE
-#include "opencv2/nonfree/gpu.hpp"
-#endif
-
using namespace std;
using namespace cv;
using namespace cv::gpu;
FGD_STAT,
MOG,
MOG2,
-#ifdef HAVE_OPENCV_NONFREE
- VIBE,
-#endif
GMG
};
cv::CommandLineParser cmd(argc, argv,
"{ c | camera | false | use camera }"
"{ f | file | 768x576.avi | input video file }"
- "{ m | method | mog | method (fgd, mog, mog2, vibe, gmg) }"
+ "{ m | method | mog | method (fgd, mog, mog2, gmg) }"
"{ h | help | false | print help message }");
if (cmd.get<bool>("help"))
if (method != "fgd"
&& method != "mog"
&& method != "mog2"
- #ifdef HAVE_OPENCV_NONFREE
- && method != "vibe"
- #endif
&& method != "gmg")
{
cerr << "Incorrect method" << endl;
Method m = method == "fgd" ? FGD_STAT :
method == "mog" ? MOG :
method == "mog2" ? MOG2 :
- #ifdef HAVE_OPENCV_NONFREE
- method == "vibe" ? VIBE :
- #endif
GMG;
VideoCapture cap;
FGDStatModel fgd_stat;
MOG_GPU mog;
MOG2_GPU mog2;
-#ifdef HAVE_OPENCV_NONFREE
- VIBE_GPU vibe;
-#endif
GMG_GPU gmg;
gmg.numInitializationFrames = 40;
mog2(d_frame, d_fgmask);
break;
-#ifdef HAVE_OPENCV_NONFREE
- case VIBE:
- vibe.initialize(d_frame);
- break;
-#endif
-
case GMG:
gmg.initialize(d_frame.size());
break;
namedWindow("image", WINDOW_NORMAL);
namedWindow("foreground mask", WINDOW_NORMAL);
namedWindow("foreground image", WINDOW_NORMAL);
- if (m != GMG
- #ifdef HAVE_OPENCV_NONFREE
- && m != VIBE
- #endif
- )
+ if (m != GMG)
{
namedWindow("mean background image", WINDOW_NORMAL);
}
mog2.getBackgroundImage(d_bgimg);
break;
-#ifdef HAVE_OPENCV_NONFREE
- case VIBE:
- vibe(d_frame, d_fgmask);
- break;
-#endif
-
case GMG:
gmg(d_frame, d_fgmask);
break;
using namespace cv;
-#if !defined(HAVE_CUDA)
+#if !defined(HAVE_CUDA) || defined(__arm__)
+
int main( int, const char** )
{
- cout << "Please compile the library with CUDA support" << endl;
- return -1;
+#if !defined(HAVE_CUDA)
+ std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true)." << std::endl;
+#endif
+
+#if defined(__arm__)
+ std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
+ return 0;
}
+
#else
#include "opencv2/core/core.hpp"
#include "opencv2/gpu/gpu.hpp"
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
+#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
int main()
{
std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif
+#if defined(__arm__)
+ std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
return 0;
}
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
+#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
int main()
{
std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif
+#if defined(__arm__)
+ std::cout << "Unsupported for ARM CUDA library." << std::endl;
+#endif
+
return 0;
}
-//This sample is inherited from facedetect.cpp in smaple/c
-
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace std;
using namespace cv;
+#define LOOP_NUM 10
+
+const static Scalar colors[] = { CV_RGB(0,0,255),
+ CV_RGB(0,128,255),
+ CV_RGB(0,255,255),
+ CV_RGB(0,255,0),
+ CV_RGB(255,128,0),
+ CV_RGB(255,255,0),
+ CV_RGB(255,0,0),
+ CV_RGB(255,0,255)} ;
-static void help()
+int64 work_begin = 0;
+int64 work_end = 0;
+
+// Marks the start of a timed section by storing the current tick count in work_begin.
+static void workBegin()
+{
+ work_begin = getTickCount();
+}
+static void workEnd()
{
- cout << "\nThis program demonstrates the cascade recognizer.\n"
- "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n"
- "Usage:\n"
- "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
- " [--scale=<image scale greater or equal to 1, try 1.3 for example>\n"
- " [filename|camera_index]\n\n"
- "see facedetect.cmd for one call:\n"
- "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --scale=1.3 \n"
- "Hit any key to quit.\n"
- "Using OpenCV version " << CV_VERSION << "\n" << endl;
+ work_end += (getTickCount() - work_begin);
}
-struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
-void detectAndDraw( Mat& img,
- cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade,
- double scale);
+// Returns the tick total accumulated in work_end, converted to milliseconds.
+static double getTime()
+{
+    // cvGetTickFrequency() reports ticks per microsecond; x1000 gives ticks per millisecond.
+    const double ticks_per_ms = (double)cvGetTickFrequency() * 1000.;
+    return work_end / ticks_per_ms;
+}
+
+void detect( Mat& img, vector<Rect>& faces,
+ cv::ocl::OclCascadeClassifierBuf& cascade,
+ double scale, bool calTime);
-String cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
+void detectCPU( Mat& img, vector<Rect>& faces,
+ CascadeClassifier& cascade,
+ double scale, bool calTime);
+
+void Draw(Mat& img, vector<Rect>& faces, double scale);
+
+// This function tests whether gpu_rst matches cpu_rst.
+// If the two vectors differ in size, it returns the difference in vector size.
+// Otherwise it returns (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+double checkRectSimilarity(Size sz, std::vector<Rect>& cpu_rst, std::vector<Rect>& gpu_rst);
int main( int argc, const char** argv )
{
- CvCapture* capture = 0;
- Mat frame, frameCopy, image;
- const String scaleOpt = "--scale=";
- size_t scaleOptLen = scaleOpt.length();
- const String cascadeOpt = "--cascade=";
- size_t cascadeOptLen = cascadeOpt.length();
- String inputName;
-
- help();
- cv::ocl::OclCascadeClassifier cascade;
- CascadeClassifier nestedCascade;
- double scale = 1;
-
- for( int i = 1; i < argc; i++ )
+ const char* keys =
+ "{ h | help | false | print help message }"
+ "{ i | input | | specify input image }"
+ "{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }"
+ "{ c | scale | 1.0 | scale image }"
+ "{ s | use_cpu | false | use cpu or gpu to process the image }";
+
+ CommandLineParser cmd(argc, argv, keys);
+ if (cmd.get<bool>("help"))
{
- cout << "Processing " << i << " " << argv[i] << endl;
- if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 )
- {
- cascadeName.assign( argv[i] + cascadeOptLen );
- cout << " from which we have cascadeName= " << cascadeName << endl;
- }
- else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
- {
- if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 )
- scale = 1;
- cout << " from which we read scale = " << scale << endl;
- }
- else if( argv[i][0] == '-' )
- {
- cerr << "WARNING: Unknown option %s" << argv[i] << endl;
- }
- else
- inputName.assign( argv[i] );
+ // Help requested: list the supported options and exit successfully.
+ cout << "Available options:" << endl;
+ cmd.printParams();
+ return 0;
}
+ CvCapture* capture = 0;
+ Mat frame, frameCopy, image;
- if( !cascade.load( cascadeName ) )
+ bool useCPU = cmd.get<bool>("s");
+ string inputName = cmd.get<string>("i");
+ string cascadeName = cmd.get<string>("t");
+ double scale = cmd.get<double>("c");
+ cv::ocl::OclCascadeClassifierBuf cascade;
+ CascadeClassifier cpu_cascade;
+
+ if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
{
cerr << "ERROR: Could not load classifier cascade" << endl;
- cerr << "Usage: facedetect [--cascade=<cascade_path>]\n"
- " [--scale[=<image scale>\n"
- " [filename|camera_index]\n" << endl ;
return -1;
}
- if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
+ if( inputName.empty() )
{
- capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' );
- int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ;
- if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl;
+ capture = cvCaptureFromCAM(0);
+ if(!capture)
+ cout << "Capture from CAM 0 didn't work" << endl;
}
else if( inputName.size() )
{
if( image.empty() )
{
capture = cvCaptureFromAVI( inputName.c_str() );
- if(!capture) cout << "Capture from AVI didn't work" << endl;
+ if(!capture)
+ {
+     // Give up only when the video could not be opened. Without the braces the
+     // return ran unconditionally, so a successfully opened video was never processed.
+     cout << "Capture from AVI didn't work" << endl;
+     return -1;
+ }
}
}
else
{
image = imread( "lena.jpg", 1 );
- if(image.empty()) cout << "Couldn't read lena.jpg" << endl;
+ if(image.empty())
+ {
+     // Exit only when the fallback image is missing. Without the braces the
+     // return ran unconditionally, even after a successful load.
+     cout << "Couldn't read lena.jpg" << endl;
+     return -1;
+ }
}
cvNamedWindow( "result", 1 );
std::vector<cv::ocl::Info> oclinfo;
int devnums = cv::ocl::getDevice(oclinfo);
- if(devnums<1)
+ if( devnums < 1 )
{
std::cout << "no device found\n";
return -1;
}
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
+ ocl::setBinpath("./");
if( capture )
{
cout << "In capture ..." << endl;
{
IplImage* iplImg = cvQueryFrame( capture );
frame = iplImg;
+ vector<Rect> faces;
if( frame.empty() )
break;
if( iplImg->origin == IPL_ORIGIN_TL )
frame.copyTo( frameCopy );
else
flip( frame, frameCopy, 0 );
-
- detectAndDraw( frameCopy, cascade, nestedCascade, scale );
-
+ if(useCPU){
+ detectCPU(frameCopy, faces, cpu_cascade, scale, false);
+ }
+ else{
+ detect(frameCopy, faces, cascade, scale, false);
+ }
+ Draw(frameCopy, faces, scale);
if( waitKey( 10 ) >= 0 )
goto _cleanup_;
}
else
{
cout << "In image read" << endl;
- if( !image.empty() )
- {
- detectAndDraw( image, cascade, nestedCascade, scale );
- waitKey(0);
- }
- else if( !inputName.empty() )
+ vector<Rect> faces;
+ vector<Rect> ref_rst;
+ double accuracy = 0.;
+ for(int i = 0; i <= LOOP_NUM;i ++)
{
- /* assume it is a text file containing the
- list of the image filenames to be processed - one per line */
- FILE* f = fopen( inputName.c_str(), "rt" );
- if( f )
+ cout << "loop" << i << endl;
+ if(useCPU){
+ detectCPU(image, faces, cpu_cascade, scale, i==0?false:true);
+ }
+ else{
+ detect(image, faces, cascade, scale, i==0?false:true);
+ if(i == 0){
+ detectCPU(image, ref_rst, cpu_cascade, scale, false);
+ accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
+ }
+ }
+ if (i == LOOP_NUM)
{
- char buf[1000+1];
- while( fgets( buf, 1000, f ) )
- {
- int len = (int)strlen(buf), c;
- while( len > 0 && isspace(buf[len-1]) )
- len--;
- buf[len] = '\0';
- cout << "file " << buf << endl;
- image = imread( buf, 1 );
- if( !image.empty() )
- {
- detectAndDraw( image, cascade, nestedCascade, scale );
- c = waitKey(0);
- if( c == 27 || c == 'q' || c == 'Q' )
- break;
- }
- else
- {
- cerr << "Aw snap, couldn't read image " << buf << endl;
- }
- }
- fclose(f);
+ if (useCPU)
+ cout << "average CPU time (noCamera) : ";
+ else
+ cout << "average GPU time (noCamera) : ";
+ cout << getTime() / LOOP_NUM << " ms" << endl;
+ cout << "accuracy value: " << accuracy <<endl;
}
}
+ Draw(image, faces, scale);
+ waitKey(0);
}
cvDestroyWindow("result");
return 0;
}
-void detectAndDraw( Mat& img,
- cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier&,
- double scale)
+void detect( Mat& img, vector<Rect>& faces,
+ cv::ocl::OclCascadeClassifierBuf& cascade,
+ double scale, bool calTime)
{
- int i = 0;
- double t = 0;
- vector<Rect> faces;
- const static Scalar colors[] = { CV_RGB(0,0,255),
- CV_RGB(0,128,255),
- CV_RGB(0,255,255),
- CV_RGB(0,255,0),
- CV_RGB(255,128,0),
- CV_RGB(255,255,0),
- CV_RGB(255,0,0),
- CV_RGB(255,0,255)} ;
cv::ocl::oclMat image(img);
cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
-
+ if(calTime) workBegin();
cv::ocl::cvtColor( image, gray, CV_BGR2GRAY );
cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
cv::ocl::equalizeHist( smallImg, smallImg );
- CvSeq* _objects;
- MemStorage storage(cvCreateMemStorage(0));
- t = (double)cvGetTickCount();
- _objects = cascade.oclHaarDetectObjects( smallImg, storage, 1.1,
+ cascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
|CV_HAAR_SCALE_IMAGE
, Size(30,30), Size(0, 0) );
- vector<CvAvgComp> vecAvgComp;
- Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
- faces.resize(vecAvgComp.size());
- std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
- t = (double)cvGetTickCount() - t;
- printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
+ if(calTime) workEnd();
+}
+
+// CPU detection path: converts img to gray, resizes it by 1/scale, equalizes
+// the histogram and runs the cascade on the result, writing the detected
+// rectangles to faces. When calTime is true the whole call is bracketed by
+// workBegin()/workEnd() so its duration is added to the running total.
+void detectCPU( Mat& img, vector<Rect>& faces,
+                CascadeClassifier& cascade,
+                double scale, bool calTime)
+{
+    if(calTime)
+        workBegin();
+
+    const int small_rows = cvRound(img.rows/scale);
+    const int small_cols = cvRound(img.cols/scale);
+    Mat gray_img;
+    Mat shrunk_img(small_rows, small_cols, CV_8UC1);
+
+    cvtColor(img, gray_img, CV_BGR2GRAY);
+    resize(gray_img, shrunk_img, shrunk_img.size(), 0, 0, INTER_LINEAR);
+    equalizeHist(shrunk_img, shrunk_img);
+
+    // Arguments after faces: scale factor 1.1, 3 neighbours, flags, then the two Size limits.
+    const Size size_limit_a(30, 30);
+    const Size size_limit_b(0, 0);
+    cascade.detectMultiScale(shrunk_img, faces, 1.1, 3, CV_HAAR_SCALE_IMAGE,
+                             size_limit_a, size_limit_b);
+
+    if(calTime)
+        workEnd();
+}
+
+void Draw(Mat& img, vector<Rect>& faces, double scale)
+{
+ int i = 0;
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{
- Mat smallImgROI;
Point center;
Scalar color = colors[i%8];
int radius;
}
cv::imshow( "result", img );
}
+
+// Compares two detection results by the image area their rectangles cover.
+//   sz  - size of the image the rectangles refer to; every rectangle must lie inside it
+//   ob1 - reference rectangles (the caller in this file passes the CPU result)
+//   ob2 - rectangles under test (the caller in this file passes the OpenCL result)
+// Returns |ob1.size() - ob2.size()| when the two counts differ. Otherwise returns
+// 1 - (pixels covered by both sets)/(pixels covered by ob1): 0 means every
+// reference pixel is also covered by ob2, 1 means none is.
+// If ob1 covers no pixels at all (for example both detectors found nothing),
+// returns 0 when ob2 covers nothing either and 1 otherwise, instead of dividing by zero.
+double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& ob2)
+{
+    size_t sz1 = ob1.size();
+    size_t sz2 = ob2.size();
+
+    if(sz1 != sz2)
+        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+
+    // Rasterize the reference rectangles into a 0/1 coverage mask.
+    cv::Mat cpu_result(sz, CV_8UC1);
+    cpu_result.setTo(0);
+    for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
+    {
+        // The ROI header shares pixel data with cpu_result, so no copy-back is needed.
+        cv::Mat cpu_result_roi(cpu_result, *r);
+        cpu_result_roi.setTo(1);
+    }
+    int cpu_area = cv::countNonZero(cpu_result > 0);
+
+    // Same for the rectangles under test.
+    cv::Mat gpu_result(sz, CV_8UC1);
+    gpu_result.setTo(0);
+    for(vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++)
+    {
+        cv::Mat gpu_result_roi(gpu_result, *r2);
+        gpu_result_roi.setTo(1);
+    }
+
+    // No reference pixels: the ratio below would be 0/0 (NaN).
+    if(cpu_area == 0)
+        return cv::countNonZero(gpu_result > 0) == 0 ? 0.0 : 1.0;
+
+    // The product of two 0/1 masks is non-zero exactly where both are set.
+    cv::Mat result_;
+    multiply(cpu_result, gpu_result, result_);
+    int result = cv::countNonZero(result_ > 0);
+
+    return 1.0 - (double)result/(double)cpu_area;
+}
bool gamma_corr;
};
-
class App
{
public:
string message() const;
+// This function tests whether gpu_rst matches cpu_rst.
+// If the two vectors differ in size, it returns the difference in vector size.
+// Otherwise it returns
+// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
+ double checkRectSimilarity(Size sz,
+ std::vector<Rect>& cpu_rst,
+ std::vector<Rect>& gpu_rst);
private:
App operator=(App&);
ocl::oclMat gpu_img;
// Iterate over all frames
+ bool verify = false;
while (running && !frame.empty())
{
workBegin();
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
- }
+ if (!verify)
+ {
+ // verify if GPU output same objects with CPU at 1st run
+ verify = true;
+ vector<Rect> ref_rst;
+ cvtColor(img, img, CV_BGRA2BGR);
+ cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride,
+ Size(0, 0), scale, gr_threshold-2);
+ double accuracy = checkRectSimilarity(img.size(), ref_rst, found);
+ cout << "\naccuracy value: " << accuracy << endl;
+ }
+ }
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
return ss.str();
}
+// Measures how closely two sets of detected rectangles agree.
+//
+// sz  - size of the image the rectangles refer to
+// ob1 - reference rectangles (called "cpu" below)
+// ob2 - rectangles under test (called "gpu" below)
+//
+// When the two sets hold a different number of rectangles, the absolute
+// difference of the counts is returned. Otherwise the result is
+//   1 - (pixels covered by both sets) / (pixels covered by ob1)
+// so 0.0 means every pixel covered by ob1 is also covered by ob2.
+double App::checkRectSimilarity(Size sz,
+                                std::vector<Rect>& ob1,
+                                std::vector<Rect>& ob2)
+{
+    const size_t sz1 = ob1.size();
+    const size_t sz2 = ob2.size();
+
+    if (sz1 != sz2)
+        return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1);
+
+    // A rectangle reaching past the image border would make the ROI
+    // constructor fail, so every rectangle is clipped to the image first.
+    const Rect bounds(0, 0, sz.width, sz.height);
+
+    // Rasterize the reference rectangles into a coverage mask.
+    cv::Mat cpu_result(sz, CV_8UC1);
+    cpu_result.setTo(0);
+    for (std::vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); ++r)
+    {
+        const Rect roi = *r & bounds;
+        if (roi.area() > 0)
+        {
+            cv::Mat cpu_result_roi(cpu_result, roi);
+            cpu_result_roi.setTo(1);
+        }
+    }
+    const int cpu_area = cv::countNonZero(cpu_result > 0);
+
+    // Rasterize the rectangles under test the same way.
+    cv::Mat gpu_result(sz, CV_8UC1);
+    gpu_result.setTo(0);
+    for (std::vector<Rect>::const_iterator r2 = ob2.begin(); r2 != ob2.end(); ++r2)
+    {
+        const Rect roi = *r2 & bounds;
+        if (roi.area() > 0)
+        {
+            cv::Mat gpu_result_roi(gpu_result, roi);
+            gpu_result_roi.setTo(1);
+        }
+    }
+
+    // With no reference pixels the ratio below would be 0/0. Treat the sets
+    // as identical only when the tested set covers nothing either.
+    if (cpu_area == 0)
+        return cv::countNonZero(gpu_result > 0) == 0 ? 0.0 : 1.0;
+
+    // The product of the two 0/1 masks is non-zero exactly where they overlap.
+    cv::Mat result_;
+    multiply(cpu_result, gpu_result, result_);
+    const int result = cv::countNonZero(result_ > 0);
+
+    return 1.0 - (double)result / (double)cpu_area;
+}
+
+
--- /dev/null
+#include <iostream>
+#include <vector>
+#include <iomanip>
+
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/video/video.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::ocl;
+
+typedef unsigned char uchar;
+#define LOOP_NUM 10
+int64 work_begin = 0;
+int64 work_end = 0;
+
+// Marks the start of a timed section by recording the current tick count.
+static void workBegin()
+{
+    work_begin = getTickCount();
+}
+
+// Adds the ticks elapsed since the last workBegin() to the running total.
+static void workEnd()
+{
+    const int64 elapsed = getTickCount() - work_begin;
+    work_end += elapsed;
+}
+
+// Returns the accumulated tick total scaled by 1000 / tick frequency.
+static double getTime()
+{
+    const double scaled_ticks = work_end * 1000.;
+    return scaled_ticks / getTickFrequency();
+}
+
+// Copies a single-row device matrix of 2-channel float points into `vec`.
+// `vec` is resized to d_mat.cols and used directly as the destination buffer.
+// An empty device matrix leaves `vec` empty.
+static void download(const oclMat& d_mat, vector<Point2f>& vec)
+{
+    vec.clear();
+    // Taking &vec[0] of an empty vector is undefined, so stop early.
+    if (d_mat.cols <= 0)
+        return;
+    vec.resize(d_mat.cols);
+    // Header over the vector's storage; download() writes straight into it.
+    Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]);
+    d_mat.download(mat);
+}
+
+// Copies a single-row device matrix of 8-bit values into `vec`.
+// `vec` is resized to d_mat.cols and used directly as the destination buffer.
+// An empty device matrix leaves `vec` empty.
+static void download(const oclMat& d_mat, vector<uchar>& vec)
+{
+    vec.clear();
+    // Taking &vec[0] of an empty vector is undefined, so stop early.
+    if (d_mat.cols <= 0)
+        return;
+    vec.resize(d_mat.cols);
+    // Header over the vector's storage; download() writes straight into it.
+    Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]);
+    d_mat.download(mat);
+}
+
+// Draws one arrow per successfully tracked point onto `frame`.
+//
+// prevPts / nextPts - point positions in the previous and the next frame
+// status            - non-zero where the corresponding point was tracked
+// line_color        - arrow colour (defaults to Scalar(0, 0, 255))
+//
+// Each arrow is anchored at the previous position, stretched to three times
+// the measured displacement, and finished with two short tip strokes.
+// Displacements shorter than one pixel are skipped.
+static void drawArrows(Mat& frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status, Scalar line_color = Scalar(0, 0, 255))
+{
+    const int line_thickness = 1;
+
+    // Only indices present in all three vectors can be read safely.
+    size_t count = prevPts.size();
+    if (nextPts.size() < count)
+        count = nextPts.size();
+    if (status.size() < count)
+        count = status.size();
+
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (!status[i])
+            continue;
+
+        Point p = prevPts[i];
+        Point q = nextPts[i];
+
+        const double angle = atan2((double) p.y - q.y, (double) p.x - q.x);
+        const double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) );
+
+        if (hypotenuse < 1.0)
+            continue;
+
+        // Here we lengthen the arrow by a factor of three.
+        q.x = (int) (p.x - 3 * hypotenuse * cos(angle));
+        q.y = (int) (p.y - 3 * hypotenuse * sin(angle));
+
+        // Now we draw the main line of the arrow.
+        line(frame, p, q, line_color, line_thickness);
+
+        // Now draw the tips of the arrow: two strokes of length 9 at
+        // +/- 45 degrees from the shaft, both ending at q.
+        p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4));
+        p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4));
+        line(frame, p, q, line_color, line_thickness);
+
+        p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4));
+        p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4));
+        line(frame, p, q, line_color, line_thickness);
+    }
+}
+
+
+// Entry point of the sparse pyramidal Lucas-Kanade optical-flow sample.
+//
+// Two modes:
+//  * capture mode (camera, or the file given with --video): flow is computed
+//    between consecutive frames and drawn until a key is pressed or the
+//    stream ends;
+//  * still-image mode (--left / --right): the pair is processed LOOP_NUM + 1
+//    times, the first pass is left out of the timing, and the average time
+//    is printed.
+// Capture mode is also used as a fallback when the still images cannot be read.
+// Returns 0 on success and -1 when no OpenCL device or no usable input exists.
+int main(int argc, const char* argv[])
+{
+    static std::vector<Info> ocl_info;
+    ocl::getDevice(ocl_info);
+    // Reading ocl_info[0] from an empty list would be out of bounds.
+    if (ocl_info.empty())
+    {
+        cout << "Error: Did not find a valid OpenCL device!" << endl;
+        return -1;
+    }
+    //if you want to use undefault device, set it here
+    setDevice(ocl_info[0]);
+
+    //set this to save kernel compile time from second time you run
+    ocl::setBinpath("./");
+    const char* keys =
+        "{ h | help | false | print help message }"
+        "{ l | left | | specify left image }"
+        "{ r | right | | specify right image }"
+        "{ c | camera | 0 | enable camera capturing }"
+        "{ s | use_cpu | false | use cpu or gpu to process the image }"
+        "{ v | video | | use video as input }"
+        "{ points | points | 1000 | specify points count [GoodFeatureToTrack] }"
+        "{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.get<bool>("help"))
+    {
+        cout << "Usage: pyrlk_optical_flow [options]" << endl;
+        cout << "Avaible options:" << endl;
+        cmd.printParams();
+        return 0;
+    }
+
+    bool defaultPicturesFail = false;
+    string fname0 = cmd.get<string>("left");
+    string fname1 = cmd.get<string>("right");
+    string vdofile = cmd.get<string>("video");
+    int points = cmd.get<int>("points");
+    double minDist = cmd.get<double>("min_dist");
+    bool useCPU = cmd.get<bool>("s");
+    bool useCamera = cmd.get<bool>("c");
+    int inputName = cmd.get<int>("c");
+
+    oclMat d_nextPts, d_status;
+    GoodFeaturesToTrackDetector_OCL d_features(points);
+    Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE);
+    Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE);
+    PyrLKOpticalFlow d_pyrLK;
+    vector<cv::Point2f> pts(points);
+    vector<cv::Point2f> nextPts(points);
+    vector<unsigned char> status(points);
+    vector<float> err;
+
+    if (frame0.empty() || frame1.empty())
+    {
+        // No usable still images: fall back to capture mode. The capture is
+        // opened (and released) in the branch below, so no separate probe
+        // capture is created here. A probe would be leaked and would also
+        // reject a --video input on machines without a camera.
+        useCamera = true;
+        defaultPicturesFail = true;
+    }
+
+    cout << "Points count : " << points << endl << endl;
+
+    if (useCamera)
+    {
+        CvCapture* capture = 0;
+        Mat frame, frameCopy;
+        Mat frame0Gray, frame1Gray;
+        Mat ptr0, ptr1;
+
+        if(vdofile == "")
+            capture = cvCaptureFromCAM( inputName );
+        else
+            capture = cvCreateFileCapture(vdofile.c_str());
+
+        int c = inputName ;
+        if(!capture)
+        {
+            if(vdofile == "")
+                cout << "Capture from CAM " << c << " didn't work" << endl;
+            else
+                cout << "Capture from file " << vdofile << " failed" <<endl;
+            if (defaultPicturesFail)
+            {
+                // Neither still images nor a capture source are available.
+                cout << "Can't load input images" << endl;
+                return -1;
+            }
+            // The still images were loaded fine, so process them instead.
+            goto nocamera;
+        }
+
+        cout << "In capture ..." << endl;
+        for(int i = 0;; i++)
+        {
+            frame = cvQueryFrame( capture );
+            if( frame.empty() )
+                break;
+
+            if (i == 0)
+            {
+                // First frame: nothing to compare against yet.
+                frame.copyTo( frame0 );
+                cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+            }
+            else
+            {
+                // frame0 / frame1 are filled alternately; ptr0 always refers
+                // to the older gray frame and ptr1 to the newer one.
+                if (i%2 == 1)
+                {
+                    frame.copyTo(frame1);
+                    cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame0Gray;
+                    ptr1 = frame1Gray;
+                }
+                else
+                {
+                    frame.copyTo(frame0);
+                    cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+                    ptr0 = frame1Gray;
+                    ptr1 = frame0Gray;
+                }
+
+                if (useCPU)
+                {
+                    pts.clear();
+                    goodFeaturesToTrack(ptr0, pts, points, 0.01, 0.0);
+                    if(pts.size() == 0)
+                        continue;
+                    calcOpticalFlowPyrLK(ptr0, ptr1, pts, nextPts, status, err);
+                }
+                else
+                {
+                    oclMat d_img(ptr0), d_prevPts;
+                    d_features(d_img, d_prevPts);
+                    if(!d_prevPts.rows || !d_prevPts.cols)
+                        continue;
+                    d_pyrLK.sparse(d_img, oclMat(ptr1), d_prevPts, d_nextPts, d_status);
+                    d_features.downloadPoints(d_prevPts,pts);
+                    download(d_nextPts, nextPts);
+                    download(d_status, status);
+                }
+                if (i%2 == 1)
+                    frame1.copyTo(frameCopy);
+                else
+                    frame0.copyTo(frameCopy);
+                drawArrows(frameCopy, pts, nextPts, status, Scalar(255, 0, 0));
+                imshow("PyrLK [Sparse]", frameCopy);
+            }
+
+            // Any key press ends the capture loop.
+            if( waitKey( 10 ) >= 0 )
+                goto _cleanup_;
+        }
+
+        waitKey(0);
+
+_cleanup_:
+        cvReleaseCapture( &capture );
+    }
+    else
+    {
+nocamera:
+        for(int i = 0; i <= LOOP_NUM;i ++)
+        {
+            cout << "loop" << i << endl;
+            // Pass 0 is a warm-up and is not timed.
+            if (i > 0) workBegin();
+
+            if (useCPU)
+            {
+                goodFeaturesToTrack(frame0, pts, points, 0.01, minDist);
+                calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
+            }
+            else
+            {
+                oclMat d_img(frame0), d_prevPts;
+                d_features(d_img, d_prevPts);
+                d_pyrLK.sparse(d_img, oclMat(frame1), d_prevPts, d_nextPts, d_status);
+                d_features.downloadPoints(d_prevPts, pts);
+                download(d_nextPts, nextPts);
+                download(d_status, status);
+            }
+
+            if (i > 0)
+                workEnd();
+
+            if (i == LOOP_NUM)
+            {
+                if (useCPU)
+                    cout << "average CPU time (noCamera) : ";
+                else
+                    cout << "average GPU time (noCamera) : ";
+
+                cout << getTime() / LOOP_NUM << " ms" << endl;
+
+                drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0));
+
+                imshow("PyrLK [Sparse]", frame0);
+            }
+        }
+    }
+
+    waitKey();
+
+    return 0;
+}
--- /dev/null
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include "opencv2/ocl/ocl.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace cv;
+using namespace std;
+using namespace ocl;
+
+bool help_showed = false;
+
+// Command-line settings of the stereo matching sample.
+struct Params
+{
+    Params();
+    // Builds a Params object from the program arguments.
+    static Params read(int argc, char** argv);
+
+    string left;   // value given with --left
+    string right;  // value given with --right
+
+    // Name of the selected method, or "" for a value outside the enum.
+    string method_str() const
+    {
+        if (method == BM)
+            return "BM";
+        if (method == BP)
+            return "BP";
+        if (method == CSBP)
+            return "CSBP";
+        return "";
+    }
+    enum {BM, BP, CSBP} method;
+    int ndisp; // Max disparity + 1
+    enum {GPU, CPU} type;
+};
+
+
+// Interactive stereo matching application: owns the images, the three
+// matcher objects and the timing state used for the FPS overlay.
+struct App
+{
+    App(const Params& p);
+    void run();
+    void handleKey(char key);
+    void printParams() const;
+
+    // Records the tick count at the start of a measured section.
+    void workBegin()
+    {
+        work_begin = getTickCount();
+    }
+
+    // Turns the ticks elapsed since workBegin() into a rate (frequency / ticks).
+    void workEnd()
+    {
+        const int64 elapsed = getTickCount() - work_begin;
+        work_fps = getTickFrequency() / elapsed;
+    }
+
+    // Overlay text: current method name followed by the last measured FPS.
+    string text() const
+    {
+        stringstream label;
+        label << "(" << p.method_str() << ") FPS: ";
+        label << setiosflags(ios::left) << setprecision(4) << work_fps;
+        return label.str();
+    }
+private:
+    Params p;
+    bool running;
+
+    Mat left_src, right_src;   // images as loaded from disk
+    Mat left, right;           // images currently fed to the matcher
+    oclMat d_left, d_right;    // device copies of left / right
+
+    StereoBM_OCL bm;
+    StereoBeliefPropagation bp;
+    StereoConstantSpaceBP csbp;
+
+    int64 work_begin;
+    double work_fps;
+};
+
+// Prints the command-line usage and records that help was shown, so that
+// main() can stop after argument parsing.
+static void printHelp()
+{
+    cout << "Usage: stereo_match_gpu\n";
+    cout << "\t--left <left_view> --right <right_view> # must be rectified\n";
+    cout << "\t--method <stereo_match_method> # BM | BP | CSBP\n";
+    cout << "\t--ndisp <number> # number of disparity levels\n";
+    cout << "\t--type <device_type> # cpu | CPU | gpu | GPU\n";
+    help_showed = true;
+}
+
+// Entry point of the stereo matching sample.
+//
+// Parses the arguments, selects an OpenCL device of the requested type and
+// runs the interactive application.
+// Returns 0 on a normal exit, 1 when called without arguments or when an
+// error was reported, and -1 when only the help text was requested.
+int main(int argc, char** argv)
+{
+    try
+    {
+        if (argc < 2)
+        {
+            printHelp();
+            return 1;
+        }
+
+        Params args = Params::read(argc, argv);
+        if (help_showed)
+            return -1;
+
+        // Indexed by Params::type (GPU = 0, CPU = 1).
+        int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU };
+        vector<Info> info;
+
+        if(getDevice(info, flags[args.type]) == 0 || info.empty())
+        {
+            throw runtime_error("Error: Did not find a valid OpenCL device!");
+        }
+        cout << "Device name:" << info[0].DeviceName[0] << endl;
+
+        App app(args);
+        app.run();
+    }
+    catch (const exception& e)
+    {
+        cout << "error: " << e.what() << endl;
+        // A failure must not look like success to the calling shell or script.
+        return 1;
+    }
+    return 0;
+}
+
+
+// Defaults: block matching, 64 disparity levels, GPU device.
+Params::Params()
+    : method(BM), ndisp(64), type(GPU)
+{
+}
+
+
+// Parses the command line into a Params object.
+//
+// Recognised keys: --left, --right, --method (BM | BP | CSBP), --ndisp,
+// --type (cpu | CPU | gpu | GPU) and --help. Every key except --help takes
+// the following argument as its value.
+//
+// Throws runtime_error for an unknown key, an unknown method or device type,
+// or a key that is missing its value.
+Params Params::read(int argc, char** argv)
+{
+    Params p;
+
+    for (int i = 1; i < argc; i++)
+    {
+        const string key(argv[i]);
+
+        if (key == "--help")
+        {
+            printHelp();
+            continue;
+        }
+
+        const bool takes_value = key == "--left" || key == "--right" ||
+                                 key == "--method" || key == "--ndisp" ||
+                                 key == "--type";
+        if (!takes_value)
+            throw runtime_error("unknown key: " + key);
+
+        // Without this check a trailing key would read argv[argc], which is
+        // a null pointer.
+        if (i + 1 >= argc)
+            throw runtime_error("missing value for key: " + key);
+
+        const string value(argv[++i]);
+
+        if (key == "--left")
+            p.left = value;
+        else if (key == "--right")
+            p.right = value;
+        else if (key == "--method")
+        {
+            if (value == "BM") p.method = BM;
+            else if (value == "BP") p.method = BP;
+            else if (value == "CSBP") p.method = CSBP;
+            else throw runtime_error("unknown stereo match method: " + value);
+        }
+        else if (key == "--ndisp")
+            p.ndisp = atoi(value.c_str());
+        else // --type
+        {
+            if (value == "cpu" || value == "CPU")
+                p.type = CPU;
+            else if (value == "gpu" || value == "GPU")
+                p.type = GPU;
+            else throw runtime_error("unknown device type: " + value);
+        }
+    }
+
+    return p;
+}
+
+
+// Stores the parameters and prints the list of keyboard controls.
+// The timing members start at zero so they never hold indeterminate values
+// before the first workBegin() / workEnd() pair.
+App::App(const Params& params)
+    : p(params), running(false), work_begin(0), work_fps(0.)
+{
+    cout << "stereo_match_ocl sample\n";
+    cout << "\nControls:\n"
+         << "\tesc - exit\n"
+         << "\tp - print current parameters\n"
+         << "\tg - convert source images into gray\n"
+         << "\tm - change stereo match method\n"
+         << "\ts - change Sobel prefiltering flag (for BM only)\n"
+         << "\t1/q - increase/decrease maximum disparity\n"
+         << "\t2/w - increase/decrease window size (for BM only)\n"
+         << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+         << "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+}
+
+
+// Loads the image pair, then repeatedly computes and displays the disparity
+// map with the currently selected method until handleKey() clears `running`.
+// Throws runtime_error when either input image cannot be opened.
+void App::run()
+{
+    // Load images
+    left_src = imread(p.left);
+    right_src = imread(p.right);
+    if (left_src.empty())
+        throw runtime_error("can't open file \"" + p.left + "\"");
+    if (right_src.empty())
+        throw runtime_error("can't open file \"" + p.right + "\"");
+
+    // Start from gray-scale versions of both views.
+    cvtColor(left_src, left, CV_BGR2GRAY);
+    cvtColor(right_src, right, CV_BGR2GRAY);
+
+    d_left.upload(left);
+    d_right.upload(right);
+
+    imshow("left", left);
+    imshow("right", right);
+
+    // Set common parameters
+    bm.ndisp = p.ndisp;
+    bp.ndisp = p.ndisp;
+    csbp.ndisp = p.ndisp;
+
+    cout << endl;
+    printParams();
+
+    for (running = true; running; )
+    {
+        // Prepare disparity map of specified type
+        Mat disp;
+        oclMat d_disp;
+        workBegin();
+        if (p.method == Params::BM)
+        {
+            const bool has_color = d_left.channels() > 1 || d_right.channels() > 1;
+            if (has_color)
+            {
+                // Switch both views back to gray before running BM.
+                cout << "BM doesn't support color images\n";
+                cvtColor(left_src, left, CV_BGR2GRAY);
+                cvtColor(right_src, right, CV_BGR2GRAY);
+                cout << "image_channels: " << left.channels() << endl;
+                d_left.upload(left);
+                d_right.upload(right);
+                imshow("left", left);
+                imshow("right", right);
+            }
+            bm(d_left, d_right, d_disp);
+        }
+        else if (p.method == Params::BP)
+        {
+            bp(d_left, d_right, d_disp);
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp(d_left, d_right, d_disp);
+        }
+        // Wait for the queued device work so the timing covers it.
+        ocl::finish();
+        workEnd();
+
+        // Show results
+        d_disp.download(disp);
+        if (p.method != Params::BM)
+        {
+            // Depth code 0 is CV_8U.
+            disp.convertTo(disp, CV_8U);
+        }
+        putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
+        imshow("disparity", disp);
+
+        const char pressed = (char)waitKey(3);
+        handleKey(pressed);
+    }
+}
+
+
+// Prints the image geometry, the selected method and that method's own
+// tunable values to stdout.
+void App::printParams() const
+{
+    cout << "--- Parameters ---\n"
+         << "image_size: (" << left.cols << ", " << left.rows << ")\n"
+         << "image_channels: " << left.channels() << endl
+         << "method: " << p.method_str() << endl
+         << "ndisp: " << p.ndisp << endl;
+
+    if (p.method == Params::BM)
+    {
+        cout << "win_size: " << bm.winSize << endl;
+        cout << "prefilter_sobel: " << bm.preset << endl;
+    }
+    else if (p.method == Params::BP)
+    {
+        cout << "iter_count: " << bp.iters << endl;
+        cout << "level_count: " << bp.levels << endl;
+    }
+    else if (p.method == Params::CSBP)
+    {
+        cout << "iter_count: " << csbp.iters << endl;
+        cout << "level_count: " << csbp.levels << endl;
+    }
+    cout << endl;
+}
+
+
+// Reacts to one key press from the display loop.
+//
+// Letters are accepted in either case; digits and ESC (27) are matched as-is.
+// Keys that are not bound to an action are ignored.
+void App::handleKey(char key)
+{
+    // Fold upper-case letters so each action needs a single case label.
+    if (key >= 'A' && key <= 'Z')
+        key = (char)(key - 'A' + 'a');
+
+    switch (key)
+    {
+    case 27:
+        running = false;
+        break;
+
+    case 'p':
+        printParams();
+        break;
+
+    case 'g':
+        // Toggle between the original and the gray-scale images; BM always
+        // gets gray-scale input.
+        if (left.channels() == 1 && p.method != Params::BM)
+        {
+            left = left_src;
+            right = right_src;
+        }
+        else
+        {
+            cvtColor(left_src, left, CV_BGR2GRAY);
+            cvtColor(right_src, right, CV_BGR2GRAY);
+        }
+        d_left.upload(left);
+        d_right.upload(right);
+        cout << "image_channels: " << left.channels() << endl;
+        imshow("left", left);
+        imshow("right", right);
+        break;
+
+    case 'm':
+        // Cycle BM -> BP -> CSBP -> BM.
+        if (p.method == Params::BM)
+            p.method = Params::BP;
+        else if (p.method == Params::BP)
+            p.method = Params::CSBP;
+        else if (p.method == Params::CSBP)
+            p.method = Params::BM;
+        cout << "method: " << p.method_str() << endl;
+        break;
+
+    case 's':
+        if (p.method == Params::BM)
+        {
+            if (bm.preset == StereoBM_OCL::BASIC_PRESET)
+                bm.preset = StereoBM_OCL::PREFILTER_XSOBEL;
+            else if (bm.preset == StereoBM_OCL::PREFILTER_XSOBEL)
+                bm.preset = StereoBM_OCL::BASIC_PRESET;
+            cout << "prefilter_sobel: " << bm.preset << endl;
+        }
+        break;
+
+    case '1':
+    case 'q':
+        // '1' raises the disparity count in steps of 8 (1 jumps to 8);
+        // 'q' lowers it by 8, never below 1.
+        if (key == '1')
+            p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
+        else
+            p.ndisp = max(p.ndisp - 8, 1);
+        cout << "ndisp: " << p.ndisp << endl;
+        bm.ndisp = p.ndisp;
+        bp.ndisp = p.ndisp;
+        csbp.ndisp = p.ndisp;
+        break;
+
+    case '2':
+    case 'w':
+        // Window size stays within [2, 51]; only meaningful for BM.
+        if (p.method == Params::BM)
+        {
+            if (key == '2')
+                bm.winSize = min(bm.winSize + 1, 51);
+            else
+                bm.winSize = max(bm.winSize - 1, 2);
+            cout << "win_size: " << bm.winSize << endl;
+        }
+        break;
+
+    case '3':
+    case 'e':
+        // Iteration count of BP / CSBP, never below 1.
+        if (p.method == Params::BP)
+        {
+            if (key == '3')
+                bp.iters += 1;
+            else
+                bp.iters = max(bp.iters - 1, 1);
+            cout << "iter_count: " << bp.iters << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            if (key == '3')
+                csbp.iters += 1;
+            else
+                csbp.iters = max(csbp.iters - 1, 1);
+            cout << "iter_count: " << csbp.iters << endl;
+        }
+        break;
+
+    case '4':
+    case 'r':
+        // Level count of BP / CSBP, never below 1.
+        if (p.method == Params::BP)
+        {
+            if (key == '4')
+                bp.levels += 1;
+            else
+                bp.levels = max(bp.levels - 1, 1);
+            cout << "level_count: " << bp.levels << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            if (key == '4')
+                csbp.levels += 1;
+            else
+                csbp.levels = max(csbp.levels - 1, 1);
+            cout << "level_count: " << csbp.levels << endl;
+        }
+        break;
+    }
+}
+
+
#include <iostream>
#include <stdio.h>
#include "opencv2/core/core.hpp"
-#include "opencv2/features2d/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
-#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/nonfree/ocl.hpp"
#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/nonfree/nonfree.hpp"
-using namespace std;
using namespace cv;
using namespace cv::ocl;
-//#define USE_CPU_DESCRIPTOR // use cpu descriptor extractor until ocl descriptor extractor is fixed
-//#define USE_CPU_BFMATCHER
+const int LOOP_NUM = 10;
+const int GOOD_PTS_MAX = 50;
+const float GOOD_PORTION = 0.15f;
+
+namespace
+{
+// Prints a short description of the sample, its command-line usage and an
+// example invocation to standard output.
void help();
void help()
{
- cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << endl;
- cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2>" << endl;
+ std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl;
+ std::cout << "\nUsage:\n\tsurf_matcher --left <image1> --right <image2> [-c]" << std::endl;
+ std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl;
}
+int64 work_begin = 0;
+int64 work_end = 0;
-////////////////////////////////////////////////////
-// This program demonstrates the usage of SURF_OCL.
-// use cpu findHomography interface to calculate the transformation matrix
-int main(int argc, char* argv[])
+// Records the tick count at the start of a timed section.
+void workBegin()
+{
+ work_begin = getTickCount();
+}
+// Stores the ticks elapsed since workBegin(); each call overwrites the
+// previous measurement instead of accumulating.
+void workEnd()
{
- if (argc != 5 && argc != 1)
- {
- help();
- return -1;
- }
- vector<cv::ocl::Info> info;
- if(!cv::ocl::getDevice(info))
- {
- cout << "Error: Did not find a valid OpenCL device!" << endl;
- return -1;
- }
- Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
- oclMat img1, img2;
- if(argc != 5)
+ work_end = getTickCount() - work_begin;
+}
+// Converts the last measured interval into the value that main() reports
+// with an "ms" suffix.
+double getTime(){
+ return work_end /((double)cvGetTickFrequency() * 1000.);
+}
+
+// Thin functor wrapper around a keypoint detector of type KPDetector.
+// The detector is constructed from `hessian` (default 800.0) and every call
+// is forwarded to it unchanged, so main() can drive the two detector types
+// it instantiates through the same call syntax.
+template<class KPDetector>
+struct SURFDetector
+{
+ KPDetector surf;
+ SURFDetector(double hessian = 800.0)
+ :surf(hessian)
{
- cpu_img1 = imread("o.png");
- cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
- img1 = cpu_img1_grey;
- CV_Assert(!img1.empty());
-
- cpu_img2 = imread("r2.png");
- cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
- img2 = cpu_img2_grey;
}
- else
+ // Forwards to the wrapped detector. The image, the mask and the
+ // descriptor output all share the matrix type T.
+ template<class T>
+ void operator()(const T& in, const T& mask, vector<cv::KeyPoint>& pts, T& descriptors, bool useProvided = false)
{
- for (int i = 1; i < argc; ++i)
- {
- if (string(argv[i]) == "--left")
- {
- cpu_img1 = imread(argv[++i]);
- cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
- img1 = cpu_img1_grey;
- CV_Assert(!img1.empty());
- }
- else if (string(argv[i]) == "--right")
- {
- cpu_img2 = imread(argv[++i]);
- cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
- img2 = cpu_img2_grey;
- }
- else if (string(argv[i]) == "--help")
- {
- help();
- return -1;
- }
- }
+ surf(in, mask, pts, descriptors, useProvided);
}
+};
- SURF_OCL surf;
- //surf.hessianThreshold = 400.f;
- //surf.extended = false;
-
- // detecting keypoints & computing descriptors
- oclMat keypoints1GPU, keypoints2GPU;
- oclMat descriptors1GPU, descriptors2GPU;
-
- // downloading results
- vector<KeyPoint> keypoints1, keypoints2;
- vector<DMatch> matches;
-
-
-#ifndef USE_CPU_DESCRIPTOR
- surf(img1, oclMat(), keypoints1GPU, descriptors1GPU);
- surf(img2, oclMat(), keypoints2GPU, descriptors2GPU);
-
- surf.downloadKeypoints(keypoints1GPU, keypoints1);
- surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
-
-#ifdef USE_CPU_BFMATCHER
- //BFMatcher
- BFMatcher matcher(cv::NORM_L2);
- matcher.match(Mat(descriptors1GPU), Mat(descriptors2GPU), matches);
-#else
- BruteForceMatcher_OCL_base matcher(BruteForceMatcher_OCL_base::L2Dist);
- matcher.match(descriptors1GPU, descriptors2GPU, matches);
-#endif
-
-#else
- surf(img1, oclMat(), keypoints1GPU);
- surf(img2, oclMat(), keypoints2GPU);
- surf.downloadKeypoints(keypoints1GPU, keypoints1);
- surf.downloadKeypoints(keypoints2GPU, keypoints2);
-
- // use SURF_OCL to detect keypoints and use SURF to extract descriptors
- SURF surf_cpu;
- Mat descriptors1, descriptors2;
- surf_cpu(cpu_img1, Mat(), keypoints1, descriptors1, true);
- surf_cpu(cpu_img2, Mat(), keypoints2, descriptors2, true);
- matcher.match(descriptors1, descriptors2, matches);
-#endif
- cout << "OCL: FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl;
- cout << "OCL: FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl;
-
- double max_dist = 0; double min_dist = 100;
- //-- Quick calculation of max and min distances between keypoints
- for( size_t i = 0; i < keypoints1.size(); i++ )
+// Thin wrapper around a descriptor matcher of type KPMatcher. The matcher is
+// default-constructed and match() is forwarded to it unchanged.
+template<class KPMatcher>
+struct SURFMatcher
+{
+ KPMatcher matcher;
+ // Matches the descriptors in `in1` against those in `in2`; both share the
+ // matrix type T and the result is written to `matches`.
+ template<class T>
+ void match(const T& in1, const T& in2, vector<cv::DMatch>& matches)
{
- double dist = matches[i].distance;
- if( dist < min_dist ) min_dist = dist;
- if( dist > max_dist ) max_dist = dist;
+ matcher.match(in1, in2, matches);
}
+};
- printf("-- Max dist : %f \n", max_dist );
- printf("-- Min dist : %f \n", min_dist );
-
- //-- Draw only "good" matches (i.e. whose distance is less than 2.5*min_dist )
+Mat drawGoodMatches(
+ const Mat& cpu_img1,
+ const Mat& cpu_img2,
+ const vector<KeyPoint>& keypoints1,
+ const vector<KeyPoint>& keypoints2,
+ vector<DMatch>& matches,
+ vector<Point2f>& scene_corners_
+ )
+{
+ //-- Sort matches and preserve top 10% matches
+ std::sort(matches.begin(), matches.end());
std::vector< DMatch > good_matches;
+ double minDist = matches.front().distance,
+ maxDist = matches.back().distance;
- for( size_t i = 0; i < keypoints1.size(); i++ )
+ const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION));
+ for( int i = 0; i < ptsPairs; i++ )
{
- if( matches[i].distance < 3*min_dist )
- {
- good_matches.push_back( matches[i]);
- }
+ good_matches.push_back( matches[i] );
}
+ std::cout << "\nMax distance: " << maxDist << std::endl;
+ std::cout << "Min distance: " << minDist << std::endl;
+
+ std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl;
// drawing the results
Mat img_matches;
drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2,
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
- vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
+ vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
//-- Localize the object
std::vector<Point2f> obj;
obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt );
scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt );
}
- Mat H = findHomography( obj, scene, CV_RANSAC );
-
//-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 );
obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows );
std::vector<Point2f> scene_corners(4);
-
+
+ Mat H = findHomography( obj, scene, CV_RANSAC );
perspectiveTransform( obj_corners, scene_corners, H);
+ scene_corners_ = scene_corners;
+
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
- line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
- line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 );
+ line( img_matches,
+ scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ line( img_matches,
+ scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0),
+ Scalar( 0, 255, 0), 2, CV_AA );
+ return img_matches;
+}
+
+}
+////////////////////////////////////////////////////
+// This program demonstrates the usage of SURF_OCL.
+// use cpu findHomography interface to calculate the transformation matrix
+// Entry point of the SURF matching sample.
+//
+// Arguments: --left <image1> --right <image2>, plus one optional mode flag:
+//   -c  run the CPU implementation only
+//   -g  run the OpenCL implementation only
+//   -a  run both and compare them (also the default when no flag is given)
+// Each selected implementation is run LOOP_NUM + 1 times; the first pass is
+// left out of the timing. When both are run, the object corners found by the
+// two implementations are compared with a tolerance of 10 pixels.
+// Returns 0 on success and -1 when no OpenCL device is found, help is
+// requested, or the two input images were not both supplied.
+int main(int argc, char* argv[])
+{
+    vector<cv::ocl::Info> info;
+    if(cv::ocl::getDevice(info) == 0)
+    {
+        std::cout << "Error: Did not find a valid OpenCL device!" << std::endl;
+        return -1;
+    }
+    ocl::setDevice(info[0]);
+
+    Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey;
+    oclMat img1, img2;
+    bool useCPU = false;
+    bool useGPU = false;
+    bool useALL = false;
+
+    for (int i = 1; i < argc; ++i)
+    {
+        if (string(argv[i]) == "--left")
+        {
+            // The file name must follow the option.
+            if (++i >= argc)
+            {
+                help();
+                return -1;
+            }
+            cpu_img1 = imread(argv[i]);
+            CV_Assert(!cpu_img1.empty());
+            cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY);
+            img1 = cpu_img1_grey;
+        }
+        else if (string(argv[i]) == "--right")
+        {
+            // The file name must follow the option.
+            if (++i >= argc)
+            {
+                help();
+                return -1;
+            }
+            cpu_img2 = imread(argv[i]);
+            CV_Assert(!cpu_img2.empty());
+            cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY);
+            img2 = cpu_img2_grey;
+        }
+        else if (string(argv[i]) == "-c")
+        {
+            useCPU = true;
+            useGPU = false;
+            useALL = false;
+        }else if(string(argv[i]) == "-g")
+        {
+            useGPU = true;
+            useCPU = false;
+            useALL = false;
+        }else if(string(argv[i]) == "-a")
+        {
+            useALL = true;
+            useCPU = false;
+            useGPU = false;
+        }
+        else if (string(argv[i]) == "--help")
+        {
+            help();
+            return -1;
+        }
+    }
+
+    // Both images are required; without them the detectors below would be
+    // handed empty matrices.
+    if (cpu_img1_grey.empty() || cpu_img2_grey.empty())
+    {
+        help();
+        return -1;
+    }
+
+    if(!useCPU)
+    {
+        std::cout
+            << "Device name:"
+            << info[0].DeviceName[0]
+            << std::endl;
+    }
+    double surf_time = 0.;
+
+    //declare input/output
+    vector<KeyPoint> keypoints1, keypoints2;
+    vector<DMatch> matches;
+
+    vector<KeyPoint> gpu_keypoints1;
+    vector<KeyPoint> gpu_keypoints2;
+    vector<DMatch> gpu_matches;
+
+    Mat descriptors1CPU, descriptors2CPU;
+
+    oclMat keypoints1GPU, keypoints2GPU;
+    oclMat descriptors1GPU, descriptors2GPU;
+
+    //instantiate detectors/matchers
+    SURFDetector<SURF> cpp_surf;
+    SURFDetector<SURF_OCL> ocl_surf;
+
+    SURFMatcher<BFMatcher> cpp_matcher;
+    SURFMatcher<BFMatcher_OCL> ocl_matcher;
+
+    //-- start of timing section
+    // In every loop below pass 0 is a warm-up: the timer starts at pass 1.
+    if (useCPU)
+    {
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+        }
+        workEnd();
+        std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+    }
+    else if(useGPU)
+    {
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU);
+            ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU);
+            ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches);
+        }
+        workEnd();
+        std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+    }else
+    {
+        //cpu runs
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU);
+            cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU);
+            cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches);
+        }
+        workEnd();
+        std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl;
+
+        //gpu runs
+        for (int i = 0; i <= LOOP_NUM; i++)
+        {
+            if(i == 1) workBegin();
+            ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU);
+            ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU);
+            ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches);
+        }
+        workEnd();
+        // Report the OpenCL results, which live in the gpu_* vectors here.
+        std::cout << "\nOCL: FOUND " << gpu_keypoints1.size() << " keypoints on first image" << std::endl;
+        std::cout << "OCL: FOUND " << gpu_keypoints2.size() << " keypoints on second image" << std::endl;
+
+        surf_time = getTime();
+        std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n";
+
+    }
+
+    //--------------------------------------------------------------------------
+    std::vector<Point2f> cpu_corner;
+    Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner);
+
+    std::vector<Point2f> gpu_corner;
+    Mat ocl_img_matches;
+    if(useALL || (!useCPU&&!useGPU))
+    {
+        ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner);
+
+        //check accuracy
+        std::cout<<"\nCheck accuracy:\n";
+
+        if(cpu_corner.size()!=gpu_corner.size())
+            std::cout<<"Failed\n";
+        else
+        {
+            // Passed only if every corner pair agrees within 10 pixels in
+            // both x and y.
+            bool result = false;
+            for(size_t i = 0; i < cpu_corner.size(); i++)
+            {
+                if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10)
+                    ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10))
+                {
+                    std::cout<<"Failed\n";
+                    result = false;
+                    break;
+                }
+                result = true;
+            }
+            if(result)
+                std::cout<<"Passed\n";
+        }
+    }
//-- Show detected matches
- namedWindow("ocl surf matches", 0);
- imshow("ocl surf matches", img_matches);
- waitKey(0);
+    if (useCPU)
+    {
+        namedWindow("cpu surf matches", 0);
+        imshow("cpu surf matches", img_matches);
+    }
+    else if(useGPU)
+    {
+        namedWindow("ocl surf matches", 0);
+        imshow("ocl surf matches", img_matches);
+    }else
+    {
+        namedWindow("cpu surf matches", 0);
+        imshow("cpu surf matches", img_matches);
+        namedWindow("ocl surf matches", 0);
+        imshow("ocl surf matches", ocl_img_matches);
+    }
+    waitKey(0);
return 0;
}
--- /dev/null
+#!/usr/bin/env python
+'''
+===============================================================================
+Interactive Image Segmentation using GrabCut algorithm.
+
+This sample shows interactive image segmentation using grabcut algorithm.
+
+USAGE :
+ python grabcut.py <filename>
+
+README FIRST:
+ Two windows will show up, one for input and one for output.
+
+ At first, in input window, draw a rectangle around the object using
+mouse right button. Then press 'n' to segment the object (once or a few times)
+For any finer touch-ups, you can press any of the keys below and draw lines on
+the areas you want. Then again press 'n' for updating the output.
+
+Key '0' - To select areas of sure background
+Key '1' - To select areas of sure foreground
+Key '2' - To select areas of probable background
+Key '3' - To select areas of probable foreground
+
+Key 'n' - To update the segmentation
+Key 'r' - To reset the setup
+Key 's' - To save the results
+===============================================================================
+'''
+
+import numpy as np
+import cv2
+import sys
+
+BLUE = [255,0,0] # rectangle color
+RED = [0,0,255] # PR BG
+GREEN = [0,255,0] # PR FG
+BLACK = [0,0,0] # sure BG
+WHITE = [255,255,255] # sure FG
+
+DRAW_BG = {'color' : BLACK, 'val' : 0}
+DRAW_FG = {'color' : WHITE, 'val' : 1}
+DRAW_PR_FG = {'color' : GREEN, 'val' : 3}
+DRAW_PR_BG = {'color' : RED, 'val' : 2}
+
+# setting up flags
+rect = (0,0,1,1)
+drawing = False # flag for drawing curves
+rectangle = False # flag for drawing rect
+rect_over = False # flag to check if rect drawn
+rect_or_mask = 100 # flag for selecting rect or mask mode
+value = DRAW_FG # drawing initialized to FG
+thickness = 3 # brush thickness
+
+def onmouse(event,x,y,flags,param):
+ global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over
+
+ # Draw Rectangle
+ if event == cv2.EVENT_RBUTTONDOWN:
+ rectangle = True
+ ix,iy = x,y
+
+ elif event == cv2.EVENT_MOUSEMOVE:
+ if rectangle == True:
+ img = img2.copy()
+ cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+ rect = (ix,iy,abs(ix-x),abs(iy-y))
+ rect_or_mask = 0
+
+ elif event == cv2.EVENT_RBUTTONUP:
+ rectangle = False
+ rect_over = True
+ cv2.rectangle(img,(ix,iy),(x,y),BLUE,2)
+ rect = (ix,iy,abs(ix-x),abs(iy-y))
+ rect_or_mask = 0
+ print " Now press the key 'n' a few times until no further change \n"
+
+ # draw touchup curves
+
+ if event == cv2.EVENT_LBUTTONDOWN:
+ if rect_over == False:
+ print "first draw rectangle \n"
+ else:
+ drawing = True
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+ elif event == cv2.EVENT_MOUSEMOVE:
+ if drawing == True:
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+ elif event == cv2.EVENT_LBUTTONUP:
+ if drawing == True:
+ drawing = False
+ cv2.circle(img,(x,y),thickness,value['color'],-1)
+ cv2.circle(mask,(x,y),thickness,value['val'],-1)
+
+# print documentation
+print __doc__
+
+# Loading images
+if len(sys.argv) == 2:
+ filename = sys.argv[1] # for drawing purposes
+else:
+ print "No input image given, so loading default image, lena.jpg \n"
+ print "Correct Usage : python grabcut.py <filename> \n"
+ filename = '../cpp/lena.jpg'
+
+img = cv2.imread(filename)
+img2 = img.copy() # a copy of original image
+mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG
+output = np.zeros(img.shape,np.uint8) # output image to be shown
+
+# input and output windows
+cv2.namedWindow('output')
+cv2.namedWindow('input')
+cv2.setMouseCallback('input',onmouse)
+cv2.moveWindow('input',img.shape[1]+10,90)
+
+print " Instructions : \n"
+print " Draw a rectangle around the object using right mouse button \n"
+
+while(1):
+
+ cv2.imshow('output',output)
+ cv2.imshow('input',img)
+ k = 0xFF & cv2.waitKey(1)
+
+ # key bindings
+ if k == 27: # esc to exit
+ break
+ elif k == ord('0'): # BG drawing
+ print " mark background regions with left mouse button \n"
+ value = DRAW_BG
+ elif k == ord('1'): # FG drawing
+ print " mark foreground regions with left mouse button \n"
+ value = DRAW_FG
+ elif k == ord('2'): # PR_BG drawing
+ value = DRAW_PR_BG
+ elif k == ord('3'): # PR_FG drawing
+ value = DRAW_PR_FG
+ elif k == ord('s'): # save image
+ bar = np.zeros((img.shape[0],5,3),np.uint8)
+ res = np.hstack((img2,bar,img,bar,output))
+ cv2.imwrite('grabcut_output.png',res)
+ print " Result saved as image \n"
+ elif k == ord('r'): # reset everything
+ print "resetting \n"
+ rect = (0,0,1,1)
+ drawing = False
+ rectangle = False
+ rect_or_mask = 100
+ rect_over = False
+ value = DRAW_FG
+ img = img2.copy()
+ mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG
+ output = np.zeros(img.shape,np.uint8) # output image to be shown
+ elif k == ord('n'): # segment the image
+ print """ For finer touchups, mark foreground and background after pressing keys 0-3
+ and again press 'n' \n"""
+ if (rect_or_mask == 0): # grabcut with rect
+ bgdmodel = np.zeros((1,65),np.float64)
+ fgdmodel = np.zeros((1,65),np.float64)
+ cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT)
+ rect_or_mask = 1
+ elif rect_or_mask == 1: # grabcut with mask
+ bgdmodel = np.zeros((1,65),np.float64)
+ fgdmodel = np.zeros((1,65),np.float64)
+ cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK)
+
+ mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8')
+ output = cv2.bitwise_and(img2,img2,mask=mask2)
+
+cv2.destroyAllWindows()