From: Alexander Alekhin Date: Wed, 22 Mar 2017 11:46:34 +0000 (+0300) Subject: core: CPU target dispatcher update X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1096^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=17e5e4cd5af61a32198f11f768d4bd1d1c47a277;p=platform%2Fupstream%2Fopencv.git core: CPU target dispatcher update - use suffixes like '.avx.cpp' - added CMake-generated files for '.simd.hpp' optimization approach - wrap HAL intrinsic headers into separate namespaces for different build flags - automatic vzeroupper insertion (via CV_INSTRUMENT_REGION macro) --- diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index b849f02..9b48dc8 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -275,6 +275,11 @@ set(CPU_BASELINE_FLAGS "") set(CPU_BASELINE_FINAL "") set(CPU_DISPATCH_FINAL "") +if(CV_DISABLE_OPTIMIZATION) + set(CPU_DISPATCH "") + set(CPU_DISPATCH_REQUIRE "") +endif() + macro(ocv_check_compiler_optimization OPT) if(NOT DEFINED CPU_${OPT}_SUPPORTED) if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) @@ -319,7 +324,7 @@ macro(ocv_check_compiler_optimization OPT) endmacro() foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) - set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE) + set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "") if(NOT DEFINED CPU_${OPT}_FORCE) set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}") endif() @@ -515,15 +520,27 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T endforeach() foreach(fname ${${SOURCES_VAR_NAME}}) string(TOLOWER "${fname}" fname_LOWER) - if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") - if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) - message(STATUS "Excluding from source files list: ${fname}") + if(fname_LOWER MATCHES "\\.(.*)\\.cpp$") + string(TOUPPER "${CMAKE_MATCH_1}" OPT_) + if(OPT_ MATCHES "(CUDA.*|DISPATCH.*|OCL)") # don't touch files like filename.cuda.cpp + list(APPEND __result "${fname}") + #continue() + elseif(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) + message(STATUS "Excluding from source files list (optimization is disabled): ${fname}") #continue() else() + get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS) + if(__definitions) + list(APPEND __definitions "CV_CPU_DISPATCH_MODE=${OPT_}") + else() + set(__definitions "CV_CPU_DISPATCH_MODE=${OPT_}") + endif() + set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}") + set(__opt_found 0) foreach(OPT ${CPU_BASELINE_FINAL}) string(TOLOWER "${OPT}" OPT_LOWER) - if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$") + if(fname_LOWER MATCHES "\\.${OPT_LOWER}\\.cpp$") #message("${fname} BASELINE-${OPT}") set(__opt_found 1) list(APPEND __result "${fname}") @@ -533,11 +550,11 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T foreach(OPT ${CPU_DISPATCH_FINAL}) foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED}) string(TOLOWER "${OPT2}" OPT2_LOWER) - if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$") + if(fname_LOWER MATCHES "\\.${OPT2_LOWER}\\.cpp$") list(APPEND __result_${OPT} "${fname}") math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1") set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE) -#message("${fname} ${OPT}") +#message("(${CPU_${OPT}_USAGE_COUNT})${fname} ${OPT}") #message(" ${CPU_DISPATCH_${OPT}_INCLUDED}") #message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}") #message(" ${CPU_DISPATCH_FLAGS_${OPT}}") @@ -573,7 +590,13 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T list(APPEND __result "$") else() foreach(fname ${__result_${OPT}}) - set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}") + get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS) + if(__definitions) + list(APPEND __definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}") + else() + set(__definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}") + endif() + set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}") set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}") endforeach() list(APPEND __result ${__result_${OPT}}) @@ -620,18 +643,25 @@ macro(ocv_compiler_optimization_fill_cpu_config) set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE} #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT} # define CV_CPU_HAS_SUPPORT_${OPT} 1 -# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__ +# define CV_CPU_CALL_${OPT}(fn, args) return (opt_${OPT}::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT} # define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT})) -# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__ +# define CV_CPU_CALL_${OPT}(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args) #else # define CV_CPU_HAS_SUPPORT_${OPT} 0 -# define CV_CPU_CALL_${OPT}(...) +# define CV_CPU_CALL_${OPT}(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_${OPT}(fn, args, mode, ...) CV_CPU_CALL_${OPT}(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) ") endif() endforeach() + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE} +#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) +#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ +") + + set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h") if(EXISTS "${__file}") file(READ "${__file}" __content) @@ -644,6 +674,57 @@ macro(ocv_compiler_optimization_fill_cpu_config) endif() endmacro() +macro(ocv_add_dispatched_file filename) + if(NOT OPENCV_INITIAL_PASS) + set(__codestr " +#include \"precomp.hpp\" +#include \"${filename}.simd.hpp\" +") + + set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${filename}.simd.hpp\"") + set(__dispatch_modes "BASELINE") + + set(__optimizations "${ARGN}") + if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) + set(__optimizations "") + endif() + + foreach(OPT ${__optimizations}) + string(TOLOWER "${OPT}" OPT_LOWER) + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp") + if(EXISTS "${__file}") + file(READ "${__file}" __content) + endif() + if(__content STREQUAL __codestr) + #message(STATUS "${__file} contains up-to-date content") + else() + file(WRITE "${__file}" "${__codestr}") + endif() + list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}") + + set(__declarations_str "${__declarations_str} +#define CV_CPU_DISPATCH_MODE ${OPT} +#include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\" +") + set(__dispatch_modes "${OPT}, ${__dispatch_modes}") + endforeach() + + set(__declarations_str "${__declarations_str} +#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes} +") + + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp") + if(EXISTS "${__file}") + file(READ "${__file}" __content) + endif() + if(__content STREQUAL __declarations_str) + #message(STATUS "${__file} contains up-to-date content") + else() + file(WRITE "${__file}" "${__declarations_str}") + endif() + endif() +endmacro() + if(CV_DISABLE_OPTIMIZATION OR CV_ICC) ocv_update(CV_ENABLE_UNROLLED 0) else() diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 10e1f73..2546ac5 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -314,6 +314,7 @@ macro(ocv_glob_modules) set(OPENCV_INITIAL_PASS OFF) if(${BUILD_opencv_world}) foreach(m ${OPENCV_MODULES_BUILD}) + set(the_module "${m}") if("${m}" STREQUAL opencv_world) add_subdirectory("${OPENCV_MODULE_opencv_world_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/world") elseif(NOT OPENCV_MODULE_${m}_IS_PART_OF_WORLD AND NOT ${m} STREQUAL opencv_world) @@ -329,6 +330,7 @@ macro(ocv_glob_modules) endforeach() else() foreach(m ${OPENCV_MODULES_BUILD}) + set(the_module "${m}") if(m MATCHES "^opencv_") string(REGEX REPLACE "^opencv_" "" __shortname "${m}") add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}") @@ -646,11 +648,13 @@ macro(ocv_set_module_sources) ocv_get_module_external_sources() endif() + if(OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED) + list(APPEND OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED}) + endif() + # use full paths for module to be independent from the module location ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) - ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) - set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") endmacro() @@ -766,6 +770,11 @@ macro(ocv_create_module) endmacro() macro(_ocv_create_module) + + ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) + set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") + set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") + # The condition we ought to be testing here is whether ocv_add_precompiled_headers will # be called at some point in the future. We can't look into the future, though, # so this will have to do. diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake index 6a83218..659973a 100644 --- a/cmake/OpenCVPCHSupport.cmake +++ b/cmake/OpenCVPCHSupport.cmake @@ -288,11 +288,12 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input) foreach(src ${_sources}) if(NOT "${src}" MATCHES "\\.mm$") get_source_file_property(oldProps "${src}" COMPILE_FLAGS) - if(NOT oldProps) + get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS) + if(NOT oldProps AND NOT oldProps2) set(newProperties "-include \"${CMAKE_CURRENT_BINARY_DIR}/${_name}\"") set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}") else() - ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}") + ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}") endif() endif() endforeach() @@ -339,11 +340,12 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) AND NOT "${src}" MATCHES "^\$" # CMake generator expressions ) get_source_file_property(oldProps "${src}" COMPILE_FLAGS) - if(NOT oldProps) + get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS) + if(NOT oldProps AND NOT oldProps2) set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"") set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}") else() - ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}") + ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}") endif() endif() endforeach() diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 9a8537f..aaabea3 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -7,6 +7,23 @@ #include "cv_cpu_config.h" #include "cv_cpu_helper.h" +#ifdef CV_CPU_DISPATCH_MODE +#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#else +#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } +#endif + + +#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */ +#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__)) +#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros + + #if defined CV_ENABLE_INTRINSICS \ && !defined CV_DISABLE_OPTIMIZATION \ && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ @@ -76,6 +93,16 @@ #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ +#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX +struct VZeroUpperGuard { +#ifdef __GNUC__ + __attribute__((always_inline)) +#endif + inline ~VZeroUpperGuard() { _mm256_zeroupper(); } +}; +#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; +#endif + #endif // __OPENCV_BUILD diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index cb755d6..8bd0457 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -2,132 +2,147 @@ #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE # define CV_CPU_HAS_SUPPORT_SSE 1 -# define CV_CPU_CALL_SSE(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSE(fn, args) return (opt_SSE::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE # define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) -# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__ +# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) #else # define CV_CPU_HAS_SUPPORT_SSE 0 -# define CV_CPU_CALL_SSE(...) +# define CV_CPU_CALL_SSE(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 # define CV_CPU_HAS_SUPPORT_SSE2 1 -# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSE2(fn, args) return (opt_SSE2::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 # define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) -# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__ +# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) #else # define CV_CPU_HAS_SUPPORT_SSE2 0 -# define CV_CPU_CALL_SSE2(...) +# define CV_CPU_CALL_SSE2(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 # define CV_CPU_HAS_SUPPORT_SSE3 1 -# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSE3(fn, args) return (opt_SSE3::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 # define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) -# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__ +# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) #else # define CV_CPU_HAS_SUPPORT_SSE3 0 -# define CV_CPU_CALL_SSE3(...) +# define CV_CPU_CALL_SSE3(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 # define CV_CPU_HAS_SUPPORT_SSSE3 1 -# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSSE3(fn, args) return (opt_SSSE3::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 # define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) -# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__ +# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) #else # define CV_CPU_HAS_SUPPORT_SSSE3 0 -# define CV_CPU_CALL_SSSE3(...) +# define CV_CPU_CALL_SSSE3(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 # define CV_CPU_HAS_SUPPORT_SSE4_1 1 -# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSE4_1(fn, args) return (opt_SSE4_1::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 # define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) -# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__ +# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) #else # define CV_CPU_HAS_SUPPORT_SSE4_1 0 -# define CV_CPU_CALL_SSE4_1(...) +# define CV_CPU_CALL_SSE4_1(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 # define CV_CPU_HAS_SUPPORT_SSE4_2 1 -# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__ +# define CV_CPU_CALL_SSE4_2(fn, args) return (opt_SSE4_2::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 # define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) -# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__ +# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) #else # define CV_CPU_HAS_SUPPORT_SSE4_2 0 -# define CV_CPU_CALL_SSE4_2(...) +# define CV_CPU_CALL_SSE4_2(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT # define CV_CPU_HAS_SUPPORT_POPCNT 1 -# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__ +# define CV_CPU_CALL_POPCNT(fn, args) return (opt_POPCNT::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT # define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) -# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__ +# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) #else # define CV_CPU_HAS_SUPPORT_POPCNT 0 -# define CV_CPU_CALL_POPCNT(...) +# define CV_CPU_CALL_POPCNT(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX # define CV_CPU_HAS_SUPPORT_AVX 1 -# define CV_CPU_CALL_AVX(...) return __VA_ARGS__ +# define CV_CPU_CALL_AVX(fn, args) return (opt_AVX::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX # define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) -# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__ +# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) #else # define CV_CPU_HAS_SUPPORT_AVX 0 -# define CV_CPU_CALL_AVX(...) +# define CV_CPU_CALL_AVX(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 # define CV_CPU_HAS_SUPPORT_FP16 1 -# define CV_CPU_CALL_FP16(...) return __VA_ARGS__ +# define CV_CPU_CALL_FP16(fn, args) return (opt_FP16::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 # define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) -# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__ +# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) #else # define CV_CPU_HAS_SUPPORT_FP16 0 -# define CV_CPU_CALL_FP16(...) +# define CV_CPU_CALL_FP16(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 # define CV_CPU_HAS_SUPPORT_AVX2 1 -# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__ +# define CV_CPU_CALL_AVX2(fn, args) return (opt_AVX2::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 # define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) -# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__ +# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) #else # define CV_CPU_HAS_SUPPORT_AVX2 0 -# define CV_CPU_CALL_AVX2(...) +# define CV_CPU_CALL_AVX2(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 # define CV_CPU_HAS_SUPPORT_FMA3 1 -# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__ +# define CV_CPU_CALL_FMA3(fn, args) return (opt_FMA3::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 # define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) -# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__ +# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) #else # define CV_CPU_HAS_SUPPORT_FMA3 0 -# define CV_CPU_CALL_FMA3(...) +# define CV_CPU_CALL_FMA3(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON # define CV_CPU_HAS_SUPPORT_NEON 1 -# define CV_CPU_CALL_NEON(...) return __VA_ARGS__ +# define CV_CPU_CALL_NEON(fn, args) return (opt_NEON::fn args) #elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON # define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) -# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__ +# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) #else # define CV_CPU_HAS_SUPPORT_NEON 0 -# define CV_CPU_CALL_NEON(...) +# define CV_CPU_CALL_NEON(fn, args) #endif +#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) +#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */ diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index f2212b4..43f8a02 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -52,6 +52,17 @@ #include "cvconfig.h" #endif +#ifndef __CV_EXPAND +#define __CV_EXPAND(x) x +#endif + +#ifndef __CV_CAT +#define __CV_CAT__(x, y) x ## y +#define __CV_CAT_(x, y) __CV_CAT__(x, y) +#define __CV_CAT(x, y) __CV_CAT_(x, y) +#endif + + #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ #endif diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 34075e3..9dd1514 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -60,6 +60,25 @@ // access from within opencv code more accessible namespace cv { +#ifndef CV_DOXYGEN + +#ifdef CV_CPU_DISPATCH_MODE +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) { +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#else +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline { +#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END } +#endif + + +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + //! @addtogroup core_hal_intrin //! @{ @@ -281,6 +300,9 @@ template struct V_SIMD128Traits //! @} +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif } #ifdef CV_DOXYGEN @@ -323,6 +345,10 @@ template struct V_SIMD128Traits namespace cv { +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + template struct V_RegTrait128; template <> struct V_RegTrait128 { @@ -407,6 +433,10 @@ template <> struct V_RegTrait128 { }; #endif +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif + } // cv:: //! @endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e15c97d..e8c1664 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -53,6 +53,10 @@ namespace cv { +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#endif + /** @addtogroup core_hal_intrin "Universal intrinsics" is a types and functions set intended to simplify vectorization of code on @@ -1827,7 +1831,9 @@ static inline bool hasSIMD128() //! @} - +#ifndef CV_DOXYGEN +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END +#endif } #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 2bcff2b..c7f4e90 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -53,6 +53,8 @@ namespace cv //! @cond IGNORED +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + #define CV_SIMD128 1 #if defined(__aarch64__) #define CV_SIMD128_64F 1 @@ -1238,11 +1240,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a) //! @brief Check CPU capability of SIMD operation static inline bool hasSIMD128() { - return checkHardwareSupport(CV_CPU_NEON); + return (CV_CPU_HAS_SUPPORT_NEON) ? true : false; } //! @} +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + //! @endcond } diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 6000308..5b9a27f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -56,6 +56,8 @@ namespace cv //! @cond IGNORED +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN + struct v_uint8x16 { typedef uchar lane_type; @@ -1791,11 +1793,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a) //! @brief Check CPU capability of SIMD operation static inline bool hasSIMD128() { - return checkHardwareSupport(CV_CPU_SSE2); + return (CV_CPU_HAS_SUPPORT_SSE2) ? true : false; } //! @} +CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END + //! @endcond } diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index e428ecf..afbc557 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -540,7 +540,7 @@ CV_EXPORTS InstrNode* getCurrentNode(); ///// General instrumentation // General OpenCV region instrumentation macro -#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN) +#define CV_INSTRUMENT_REGION_() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN) // Custom OpenCV region instrumentation macro #define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN) // Instrumentation for parallel_for_ or other regions which forks and gathers threads @@ -566,7 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode(); #else #define CV_INSTRUMENT_REGION_META(...) -#define CV_INSTRUMENT_REGION() +#define CV_INSTRUMENT_REGION_() #define CV_INSTRUMENT_REGION_NAME(...) #define CV_INSTRUMENT_REGION_MT_FORK() @@ -580,6 +580,12 @@ CV_EXPORTS InstrNode* getCurrentNode(); #define CV_INSTRUMENT_MARK_OPENCL(...) #endif +#ifdef __CV_AVX_GUARD +#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_() +#else +#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_() +#endif + //! @endcond #endif // OPENCV_CORE_PRIVATE_HPP diff --git a/modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp b/modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp new file mode 100644 index 0000000..37d06ef --- /dev/null +++ b/modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp @@ -0,0 +1,30 @@ +// Helper file to include dispatched functions declaration: +// +// Usage: +// #define CV_CPU_SIMD_FILENAME ".simd.hpp" +// #define CV_CPU_DISPATCH_MODE AVX2 +// #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +// #define CV_CPU_DISPATCH_MODE SSE2 +// #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" + +#ifndef CV_DISABLE_OPTIMIZATION +#ifdef _MSC_VER +#pragma warning(disable: 4702) // unreachable code +#endif +#endif + +#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#endif + +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END + +#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) { +#define CV_CPU_OPTIMIZATION_NAMESPACE_END } + +#include CV_CPU_SIMD_FILENAME + +#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +#undef CV_CPU_OPTIMIZATION_NAMESPACE_END +#undef CV_CPU_DISPATCH_MODE diff --git a/modules/world/CMakeLists.txt b/modules/world/CMakeLists.txt index dde793f..1152cd3 100644 --- a/modules/world/CMakeLists.txt +++ b/modules/world/CMakeLists.txt @@ -24,6 +24,7 @@ if(NOT OPENCV_INITIAL_PASS) message(STATUS "Processing WORLD modules...") foreach(m ${OPENCV_MODULES_BUILD}) + set(the_module ${m}) if(OPENCV_MODULE_${m}_IS_PART_OF_WORLD) message(STATUS " module ${m}...") set(CMAKE_CURRENT_SOURCE_DIR "${OPENCV_MODULE_${m}_LOCATION}")