From: Alexander Alekhin Date: Thu, 25 May 2017 15:59:01 +0000 (+0300) Subject: trace: initial support for code trace X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~911^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=006966e6295f44e48ade066d4eb56826439680fb;p=platform%2Fupstream%2Fopencv.git trace: initial support for code trace --- diff --git a/3rdparty/ittnotify/CMakeLists.txt b/3rdparty/ittnotify/CMakeLists.txt index 0b2e859..a164d8a 100644 --- a/3rdparty/ittnotify/CMakeLists.txt +++ b/3rdparty/ittnotify/CMakeLists.txt @@ -8,6 +8,13 @@ if(NOT ITT_LIBRARY) endif() project(${ITT_LIBRARY} C) +if(NOT WIN32) + include(CheckLibraryExists) + if(COMMAND CHECK_LIBRARY_EXISTS) + CHECK_LIBRARY_EXISTS(dl dlerror "" HAVE_DL_LIBRARY) + endif() +endif() + ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include") set(ITT_INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include") @@ -30,6 +37,12 @@ set(ITT_SRCS add_library(${ITT_LIBRARY} STATIC ${ITT_SRCS} ${ITT_PUBLIC_HDRS} ${ITT_PRIVATE_HDRS}) +if(NOT WIN32) + if(HAVE_DL_LIBRARY) + target_link_libraries(${ITT_LIBRARY} dl) + endif() +endif() + if(UNIX) if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") diff --git a/CMakeLists.txt b/CMakeLists.txt index d4fe1c3..1dbe5d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,6 +264,7 @@ OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) ) OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_LAPACK "Include Lapack library support" ON IF (NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_ITT "Include Intel ITT support" ON IF (NOT APPLE_FRAMEWORK) ) # OpenCV build components # =================================================== @@ -291,6 +292,7 @@ OCV_OPTION(BUILD_PNG "Build libpng from source" WIN32 O OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (WIN32 OR ANDROID OR APPLE) 
AND NOT WINRT) OCV_OPTION(BUILD_TBB "Download and build TBB from source" ANDROID ) OCV_OPTION(BUILD_IPP_IW "Build IPP IW from source" NOT MINGW IF (X86_64 OR X86) AND NOT WINRT ) +OCV_OPTION(BUILD_ITT "Build Intel ITT from source" NOT MINGW IF (X86_64 OR X86) AND NOT WINRT AND NOT APPLE_FRAMEWORK ) # OpenCV installation options # =================================================== @@ -324,7 +326,7 @@ OCV_OPTION(ENABLE_BUILD_HARDENING "Enable hardening of the resulting binarie OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON ) OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF ) - +OCV_OPTION(CV_TRACE "Enable OpenCV code trace" ON) if(ENABLE_IMPL_COLLECTION) @@ -733,6 +735,16 @@ if(HAVE_CUDA) endif() endforeach() endif() + + +# ---------------------------------------------------------------------------- +# Code trace support +# ---------------------------------------------------------------------------- +if(CV_TRACE) + include(cmake/OpenCVDetectTrace.cmake) +endif() + + # ---------------------------------------------------------------------------- # Solution folders: # ---------------------------------------------------------------------------- @@ -1278,6 +1290,14 @@ endif() status("") status(" Parallel framework:" TRUE THEN "${CV_PARALLEL_FRAMEWORK}" ELSE NO) +if(CV_TRACE OR OPENCV_TRACE) + set(__msg "") + if(HAVE_ITT) + set(__msg "with Intel ITT") + endif() + status("") + status(" Trace: " OPENCV_TRACE THEN "YES (${__msg})" ELSE NO) +endif() # ========================== Other third-party libraries ========================== status("") diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index f2cdc87..ed66375 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -1,4 +1,6 @@ add_definitions(-D__OPENCV_BUILD=1) +add_definitions(-D__OPENCV_APPS=1) + 
link_libraries(${OPENCV_LINKER_LIBS}) add_subdirectory(traincascade) diff --git a/apps/version/opencv_version.cpp b/apps/version/opencv_version.cpp index 78f2810..9ad4bac 100644 --- a/apps/version/opencv_version.cpp +++ b/apps/version/opencv_version.cpp @@ -5,9 +5,15 @@ #include #include +#include int main(int argc, const char** argv) { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG(argc); + CV_TRACE_ARG_VALUE(argv0, "argv0", argv[0]); + CV_TRACE_ARG_VALUE(argv1, "argv1", argv[1]); + cv::CommandLineParser parser(argc, argv, "{ help h usage ? | | show this help message }" "{ verbose v | | show build configuration log }" diff --git a/cmake/OpenCVDetectTrace.cmake b/cmake/OpenCVDetectTrace.cmake new file mode 100644 index 0000000..07a8f64 --- /dev/null +++ b/cmake/OpenCVDetectTrace.cmake @@ -0,0 +1,13 @@ +if(WITH_ITT) + if(BUILD_ITT) + add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/ittnotify") + set(ITT_INCLUDE_DIR "${OpenCV_SOURCE_DIR}/3rdparty/ittnotify/include") + set(ITT_INCLUDE_DIRS "${ITT_INCLUDE_DIR}") + set(ITT_LIBRARIES "ittnotify") + set(HAVE_ITT 1) + else() + #TODO + endif() +endif() + +set(OPENCV_TRACE 1) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 9f0e24a..4bcf633 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -683,6 +683,8 @@ macro(ocv_glob_module_sources) "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.h" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.h" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/utils/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/utils/*.h" ) file(GLOB lib_hdrs_detail "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/detail/*.hpp" @@ -927,7 +929,7 @@ macro(_ocv_create_module) if(OPENCV_MODULE_${m}_HEADERS AND ";${OPENCV_MODULES_PUBLIC};" MATCHES ";${m};") foreach(hdr ${OPENCV_MODULE_${m}_HEADERS}) string(REGEX REPLACE "^.*opencv2/" "opencv2/" hdr2 "${hdr}") - if(NOT hdr2 MATCHES 
"opencv2/${m}/private.*" AND hdr2 MATCHES "^(opencv2/?.*)/[^/]+.h(..)?$" ) + if(NOT hdr2 MATCHES "private" AND hdr2 MATCHES "^(opencv2/?.*)/[^/]+.h(..)?$" ) install(FILES ${hdr} OPTIONAL DESTINATION "${OPENCV_INCLUDE_INSTALL_PATH}/${CMAKE_MATCH_1}" COMPONENT dev) endif() endforeach() @@ -1158,6 +1160,8 @@ function(ocv_add_accuracy_tests) RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}" ) + ocv_append_target_property(${the_target} COMPILE_DEFINITIONS "__OPENCV_TESTS=1") + if(ENABLE_SOLUTION_FOLDERS) set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy") endif() diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 5c5e96e..76bb431 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -241,5 +241,8 @@ #define HAVE_VIDEO_OUTPUT #endif +/* OpenCV trace utilities */ +#cmakedefine OPENCV_TRACE + #endif // OPENCV_CVCONFIG_H_INCLUDED diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index d4f20b1..3806650 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -789,8 +789,11 @@ TEST(Calib3d_StereoSGBM_HH4, regression) { String path = cvtest::TS::ptr()->get_data_path() + "cv/stereomatching/datasets/teddy/"; Mat leftImg = imread(path + "im2.png", 0); + ASSERT_FALSE(leftImg.empty()); Mat rightImg = imread(path + "im6.png", 0); + ASSERT_FALSE(rightImg.empty()); Mat testData = imread(path + "disp2_hh4.png",-1); + ASSERT_FALSE(testData.empty()); Mat leftDisp; Mat toCheck; { diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 3e69dda..6de15ba 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -21,6 +21,10 @@ if(HAVE_CUDA) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function -Wshadow) endif() +if(CV_TRACE AND HAVE_ITT AND BUILD_ITT) + add_definitions(-DOPENCV_WITH_ITT=1) +endif() + file(GLOB lib_cuda_hdrs 
"include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h") file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h") @@ -37,9 +41,16 @@ if(ANDROID AND HAVE_CPUFEATURES) ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_CPUFEATURES=1") ocv_module_include_directories(${CPUFEATURES_INCLUDE_DIRS}) endif() +if(ITT_INCLUDE_DIRS) + ocv_module_include_directories(${ITT_INCLUDE_DIRS}) +endif() ocv_create_module(${extra_libs}) -ocv_target_link_libraries(${the_module} ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}" "${LAPACK_LIBRARIES}" "${CPUFEATURES_LIBRARIES}" "${HALIDE_LIBRARIES}") +ocv_target_link_libraries(${the_module} + "${ZLIB_LIBRARIES}" "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}" + "${LAPACK_LIBRARIES}" "${CPUFEATURES_LIBRARIES}" "${HALIDE_LIBRARIES}" + "${ITT_LIBRARIES}" +) ocv_add_accuracy_tests() ocv_add_perf_tests() diff --git a/modules/core/include/opencv2/core/cvstd.inl.hpp b/modules/core/include/opencv2/core/cvstd.inl.hpp index 874364e..c8c7ba9 100644 --- a/modules/core/include/opencv2/core/cvstd.inl.hpp +++ b/modules/core/include/opencv2/core/cvstd.inl.hpp @@ -51,6 +51,11 @@ //! @cond IGNORED +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + namespace cv { #ifndef OPENCV_NOSTL @@ -233,14 +238,7 @@ template static inline std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec) { out << "["; -#ifdef _MSC_VER -#pragma warning( push ) -#pragma warning( disable: 4127 ) -#endif if(Vec<_Tp, n>::depth < CV_32F) -#ifdef _MSC_VER -#pragma warning( pop ) -#endif { for (int i = 0; i < n - 1; ++i) { out << (int)vec[i] << ", "; @@ -285,6 +283,10 @@ static inline std::ostream& operator << (std::ostream& out, const MatSize& msize #endif // OPENCV_NOSTL } // cv +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + //! 
@endcond #endif // OPENCV_CORE_CVSTDINL_HPP diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index cf3b3a0..332accf 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -49,6 +49,11 @@ # error mat.inl.hpp header must be compiled as C++ #endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4127 ) +#endif + namespace cv { @@ -3855,4 +3860,8 @@ inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); } } //cv +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + #endif diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index dbe8bb6..1028505 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -51,6 +51,8 @@ #include "opencv2/core.hpp" #include "cvconfig.h" +#include + #ifdef HAVE_EIGEN # if defined __GNUC__ && defined __APPLE__ # pragma GCC diagnostic ignored "-Wshadow" @@ -548,6 +550,7 @@ static struct __IppInitializer__ __ipp_initializer__; { \ if (cv::ipp::useIPP() && (condition)) \ { \ + CV__TRACE_REGION_("IPP:" #func, CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP) \ if(func) \ { \ CV_IMPL_ADD(CV_IMPL_IPP); \ @@ -562,23 +565,21 @@ static struct __IppInitializer__ __ipp_initializer__; } #else #define CV_IPP_RUN_(condition, func, ...) \ - if (cv::ipp::useIPP() && (condition) && (func)) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return __VA_ARGS__; \ - } + if (cv::ipp::useIPP() && (condition)) \ + { \ + CV__TRACE_REGION_("IPP:" #func, CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP) \ + if(func) \ + { \ + CV_IMPL_ADD(CV_IMPL_IPP); \ + return __VA_ARGS__; \ + } \ + } #endif -#define CV_IPP_RUN_FAST(func, ...) \ - if (cv::ipp::useIPP() && (func)) \ - { \ - CV_IMPL_ADD(CV_IMPL_IPP); \ - return __VA_ARGS__; \ - } #else #define CV_IPP_RUN_(condition, func, ...) -#define CV_IPP_RUN_FAST(func, ...) 
#endif +#define CV_IPP_RUN_FAST(func, ...) CV_IPP_RUN_(true, func, __VA_ARGS__) #define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__) @@ -768,15 +769,15 @@ CV_EXPORTS InstrNode* getCurrentNode(); #else #define CV_INSTRUMENT_REGION_META(...) -#define CV_INSTRUMENT_REGION_() -#define CV_INSTRUMENT_REGION_NAME(...) +#define CV_INSTRUMENT_REGION_() CV_TRACE_FUNCTION() +#define CV_INSTRUMENT_REGION_NAME(...) CV_TRACE_REGION(__VA_ARGS__) #define CV_INSTRUMENT_REGION_MT_FORK() -#define CV_INSTRUMENT_REGION_IPP() +#define CV_INSTRUMENT_REGION_IPP() CV__TRACE_REGION_("IPP", CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP) #define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__)) #define CV_INSTRUMENT_MARK_IPP(...) -#define CV_INSTRUMENT_REGION_OPENCL() +#define CV_INSTRUMENT_REGION_OPENCL() CV__TRACE_REGION_("OpenCL", CV_TRACE_NS::details::REGION_FLAG_IMPL_OPENCL) #define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...) #define CV_INSTRUMENT_REGION_OPENCL_RUN(...) #define CV_INSTRUMENT_MARK_OPENCL(...) 
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 87c20f1..8a923c9 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -641,6 +641,7 @@ public: inline TLSData() {} inline ~TLSData() { release(); } // Release key and delete associated data inline T* get() const { return (T*)getData(); } // Get data associated with key + inline T& getRef() const { T* ptr = (T*)getData(); CV_Assert(ptr); return *ptr; } // Get data associated with key // Get data from all threads inline void gather(std::vector &data) const @@ -1168,6 +1169,12 @@ static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } CV_EXPORTS FLAGS getFlags(); } +namespace utils { + +CV_EXPORTS int getThreadID(); + +} // namespace + } //namespace cv #ifndef DISABLE_OPENCV_24_COMPATIBILITY diff --git a/modules/core/include/opencv2/core/utils/logger.hpp b/modules/core/include/opencv2/core/utils/logger.hpp new file mode 100644 index 0000000..d7e73de --- /dev/null +++ b/modules/core/include/opencv2/core/utils/logger.hpp @@ -0,0 +1,84 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_LOGGING_HPP +#define OPENCV_LOGGING_HPP + +#include +#include +#include // INT_MAX + +// TODO This file contains just interface part with implementation stubs. + +//! @addtogroup core_logging +// This section describes OpenCV logging utilities. +// +//! 
@{ + +namespace utils { +namespace logging { + +// Supported logging levels and their semantics +#define CV_LOG_LEVEL_SILENT 0 //!< for using in setLogLevel() call +#define CV_LOG_LEVEL_FATAL 1 //!< Fatal (critical) error (unrecoverable internal error) +#define CV_LOG_LEVEL_ERROR 2 //!< Error message +#define CV_LOG_LEVEL_WARN 3 //!< Warning message +#define CV_LOG_LEVEL_INFO 4 //!< Info message +#define CV_LOG_LEVEL_DEBUG 5 //!< Debug message. Disabled in the "Release" build. +#define CV_LOG_LEVEL_VERBOSE 6 //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. + +//! Supported logging levels and their semantics +enum LogLevel { + LOG_LEVEL_SILENT = 0, //!< for using in setLogLevel() call + LOG_LEVEL_FATAL = 1, //!< Fatal (critical) error (unrecoverable internal error) + LOG_LEVEL_ERROR = 2, //!< Error message + LOG_LEVEL_WARNING = 3, //!< Warning message + LOG_LEVEL_INFO = 4, //!< Info message + LOG_LEVEL_DEBUG = 5, //!< Debug message. Disabled in the "Release" build. + LOG_LEVEL_VERBOSE = 6, //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build. +#ifndef CV_DOXYGEN + ENUM_LOG_LEVEL_FORCE_INT = INT_MAX +#endif +}; + + +/** + * \def CV_LOG_STRIP_LEVEL + * + * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out all messages at that logging level and at more verbose levels + */ +#ifndef CV_LOG_STRIP_LEVEL +# if defined NDEBUG +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG +# else +# define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE +# endif +#endif + + +#define CV_LOG_FATAL(tag, ...) for(;;) { std::stringstream ss; ss << "[FATAL:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cerr << ss.str(); break; } +#define CV_LOG_ERROR(tag, ...) for(;;) { std::stringstream ss; ss << "[ERROR:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cerr << ss.str(); break; } +#define CV_LOG_WARNING(tag, ...) 
for(;;) { std::stringstream ss; ss << "[ WARN:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; } +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO +#define CV_LOG_INFO(tag, ...) +#else +#define CV_LOG_INFO(tag, ...) for(;;) { std::stringstream ss; ss << "[ INFO:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; } +#endif +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG +#define CV_LOG_DEBUG(tag, ...) +#else +#define CV_LOG_DEBUG(tag, ...) for(;;) { std::stringstream ss; ss << "[DEBUG:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; } +#endif +#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE +#define CV_LOG_VERBOSE(tag, v, ...) +#else +#define CV_LOG_VERBOSE(tag, v, ...) for(;;) { std::stringstream ss; ss << "[VERB" << v << ":" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; } +#endif + + +}} // namespace + +//! @} + +#endif // OPENCV_LOGGING_HPP diff --git a/modules/core/include/opencv2/core/utils/trace.hpp b/modules/core/include/opencv2/core/utils/trace.hpp new file mode 100644 index 0000000..1539fb9 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/trace.hpp @@ -0,0 +1,250 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACE_HPP +#define OPENCV_TRACE_HPP + +#include + +//! @addtogroup core_logging +// This section describes OpenCV tracing utilities. +// +//! @{ + +namespace cv { +namespace utils { +namespace trace { + +//! Macro to trace function +#define CV_TRACE_FUNCTION() + +#define CV_TRACE_FUNCTION_SKIP_NESTED() + +//! Trace code scope. +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize". 
+#define CV_TRACE_REGION(name_as_static_string_literal) +//! Mark the currently opened region as completed and create a new one +//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1". +#define CV_TRACE_REGION_NEXT(name_as_static_string_literal) + +//! Macro to trace argument value +#define CV_TRACE_ARG(arg_id) + +//! Macro to trace argument value (expanded version) +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) + +//! @cond IGNORED +#define CV_TRACE_NS cv::utils::trace + +namespace details { + +#ifndef __OPENCV_TRACE +# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS +# define __OPENCV_TRACE 1 +# else +# define __OPENCV_TRACE 0 +# endif +#endif + +#ifndef CV_TRACE_FILENAME +# define CV_TRACE_FILENAME __FILE__ +#endif + +#ifndef CV__TRACE_FUNCTION +# if defined _MSC_VER +# define CV__TRACE_FUNCTION __FUNCSIG__ +# elif defined __GNUC__ +# define CV__TRACE_FUNCTION __PRETTY_FUNCTION__ +# else +# define CV__TRACE_FUNCTION "" +# endif +#endif + +//! Thread-local instance (usually allocated on stack) +class CV_EXPORTS Region +{ +public: + struct LocationExtraData; + struct LocationStaticStorage + { + LocationExtraData** ppExtra; //< implementation specific data + const char* name; //< region name (function name or other custom name) + const char* filename; //< source code filename + int line; //< source code line + int flags; //< flags (implementation code path: Plain, IPP, OpenCL) + }; + + Region(const LocationStaticStorage& location); + inline ~Region() + { + if (implFlags != 0) + destroy(); + CV_DbgAssert(implFlags == 0); + CV_DbgAssert(pImpl == NULL); + } + + class Impl; + Impl* pImpl; // NULL if current region is not active + int implFlags; // see RegionFlag, 0 if region is ignored + + bool isActive() const { return pImpl != NULL; } + + void destroy(); +private: + Region(const Region&); // disabled + Region& operator= (const Region&); // disabled +}; + +//! 
Specify region flags +enum RegionLocationFlag { + REGION_FLAG_FUNCTION = (1 << 0), //< region is function (=1) / nested named region (=0) + REGION_FLAG_APP_CODE = (1 << 1), //< region is Application code (=1) / OpenCV library code (=0) + REGION_FLAG_SKIP_NESTED = (1 << 2), //< avoid processing of nested regions + + REGION_FLAG_IMPL_IPP = (1 << 16), //< region is part of IPP code path + REGION_FLAG_IMPL_OPENCL = (2 << 16), //< region is part of OpenCL code path + REGION_FLAG_IMPL_OPENVX = (3 << 16), //< region is part of OpenVX code path + + REGION_FLAG_IMPL_MASK = (15 << 16), + + REGION_FLAG_REGION_FORCE = (1 << 30), + REGION_FLAG_REGION_NEXT = (1 << 31), //< close previous region (see #CV_TRACE_REGION_NEXT macro) + + ENUM_REGION_FLAG_FORCE_INT = INT_MAX +}; + +struct CV_EXPORTS TraceArg { +public: + struct ExtraData; + ExtraData** ppExtra; + const char* name; + int flags; +}; +/** @brief Add meta information to current region (function) + * See CV_TRACE_ARG macro + * @param arg argument information structure (global static cache) + * @param value argument value (can be a dynamic string in case of string; static allocation is not required) + */ +CV_EXPORTS void traceArg(const TraceArg& arg, const char* value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int value); +//! @overload +CV_EXPORTS void traceArg(const TraceArg& arg, int64 value); +//! 
@overload +CV_EXPORTS void traceArg(const TraceArg& arg, double value); + +#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__) +#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__) + +#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \ + static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \ + static const CV_TRACE_NS::details::Region::LocationStaticStorage \ + CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags}; + +#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, (flags | CV_TRACE_NS::details::REGION_FLAG_FUNCTION)) + + +#define CV__TRACE_OPENCV_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, 0); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_NAME(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \ + CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \ + 
const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + +#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \ + CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \ + CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \ + CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region)); + +#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0) +#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT) + +#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__) +#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__) + +#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \ + static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \ + static const CV_TRACE_NS::details::TraceArg \ + CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags }; + +#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \ + CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \ + CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value); + +#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id)) + +} // namespace + +#ifndef OPENCV_DISABLE_TRACE +#undef CV_TRACE_FUNCTION +#undef CV_TRACE_FUNCTION_SKIP_NESTED +#if __OPENCV_TRACE +#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED +#else +#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION +#define CV_TRACE_FUNCTION_SKIP_NESTED 
CV__TRACE_APP_FUNCTION_SKIP_NESTED +#endif + +#undef CV_TRACE_REGION +#define CV_TRACE_REGION CV__TRACE_REGION + +#undef CV_TRACE_REGION_NEXT +#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT + +#undef CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \ + if (__region_fn.isActive()) \ + { \ + CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \ + } + +#undef CV_TRACE_ARG +#define CV_TRACE_ARG CV__TRACE_ARG + +#endif // OPENCV_DISABLE_TRACE + +#ifdef OPENCV_TRACE_VERBOSE +#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION +#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION +#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT +#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE +#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG +#else +#define CV_TRACE_FUNCTION_VERBOSE(...) +#define CV_TRACE_REGION_VERBOSE(...) +#define CV_TRACE_REGION_NEXT_VERBOSE(...) +#define CV_TRACE_ARG_VALUE_VERBOSE(...) +#define CV_TRACE_ARG_VERBOSE(...) +#endif + +//! @endcond + +}}} // namespace + +//! @} + +#endif // OPENCV_TRACE_HPP diff --git a/modules/core/include/opencv2/core/utils/trace.private.hpp b/modules/core/include/opencv2/core/utils/trace.private.hpp new file mode 100644 index 0000000..1798166 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/trace.private.hpp @@ -0,0 +1,419 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_TRACE_PRIVATE_HPP +#define OPENCV_TRACE_PRIVATE_HPP + +#ifdef OPENCV_TRACE + +#include + +#include "trace.hpp" + +//! @cond IGNORED + +#include +#include + +#define INTEL_ITTNOTIFY_API_PRIVATE 1 +#ifdef OPENCV_WITH_ITT +#include "ittnotify.h" +#endif + +#ifndef DEBUG_ONLY +#ifdef _DEBUG +#define DEBUG_ONLY(...) __VA_ARGS__ +#else +#define DEBUG_ONLY(...) (void)0 +#endif +#endif + +#ifndef DEBUG_ONLY_ +#ifdef _DEBUG +#define DEBUG_ONLY_(...) 
__VA_ARGS__ +#else +#define DEBUG_ONLY_(...) +#endif +#endif + + +namespace cv { +namespace utils { +namespace trace { +namespace details { + +#define CV__TRACE_OPENCV_FUNCTION_NAME_(name, flags) \ + CV__TRACE_DEFINE_LOCATION_FN(name, flags); \ + const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn)); + + +enum RegionFlag { + REGION_FLAG__NEED_STACK_POP = (1 << 0), + REGION_FLAG__ACTIVE = (1 << 1), + + ENUM_REGION_FLAG_IMPL_FORCE_INT = INT_MAX +}; + + +class TraceMessage; + +class TraceStorage { +public: + TraceStorage() {} + virtual ~TraceStorage() {}; + + virtual bool put(const TraceMessage& msg) const = 0; +}; + +struct RegionStatistics +{ + int currentSkippedRegions; + + int64 duration; +#ifdef HAVE_IPP + int64 durationImplIPP; +#endif +#ifdef HAVE_OPENCL + int64 durationImplOpenCL; +#endif +#ifdef HAVE_OPENVX + int64 durationImplOpenVX; +#endif + + RegionStatistics() : + currentSkippedRegions(0), + duration(0) +#ifdef HAVE_IPP + ,durationImplIPP(0) +#endif +#ifdef HAVE_OPENCL + ,durationImplOpenCL(0) +#endif +#ifdef HAVE_OPENVX + ,durationImplOpenVX(0) +#endif + {} + + void grab(RegionStatistics& result) + { + result.currentSkippedRegions = currentSkippedRegions; currentSkippedRegions = 0; + result.duration = duration; duration = 0; +#ifdef HAVE_IPP + result.durationImplIPP = durationImplIPP; durationImplIPP = 0; +#endif +#ifdef HAVE_OPENCL + result.durationImplOpenCL = durationImplOpenCL; durationImplOpenCL = 0; +#endif +#ifdef HAVE_OPENVX + result.durationImplOpenVX = durationImplOpenVX; durationImplOpenVX = 0; +#endif + } + + void append(RegionStatistics& stat) + { + currentSkippedRegions += stat.currentSkippedRegions; + duration += stat.duration; +#ifdef HAVE_IPP + durationImplIPP += stat.durationImplIPP; +#endif +#ifdef HAVE_OPENCL + durationImplOpenCL += stat.durationImplOpenCL; +#endif +#ifdef HAVE_OPENVX + durationImplOpenVX += stat.durationImplOpenVX; +#endif + } + + void multiply(const float c) + { + duration = 
(int64)(duration * c); +#ifdef HAVE_IPP + durationImplIPP = (int64)(durationImplIPP * c); +#endif +#ifdef HAVE_OPENCL + durationImplOpenCL = (int64)(durationImplOpenCL * c); +#endif +#ifdef HAVE_OPENVX + durationImplOpenVX = (int64)(durationImplOpenVX * c); +#endif + } +}; + +static inline +std::ostream& operator<<(std::ostream& out, const RegionStatistics& stat) +{ + out << "skip=" << stat.currentSkippedRegions + << " duration=" << stat.duration +#ifdef HAVE_IPP + << " durationImplIPP=" << stat.durationImplIPP +#endif +#ifdef HAVE_OPENCL + << " durationImplOpenCL=" << stat.durationImplOpenCL +#endif +#ifdef HAVE_OPENVX + << " durationImplOpenVX=" << stat.durationImplOpenVX +#endif + ; + return out; +} + +struct RegionStatisticsStatus +{ + int _skipDepth; +#ifdef HAVE_IPP + int ignoreDepthImplIPP; +#endif +#ifdef HAVE_OPENCL + int ignoreDepthImplOpenCL; +#endif +#ifdef HAVE_OPENVX + int ignoreDepthImplOpenVX; +#endif + + RegionStatisticsStatus() { reset(); } + + void reset() + { + _skipDepth = -1; +#ifdef HAVE_IPP + ignoreDepthImplIPP = 0; +#endif +#ifdef HAVE_OPENCL + ignoreDepthImplOpenCL = 0; +#endif +#ifdef HAVE_OPENVX + ignoreDepthImplOpenVX = 0; +#endif + } + + void propagateFrom(const RegionStatisticsStatus& src) + { + _skipDepth = -1; + if (src._skipDepth >= 0) + enableSkipMode(0); +#ifdef HAVE_IPP + ignoreDepthImplIPP = src.ignoreDepthImplIPP ? 1 : 0; +#endif +#ifdef HAVE_OPENCL + ignoreDepthImplOpenCL = src.ignoreDepthImplOpenCL ? 1 : 0; +#endif +#ifdef HAVE_OPENVX + ignoreDepthImplOpenVX = src.ignoreDepthImplOpenVX ? 
1 : 0; +#endif + } + + void enableSkipMode(int depth); + void checkResetSkipMode(int leaveDepth); +}; + +static inline +std::ostream& operator<<(std::ostream& out, const RegionStatisticsStatus& s) +{ + out << "ignore={"; + if (s._skipDepth >= 0) + out << " SKIP=" << s._skipDepth; +#ifdef HAVE_IPP + if (s.ignoreDepthImplIPP) + out << " IPP=" << s.ignoreDepthImplIPP; +#endif +#ifdef HAVE_OPENCL + if (s.ignoreDepthImplOpenCL) + out << " OpenCL=" << s.ignoreDepthImplOpenCL; +#endif +#ifdef HAVE_OPENVX + if (s.ignoreDepthImplOpenVX) + out << " OpenVX=" << s.ignoreDepthImplOpenVX; +#endif + out << "}"; + return out; +} + +//! TraceManager for local thread +struct TraceManagerThreadLocal +{ + const int threadID; + int region_counter; + + size_t totalSkippedEvents; + + Region* currentActiveRegion; + + struct StackEntry + { + Region* region; + const Region::LocationStaticStorage* location; + int64 beginTimestamp; + StackEntry(Region* region_, const Region::LocationStaticStorage* location_, int64 beginTimestamp_) : + region(region_), location(location_), beginTimestamp(beginTimestamp_) + {} + StackEntry() : region(NULL), location(NULL), beginTimestamp(-1) {} + }; + std::deque<StackEntry> stack; + + int regionDepth; // functions only (no named regions) + int regionDepthOpenCV; // functions from OpenCV library + + RegionStatistics stat; + RegionStatisticsStatus stat_status; + + StackEntry dummy_stack_top; // parallel_for root region + RegionStatistics parallel_for_stat; + RegionStatisticsStatus parallel_for_stat_status; + size_t parallel_for_stack_size; + + + mutable cv::Ptr<TraceStorage> storage; + + TraceManagerThreadLocal() : + threadID(cv::utils::getThreadID()), + region_counter(0), totalSkippedEvents(0), + currentActiveRegion(NULL), + regionDepth(0), + regionDepthOpenCV(0), + parallel_for_stack_size(0) + { + } + + ~TraceManagerThreadLocal(); + + TraceStorage* getStorage() const; + + void recordLocation(const Region::LocationStaticStorage& location); + void recordRegionEnter(const Region& region); 
+ void recordRegionLeave(const Region& region, const RegionStatistics& result); + void recordRegionArg(const Region& region, const TraceArg& arg, const char& value); + + inline void stackPush(Region* region, const Region::LocationStaticStorage* location, int64 beginTimestamp) + { + stack.push_back(StackEntry(region, location, beginTimestamp)); + } + inline Region* stackTopRegion() const + { + if (stack.empty()) + return dummy_stack_top.region; + return stack.back().region; + } + inline const Region::LocationStaticStorage* stackTopLocation() const + { + if (stack.empty()) + return dummy_stack_top.location; + return stack.back().location; + } + inline int64 stackTopBeginTimestamp() const + { + if (stack.empty()) + return dummy_stack_top.beginTimestamp; + return stack.back().beginTimestamp; + } + inline void stackPop() + { + CV_DbgAssert(!stack.empty()); + stack.pop_back(); + } + void dumpStack(std::ostream& out, bool onlyFunctions) const; + + inline Region* getCurrentActiveRegion() + { + return currentActiveRegion; + } + + inline int getCurrentDepth() const { return (int)stack.size(); } +}; + +class CV_EXPORTS TraceManager +{ +public: + TraceManager(); + ~TraceManager(); + + static bool isActivated(); + + Mutex mutexCreate; + Mutex mutexCount; + + TLSData<TraceManagerThreadLocal> tls; + + cv::Ptr<TraceStorage> trace_storage; +private: + // disable copying + TraceManager(const TraceManager&); + TraceManager& operator=(const TraceManager&); +}; + +CV_EXPORTS TraceManager& getTraceManager(); +inline Region* getCurrentActiveRegion() { return getTraceManager().tls.get()->getCurrentActiveRegion(); } +inline Region* getCurrentRegion() { return getTraceManager().tls.get()->stackTopRegion(); } + +void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx); +void parallelForAttachNestedRegion(const Region& rootRegion); +void parallelForFinalize(const Region& rootRegion); + + + + + + + +struct Region::LocationExtraData +{ + int global_location_id; // 0 - region is disabled 
+#ifdef OPENCV_WITH_ITT + // Special fields for ITT + __itt_string_handle* volatile ittHandle_name; + __itt_string_handle* volatile ittHandle_filename; +#endif + LocationExtraData(const LocationStaticStorage& location); + + static Region::LocationExtraData* init(const Region::LocationStaticStorage& location); +}; + +class Region::Impl +{ +public: + const LocationStaticStorage& location; + + Region& region; + Region* const parentRegion; + + const int threadID; + const int global_region_id; + + const int64 beginTimestamp; + int64 endTimestamp; + + int directChildrenCount; + + enum OptimizationPath { + CODE_PATH_PLAIN = 0, + CODE_PATH_IPP, + CODE_PATH_OPENCL, + CODE_PATH_OPENVX + }; + +#ifdef OPENCV_WITH_ITT + bool itt_id_registered; + __itt_id itt_id; +#endif + + Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_); + + void enterRegion(TraceManagerThreadLocal& ctx); + void leaveRegion(TraceManagerThreadLocal& ctx); + + void registerRegion(TraceManagerThreadLocal& ctx); + + void release(); +protected: + ~Impl(); +}; + + + +}}}} // namespace + +//! 
@endcond + +#endif + +#endif // OPENCV_TRACE_PRIVATE_HPP diff --git a/modules/core/src/algorithm.cpp b/modules/core/src/algorithm.cpp index 4e7701a..24f4dfb 100644 --- a/modules/core/src/algorithm.cpp +++ b/modules/core/src/algorithm.cpp @@ -47,14 +47,17 @@ namespace cv Algorithm::Algorithm() { + CV_TRACE_FUNCTION(); } Algorithm::~Algorithm() { + CV_TRACE_FUNCTION(); } void Algorithm::save(const String& filename) const { + CV_TRACE_FUNCTION(); FileStorage fs(filename, FileStorage::WRITE); fs << getDefaultName() << "{"; write(fs); @@ -63,11 +66,13 @@ void Algorithm::save(const String& filename) const String Algorithm::getDefaultName() const { + CV_TRACE_FUNCTION(); return String("my_object"); } void Algorithm::writeFormat(FileStorage& fs) const { + CV_TRACE_FUNCTION(); fs << "format" << (int)3; } diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index ec039d4..6e41bb8 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -907,7 +907,7 @@ Mat repeat(const Mat& src, int ny, int nx) */ int cv::borderInterpolate( int p, int len, int borderType ) { - CV_INSTRUMENT_REGION() + CV_TRACE_FUNCTION_VERBOSE(); if( (unsigned)p < (unsigned)len ) ; diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index 584efcf..5439933 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -74,6 +74,7 @@ public: void operator()( const cv::Range& range ) const { + CV_TRACE_FUNCTION(); const int begin = range.start; const int end = range.end; @@ -101,6 +102,7 @@ Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding static void generateCentersPP(const Mat& _data, Mat& _out_centers, int K, RNG& rng, int trials) { + CV_TRACE_FUNCTION(); int i, j, k, dims = _data.cols, N = _data.rows; const float* data = _data.ptr(0); size_t step = _data.step/sizeof(data[0]); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index c732dab..724d54e 100644 --- a/modules/core/src/matrix.cpp +++ 
b/modules/core/src/matrix.cpp @@ -3130,7 +3130,7 @@ void cv::hconcat(InputArray _src, OutputArray dst) void cv::vconcat(const Mat* src, size_t nsrc, OutputArray _dst) { - CV_INSTRUMENT_REGION() + CV_TRACE_FUNCTION_SKIP_NESTED() if( nsrc == 0 || !src ) { diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index c519ad8..7d7c24e 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -65,67 +65,6 @@ # endif #endif - -// TODO Move to some common place -static bool getBoolParameter(const char* name, bool defaultValue) -{ -/* - * If your system doesn't support getenv(), define NO_GETENV to disable - * this feature. - */ -#ifdef NO_GETENV - const char* envValue = NULL; -#else - const char* envValue = getenv(name); -#endif - if (envValue == NULL) - { - return defaultValue; - } - cv::String value = envValue; - if (value == "1" || value == "True" || value == "true" || value == "TRUE") - { - return true; - } - if (value == "0" || value == "False" || value == "false" || value == "FALSE") - { - return false; - } - CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); -} - - -// TODO Move to some common place -static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue) -{ -#ifdef NO_GETENV - const char* envValue = NULL; -#else - const char* envValue = getenv(name); -#endif - if (envValue == NULL) - { - return defaultValue; - } - cv::String value = envValue; - size_t pos = 0; - for (; pos < value.size(); pos++) - { - if (!isdigit(value[pos])) - break; - } - cv::String valueStr = value.substr(0, pos); - cv::String suffixStr = value.substr(pos, value.length() - pos); - int v = atoi(valueStr.c_str()); - if (suffixStr.length() == 0) - return v; - else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb") - return v * 1024 * 1024; - else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb") - return v * 1024; - CV_ErrorNoReturn(cv::Error::StsBadArg, 
cv::format("Invalid value for %s parameter: %s", name, value.c_str())); -} - #if CV_OPENCL_SHOW_SVM_LOG // TODO add timestamp logging #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf @@ -159,7 +98,7 @@ static bool isRaiseError() static bool value = false; if (!initialized) { - value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false); + value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false); initialized = true; } return value; @@ -1232,7 +1171,7 @@ static bool checkForceSVMUmatUsage() static bool force = false; if (!initialized) { - force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false); + force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false); initialized = true; } return force; @@ -1243,7 +1182,7 @@ static bool checkDisableSVMUMatUsage() static bool force = false; if (!initialized) { - force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false); + force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false); initialized = true; } return force; @@ -1254,7 +1193,7 @@ static bool checkDisableSVM() static bool force = false; if (!initialized) { - force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false); + force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false); initialized = true; } return force; @@ -1285,7 +1224,7 @@ static size_t getProgramCountLimit() static size_t count = 0; if (!initialized) { - count = getConfigurationParameterForSize("OPENCV_OPENCL_PROGRAM_CACHE", 0); + count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0); initialized = true; } return count; @@ -3195,12 +3134,12 @@ public: { size_t defaultPoolSize, poolSize; defaultPoolSize = ocl::Device::getDefault().isIntel() ? 
1 << 27 : 0; - poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); + poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); bufferPool.setMaxReservedSize(poolSize); - poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); + poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); bufferPoolHostPtr.setMaxReservedSize(poolSize); #ifdef HAVE_OPENCL_SVM - poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); + poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); bufferPoolSVM.setMaxReservedSize(poolSize); #endif @@ -4980,7 +4919,7 @@ bool internal::isOpenCLForced() static bool value = false; if (!initialized) { - value = getBoolParameter("OPENCV_OPENCL_FORCE", false); + value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false); initialized = true; } return value; @@ -4992,7 +4931,7 @@ bool internal::isPerformanceCheckBypassed() static bool value = false; if (!initialized) { - value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false); + value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false); initialized = true; } return value; diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 3bbf028..71f6b11 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -42,6 +42,8 @@ #include "precomp.hpp" +#include + #if defined WIN32 || defined WINCE #include #undef small @@ -163,10 +165,10 @@ namespace } #endif - class ParallelLoopBodyWrapper : public cv::ParallelLoopBody + class ParallelLoopBodyWrapperContext { public: - ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) : + ParallelLoopBodyWrapperContext(const cv::ParallelLoopBody& _body, 
const cv::Range& _r, double _nstripes) : is_rng_used(false) { @@ -178,11 +180,16 @@ namespace // propagate main thread state rng = cv::theRNG(); +#ifdef OPENCV_TRACE + traceRootRegion = CV_TRACE_NS::details::getCurrentRegion(); + traceRootContext = CV_TRACE_NS::details::getTraceManager().tls.get(); +#endif + #ifdef ENABLE_INSTRUMENTATION pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode; #endif } - ~ParallelLoopBodyWrapper() + ~ParallelLoopBodyWrapperContext() { #ifdef ENABLE_INSTRUMENTATION for(size_t i = 0; i < pThreadRoot->m_childs.size(); i++) @@ -198,49 +205,91 @@ namespace // Note: this behaviour is not equal to single-threaded mode. cv::theRNG().next(); } +#ifdef OPENCV_TRACE + if (traceRootRegion) + CV_TRACE_NS::details::parallelForFinalize(*traceRootRegion); +#endif + } + + const cv::ParallelLoopBody* body; + cv::Range wholeRange; + int nstripes; + cv::RNG rng; + mutable bool is_rng_used; +#ifdef OPENCV_TRACE + CV_TRACE_NS::details::Region* traceRootRegion; + CV_TRACE_NS::details::TraceManagerThreadLocal* traceRootContext; +#endif +#ifdef ENABLE_INSTRUMENTATION + cv::instr::InstrNode *pThreadRoot; +#endif + private: + ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled + ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled + }; + + class ParallelLoopBodyWrapper : public cv::ParallelLoopBody + { + public: + ParallelLoopBodyWrapper(ParallelLoopBodyWrapperContext& ctx_) : + ctx(ctx_) + { + } + ~ParallelLoopBodyWrapper() + { } void operator()(const cv::Range& sr) const { +#ifdef OPENCV_TRACE + // TODO CV_TRACE_NS::details::setCurrentRegion(rootRegion); + if (ctx.traceRootRegion && ctx.traceRootContext) + CV_TRACE_NS::details::parallelForSetRootRegion(*ctx.traceRootRegion, *ctx.traceRootContext); + CV__TRACE_OPENCV_FUNCTION_NAME("parallel_for_body"); + if (ctx.traceRootRegion) + CV_TRACE_NS::details::parallelForAttachNestedRegion(*ctx.traceRootRegion); +#endif + #ifdef 
ENABLE_INSTRUMENTATION { cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct(); - pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread + pInstrTLS->pCurrentNode = ctx.pThreadRoot; // Initialize TLS node for thread } -#endif CV_INSTRUMENT_REGION() +#endif // propagate main thread state - cv::theRNG() = rng; + cv::theRNG() = ctx.rng; cv::Range r; + cv::Range wholeRange = ctx.wholeRange; + int nstripes = ctx.nstripes; r.start = (int)(wholeRange.start + ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes); r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start + ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes); - (*body)(r); - if (!is_rng_used && !(cv::theRNG() == rng)) - is_rng_used = true; +#ifdef OPENCV_TRACE + CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)r.start); + CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)r.end); +#endif + + (*ctx.body)(r); + + if (!ctx.is_rng_used && !(cv::theRNG() == ctx.rng)) + ctx.is_rng_used = true; } - cv::Range stripeRange() const { return cv::Range(0, nstripes); } + cv::Range stripeRange() const { return cv::Range(0, ctx.nstripes); } protected: - const cv::ParallelLoopBody* body; - cv::Range wholeRange; - int nstripes; - cv::RNG rng; - mutable bool is_rng_used; -#ifdef ENABLE_INSTRUMENTATION - cv::instr::InstrNode *pThreadRoot; -#endif + ParallelLoopBodyWrapperContext& ctx; }; #if defined HAVE_TBB class ProxyLoopBody : public ParallelLoopBodyWrapper { public: - ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) - : ParallelLoopBodyWrapper(_body, _r, _nstripes) + ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx_) + : ParallelLoopBodyWrapper(ctx_) {} void operator ()(const tbb::blocked_range& range) const @@ -261,8 +310,8 @@ namespace class ProxyLoopBody : public ParallelLoopBodyWrapper { public: - ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double 
_nstripes) - : ParallelLoopBodyWrapper(_body, _r, _nstripes) + ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx) + : ParallelLoopBodyWrapper(ctx) {} void operator ()(int i) const @@ -316,19 +365,30 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { +#ifdef OPENCV_TRACE + CV__TRACE_OPENCV_FUNCTION_NAME_("parallel_for", 0); + CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)range.start); + CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)range.end); + CV_TRACE_ARG_VALUE(nstripes, "nstripes", (int64)nstripes); +#endif + CV_INSTRUMENT_REGION_MT_FORK() if (range.empty()) return; #ifdef CV_PARALLEL_FRAMEWORK - if(numThreads != 0) + static int flagNestedParallelFor = 0; + bool isNotNesterParallelFor = CV_XADD(&flagNestedParallelFor, 1) == 0; + if(numThreads != 0 && isNotNesterParallelFor) { - ProxyLoopBody pbody(body, range, nstripes); + ParallelLoopBodyWrapperContext ctx(body, range, nstripes); + ProxyLoopBody pbody(ctx); cv::Range stripeRange = pbody.stripeRange(); if( stripeRange.end - stripeRange.start == 1 ) { body(range); + flagNestedParallelFor = 0; return; } @@ -384,7 +444,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, #error You have hacked and compiling with unsupported parallel framework #endif - + flagNestedParallelFor = 0; } else diff --git a/modules/core/src/parallel_pthreads.cpp b/modules/core/src/parallel_pthreads.cpp index 4aeceeb..d17d3cb 100644 --- a/modules/core/src/parallel_pthreads.cpp +++ b/modules/core/src/parallel_pthreads.cpp @@ -309,6 +309,8 @@ void ForThread::execute() void ForThread::thread_body() { + (void)cv::utils::getThreadID(); // notify OpenCV about new thread + m_parent->m_is_work_thread.get()->value = true; pthread_mutex_lock(&m_thread_mutex); diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index e7ab982..6c947e2 100644 --- a/modules/core/src/persistence.cpp +++ 
b/modules/core/src/persistence.cpp @@ -6921,7 +6921,7 @@ FileNode FileStorage::root(int streamidx) const FileStorage& operator << (FileStorage& fs, const String& str) { - CV_INSTRUMENT_REGION() + CV_TRACE_REGION_VERBOSE(); enum { NAME_EXPECTED = FileStorage::NAME_EXPECTED, VALUE_EXPECTED = FileStorage::VALUE_EXPECTED, diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index b17ad2b..e752808 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -299,6 +299,12 @@ TLSData& getCoreTlsData(); #define CL_RUNTIME_EXPORT #endif +namespace utils { +bool getConfigurationParameterBool(const char* name, bool defaultValue); +size_t getConfigurationParameterSizeT(const char* name, size_t defaultValue); +cv::String getConfigurationParameterString(const char* name, const char* defaultValue); +} + extern bool __termination; // skip some cleanups, because process is terminating // (for example, if ExitProcess() was already called) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index c3317dd..db653ab 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -44,6 +44,8 @@ #include "precomp.hpp" #include +#include + namespace cv { static Mutex* __initialization_mutex = NULL; @@ -1490,6 +1492,7 @@ void TLSDataContainer::cleanup() void* TLSDataContainer::getData() const { + CV_Assert(key_ != -1 && "Can't fetch data from terminated TLS container."); void* pData = getTlsStorage().getData(key_); // Check if data was already allocated if(!pData) { @@ -1534,6 +1537,99 @@ BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved) } #endif + +namespace { +static int g_threadNum = 0; +class ThreadID { +public: + const int id; + ThreadID() : + id(CV_XADD(&g_threadNum, 1)) + { +#ifdef OPENCV_WITH_ITT + __itt_thread_set_name(cv::format("OpenCVThread-%03d", id).c_str()); +#endif + } +}; + +static TLSData& getThreadIDTLS() +{ + CV_SINGLETON_LAZY_INIT_REF(TLSData, new TLSData()); +} + +} // namespace 
+int utils::getThreadID() { return getThreadIDTLS().get()->id; } + +bool utils::getConfigurationParameterBool(const char* name, bool defaultValue) +{ +#ifdef NO_GETENV + const char* envValue = NULL; +#else + const char* envValue = getenv(name); +#endif + if (envValue == NULL) + { + return defaultValue; + } + cv::String value = envValue; + if (value == "1" || value == "True" || value == "true" || value == "TRUE") + { + return true; + } + if (value == "0" || value == "False" || value == "false" || value == "FALSE") + { + return false; + } + CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); +} + + +size_t utils::getConfigurationParameterSizeT(const char* name, size_t defaultValue) +{ +#ifdef NO_GETENV + const char* envValue = NULL; +#else + const char* envValue = getenv(name); +#endif + if (envValue == NULL) + { + return defaultValue; + } + cv::String value = envValue; + size_t pos = 0; + for (; pos < value.size(); pos++) + { + if (!isdigit(value[pos])) + break; + } + cv::String valueStr = value.substr(0, pos); + cv::String suffixStr = value.substr(pos, value.length() - pos); + int v = atoi(valueStr.c_str()); + if (suffixStr.length() == 0) + return v; + else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb") + return v * 1024 * 1024; + else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb") + return v * 1024; + CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); +} + +cv::String utils::getConfigurationParameterString(const char* name, const char* defaultValue) +{ +#ifdef NO_GETENV + const char* envValue = NULL; +#else + const char* envValue = getenv(name); +#endif + if (envValue == NULL) + { + return defaultValue; + } + cv::String value = envValue; + return value; +} + + #ifdef CV_COLLECT_IMPL_DATA ImplCollector& getImplData() { diff --git a/modules/core/src/trace.cpp b/modules/core/src/trace.cpp new file mode 100644 index 
0000000..157023e --- /dev/null +++ b/modules/core/src/trace.cpp @@ -0,0 +1,1115 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include + +#include +#include + +#include // va_start + +#include +#include +#include + +#if 0 +#define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__) +#else +#define CV_LOG(...) {} +#endif + +#if 0 +#define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__) +#else +#define CV_LOG_ITT(...) {} +#endif + +#if 1 +#define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) +#else +#define CV_LOG_TRACE_BAILOUT(...) {} +#endif + +#if 0 +#define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) +#else +#define CV_LOG_PARALLEL(...) {} +#endif + +#if 0 +#define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) +#else +#define CV_LOG_CTX_STAT(...) {} +#endif + +#if 0 +#define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) +#else +#define CV_LOG_SKIP(...) 
{} +#endif + +namespace cv { +namespace utils { +namespace trace { +namespace details { + +#ifdef OPENCV_TRACE + +static int64 g_zero_timestamp = 0; + +static int64 getTimestamp() +{ + int64 t = getTickCount(); + static double tick_to_ns = 1e9 / getTickFrequency(); + return (int64)((t - g_zero_timestamp) * tick_to_ns); +} + +// TODO lazy configuration flags +static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false); + +static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1); +static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000); +static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000); +static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace"); + +#ifdef HAVE_OPENCL +static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false); +#endif + +#ifdef OPENCV_WITH_ITT +static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_PARENT", false); +#endif + +static const char* _spaces(int count) +{ + static const char buf[64] = +" "; + return &buf[63 - (count & 63)]; +} + +/** + * Text-based trace messages + */ +class TraceMessage +{ +public: + char buffer[1024]; + size_t len; + bool hasError; + + TraceMessage() : + len(0), + hasError(false) + {} + + bool printf(const char* format, ...) 
+ { + char* buf = &buffer[len]; + size_t sz = sizeof(buffer) - len; + va_list ap; + va_start(ap, format); + int n = cv_vsnprintf(buf, (int)sz, format, ap); + va_end(ap); + if (n < 0 || (size_t)n > sz) + { + hasError = true; + return false; + } + len += n; + return true; + } + + bool formatlocation(const Region::LocationStaticStorage& location) + { + return this->printf("l,%lld,\"%s\",%d,\"%s\",0x%llX\n", + (long long int)(*location.ppExtra)->global_location_id, + location.filename, + location.line, + location.name, + (long long int)(location.flags & ~0xF0000000)); + } + bool formatRegionEnter(const Region& region) + { + bool ok = this->printf("b,%d,%lld,%lld,%lld", + (int)region.pImpl->threadID, + (long long int)region.pImpl->beginTimestamp, + (long long int)((*region.pImpl->location.ppExtra)->global_location_id), + (long long int)region.pImpl->global_region_id); + if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl) + { + if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID) + ok &= this->printf(",parentThread=%d,parent=%lld", + (int)region.pImpl->parentRegion->pImpl->threadID, + (long long int)region.pImpl->parentRegion->pImpl->global_region_id); + } + ok &= this->printf("\n"); + return ok; + } + bool formatRegionLeave(const Region& region, const RegionStatistics& result) + { + CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration); + bool ok = this->printf("e,%d,%lld,%lld,%lld,%lld", + (int)region.pImpl->threadID, + (long long int)region.pImpl->endTimestamp, + (long long int)(*region.pImpl->location.ppExtra)->global_location_id, + (long long int)region.pImpl->global_region_id, + (long long int)result.duration); + if (result.currentSkippedRegions) + ok &= this->printf(",skip=%d", (int)result.currentSkippedRegions); +#ifdef HAVE_IPP + if (result.durationImplIPP) + ok &= this->printf(",tIPP=%lld", (long long int)result.durationImplIPP); +#endif +#ifdef HAVE_OPENCL + if 
(result.durationImplOpenCL) + ok &= this->printf(",tOCL=%lld", (long long int)result.durationImplOpenCL); +#endif +#ifdef HAVE_OPENVX + if (result.durationImplOpenVX) + ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX); +#endif + ok &= this->printf("\n"); + return ok; + } + bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value) + { + return this->printf("a,%d,%lld,%lld,\"%s\",\"%s\"\n", + region.pImpl->threadID, + (long long int)region.pImpl->beginTimestamp, + (long long int)region.pImpl->global_region_id, + arg.name, + value); + } +}; + + +#ifdef OPENCV_WITH_ITT +static __itt_domain* domain = NULL; + +static bool isITTEnabled() +{ + static bool isInitialized = false; + static bool isEnabled = false; + if (!isInitialized) + { + isEnabled = !!(__itt_api_version()); + CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled")); + domain = __itt_domain_create("OpenCVTrace"); + isInitialized = true; + } + return isEnabled; +} +#endif + + +Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location) +{ + CV_UNUSED(location); + static int g_location_id_counter = 0; + global_location_id = CV_XADD(&g_location_id_counter, 1) + 1; + CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")" + << std::endl << " file: " << location.filename + << std::endl << " line: " << location.line + << std::endl << " name: " << location.name); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + // Caching is not required here, because there is builtin cache. + // https://software.intel.com/en-us/node/544203: + // Consecutive calls to __itt_string_handle_create with the same name return the same value. 
+ ittHandle_name = __itt_string_handle_create(location.name); + ittHandle_filename = __itt_string_handle_create(location.filename); + } +#endif +} + +/*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location) +{ + LocationExtraData** pLocationExtra = location.ppExtra; + CV_DbgAssert(pLocationExtra); + if (*pLocationExtra == NULL) + { + cv::AutoLock lock(cv::getInitializationMutex()); + if (*pLocationExtra == NULL) + { + *pLocationExtra = new Region::LocationExtraData(location); + TraceStorage* s = getTraceManager().trace_storage.get(); + if (s) + { + TraceMessage msg; + msg.formatlocation(location); + s->put(msg); + } + } + } + return *pLocationExtra; +} + + +Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) : + location(location_), + region(region_), + parentRegion(parentRegion_), + threadID(ctx.threadID), + global_region_id(++ctx.region_counter), + beginTimestamp(beginTimestamp_), + endTimestamp(0), + directChildrenCount(0) +#ifdef OPENCV_WITH_ITT + ,itt_id_registered(false) + ,itt_id(__itt_null) +#endif +{ + CV_DbgAssert(ctx.currentActiveRegion == parentRegion); + region.pImpl = this; + + registerRegion(ctx); + + enterRegion(ctx); +} + +Region::Impl::~Impl() +{ +#ifdef OPENCV_WITH_ITT + if (itt_id_registered) + { + CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this); + __itt_id_destroy(domain, itt_id); + itt_id_registered = false; + } +#endif + region.pImpl = NULL; +} + +void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx) +{ + ctx.currentActiveRegion = &region; + + if (location.flags & REGION_FLAG_FUNCTION) + { + if ((location.flags & REGION_FLAG_APP_CODE) == 0) + { + ctx.regionDepthOpenCV++; + } + ctx.regionDepth++; + } + + TraceStorage* s = ctx.getStorage(); + if (s) + { + TraceMessage msg; + msg.formatRegionEnter(region); + s->put(msg); + } +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + 
__itt_id parentID = __itt_null; + if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0) + parentID = parentRegion->pImpl->itt_id; + __itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name); + } +#endif +} + +void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx) +{ + int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration); + RegionStatistics result; + ctx.stat.grab(result); + ctx.totalSkippedEvents += result.currentSkippedRegions; + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + if (result.currentSkippedRegions) + { + __itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions); + } +#ifdef HAVE_IPP + if (result.durationImplIPP) + __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP); +#endif +#ifdef HAVE_OPENCL + if (result.durationImplOpenCL) + __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL); +#endif +#ifdef HAVE_OPENVX + if (result.durationImplOpenVX) + __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX); +#endif + __itt_task_end(domain); + } +#endif + TraceStorage* s = ctx.getStorage(); + if (s) + { + TraceMessage msg; + msg.formatRegionLeave(region, result); + s->put(msg); + } + + if (location.flags & REGION_FLAG_FUNCTION) + { + if ((location.flags & REGION_FLAG_APP_CODE) == 0) + { + ctx.regionDepthOpenCV--; + } + ctx.regionDepth--; + } + + ctx.currentActiveRegion = parentRegion; +} + +void Region::Impl::release() +{ + delete this; +} + +void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx) +{ +#ifdef 
OPENCV_WITH_ITT + if (isITTEnabled()) + { + if (!itt_id_registered) + { + CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id); +#if 1 // workaround for some ITT backends + itt_id = __itt_id_make((void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), global_region_id); +#else + itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id); +#endif + __itt_id_create(domain, itt_id); + itt_id_registered = true; + } + } +#else + CV_UNUSED(ctx); +#endif +} + +void RegionStatisticsStatus::enableSkipMode(int depth) +{ + CV_DbgAssert(_skipDepth < 0); + CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth); + _skipDepth = depth; +} +void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth) +{ + if (leaveDepth <= _skipDepth) + { + CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth); + _skipDepth = -1; + } +} + +Region::Region(const LocationStaticStorage& location) : + pImpl(NULL), + implFlags(0) +{ + // Checks: + // - global enable flag + // - parent region is disabled + // - children count threshold + // - region location + // - depth (opencv nested calls) + if (!TraceManager::isActivated()) + { + CV_LOG("Trace is disabled. 
Bailout"); + return; + } + + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name); + + Region* parentRegion = ctx.stackTopRegion(); + const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation(); + + if (location.flags & REGION_FLAG_REGION_NEXT) + { + if (parentRegion && parentRegion->pImpl) + { + CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0); + parentRegion->destroy(); parentRegion->implFlags = 0; + parentRegion = ctx.stackTopRegion(); + parentLocation = ctx.stackTopLocation(); + } + } + + int parentChildren = 0; + if (parentRegion && parentRegion->pImpl) + { + if (parentLocation == NULL) + { + // parallel_for_body code path + parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1; + } + else + { + parentChildren = ++parentRegion->pImpl->directChildrenCount; + } + } + + int64 beginTimestamp = getTimestamp(); + + int currentDepth = ctx.getCurrentDepth() + 1; + switch (location.flags & REGION_FLAG_IMPL_MASK) + { +#ifdef HAVE_IPP + case REGION_FLAG_IMPL_IPP: + if (!ctx.stat_status.ignoreDepthImplIPP) + ctx.stat_status.ignoreDepthImplIPP = currentDepth; + break; +#endif +#ifdef HAVE_OPENCL + case REGION_FLAG_IMPL_OPENCL: + if (!ctx.stat_status.ignoreDepthImplOpenCL) + ctx.stat_status.ignoreDepthImplOpenCL = currentDepth; + break; +#endif +#ifdef HAVE_OPENVX + case REGION_FLAG_IMPL_OPENVX: + if (!ctx.stat_status.ignoreDepthImplOpenVX) + ctx.stat_status.ignoreDepthImplOpenVX = currentDepth; + break; +#endif + default: + break; + } + + ctx.stackPush(this, &location, beginTimestamp); + implFlags |= REGION_FLAG__NEED_STACK_POP; + + if ((location.flags & REGION_FLAG_REGION_FORCE) == 0) + { + if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth) + { + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. 
Bailout"); + ctx.stat.currentSkippedRegions++; + return; + } + + if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0) + { + if (parentChildren >= param_maxRegionChildrenOpenCV) + { + CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout"); + ctx.stat_status.enableSkipMode(currentDepth - 1); + ctx.stat.currentSkippedRegions++; + DEBUG_ONLY(ctx.dumpStack(std::cout, false)); + return; + } + } + if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren) + { + CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout"); + ctx.stat_status.enableSkipMode(currentDepth - 1); + ctx.stat.currentSkippedRegions++; + DEBUG_ONLY(ctx.dumpStack(std::cout, false)); + return; + } + } + + LocationExtraData::init(location); + + if ((*location.ppExtra)->global_location_id == 0) + { + CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout"); + ctx.stat_status.enableSkipMode(currentDepth); + ctx.stat.currentSkippedRegions++; + return; + } + + if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED)) + { + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout"); + ctx.stat_status.enableSkipMode(currentDepth); + ctx.stat.currentSkippedRegions++; + return; + } + + if (param_maxRegionDepthOpenCV) + { + if ((location.flags & REGION_FLAG_APP_CODE) == 0) + { + if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV) + { + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". 
Bailout"); + if (ctx.stat.currentSkippedRegions == 0) + { + DEBUG_ONLY(ctx.dumpStack(std::cout, false)); + } + ctx.stat_status.enableSkipMode(currentDepth); + ctx.stat.currentSkippedRegions++; + return; + } + } + } + + new Impl(ctx, parentRegion, *this, location, beginTimestamp); + CV_DbgAssert(pImpl != NULL); + implFlags |= REGION_FLAG__ACTIVE; + + // parallel_for path + if (parentRegion && parentRegion->pImpl) + { + if (parentLocation == NULL) + { + pImpl->directChildrenCount = parentChildren; + } + } +} + +void Region::destroy() +{ + CV_DbgAssert(implFlags != 0); + + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "")); + + CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP); + const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth); + + CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status); + + const Region::LocationStaticStorage* location = ctx.stackTopLocation(); + Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN; + if (location) + { + switch (location->flags & REGION_FLAG_IMPL_MASK) + { +#ifdef HAVE_IPP + case REGION_FLAG_IMPL_IPP: + myCodePath = Impl::CODE_PATH_IPP; + break; +#endif +#ifdef HAVE_OPENCL + case REGION_FLAG_IMPL_OPENCL: + if (param_synchronizeOpenCL && cv::ocl::useOpenCL()) + cv::ocl::finish(); + myCodePath = Impl::CODE_PATH_OPENCL; + break; +#endif +#ifdef HAVE_OPENVX + case REGION_FLAG_IMPL_OPENVX: + myCodePath = Impl::CODE_PATH_OPENVX; + break; +#endif + default: + break; + } + } + + int64 endTimestamp = getTimestamp(); + int64 duration = endTimestamp - ctx.stackTopBeginTimestamp(); + + bool active = isActive(); + + if (active) + ctx.stat.duration = duration; + else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1) + ctx.stat.duration += duration; + + switch 
(myCodePath) { + case Impl::CODE_PATH_PLAIN: + // nothing + break; +#ifdef HAVE_IPP + case Impl::CODE_PATH_IPP: + if (ctx.stat_status.ignoreDepthImplIPP == currentDepth) + { + ctx.stat.durationImplIPP += duration; + ctx.stat_status.ignoreDepthImplIPP = 0; + } + else if (active) + { + ctx.stat.durationImplIPP = duration; + } + break; +#endif +#ifdef HAVE_OPENCL + case Impl::CODE_PATH_OPENCL: + if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth) + { + ctx.stat.durationImplOpenCL += duration; + ctx.stat_status.ignoreDepthImplOpenCL = 0; + } + else if (active) + { + ctx.stat.durationImplOpenCL = duration; + } + break; +#endif +#ifdef HAVE_OPENVX + case Impl::CODE_PATH_OPENVX: + if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth) + { + ctx.stat.durationImplOpenVX += duration; + ctx.stat_status.ignoreDepthImplOpenVX = 0; + } + else if (active) + { + ctx.stat.durationImplOpenVX = duration; + } + break; +#endif + default: + break; + } + + if (pImpl) + { + CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)); + CV_DbgAssert(ctx.stackTopRegion() == this); + pImpl->endTimestamp = endTimestamp; + pImpl->leaveRegion(ctx); + pImpl->release(); + pImpl = NULL; + DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE); + } + else + { + CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth); + } + + if (implFlags & REGION_FLAG__NEED_STACK_POP) + { + CV_DbgAssert(ctx.stackTopRegion() == this); + ctx.stackPop(); + ctx.stat_status.checkResetSkipMode(currentDepth); + DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP); + } + CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status); +} + + +TraceManagerThreadLocal::~TraceManagerThreadLocal() +{ +} + +void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const +{ + std::stringstream ss; + std::deque::const_iterator it = stack.begin(); + std::deque::const_iterator end = stack.end(); + int 
depth = 0; + for (; it != end; ++it) + { + const Region::LocationStaticStorage* location = it->location; + if (location) + { + if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION)) + { + ss << _spaces(4*depth) << location->name << std::endl; + depth++; + } + } + else + { + ss << _spaces(4*depth) << "" << std::endl; + depth++; + } + } + out << ss.str(); +} + +class AsyncTraceStorage : public TraceStorage +{ + mutable std::ofstream out; +public: + const std::string name; + + AsyncTraceStorage(const std::string& filename) : + out(filename.c_str(), std::ios::trunc), + name(filename) + { + out << "#description: OpenCV trace file" << std::endl; + out << "#version: 1.0" << std::endl; + } + ~AsyncTraceStorage() + { + out.close(); + } + + bool put(const TraceMessage& msg) const + { + if (msg.hasError) + return false; + out << msg.buffer; + //DEBUG_ONLY(std::flush(out)); // TODO configure flag + return true; + } +}; + +class SyncTraceStorage : public TraceStorage +{ + mutable std::ofstream out; + mutable cv::Mutex mutex; +public: + const std::string name; + + SyncTraceStorage(const std::string& filename) : + out(filename.c_str(), std::ios::trunc), + name(filename) + { + out << "#description: OpenCV trace file" << std::endl; + out << "#version: 1.0" << std::endl; + } + ~SyncTraceStorage() + { + cv::AutoLock l(mutex); + out.close(); + } + + bool put(const TraceMessage& msg) const + { + if (msg.hasError) + return false; + { + cv::AutoLock l(mutex); + out << msg.buffer; + std::flush(out); // TODO configure flag + } + return true; + } +}; + + +TraceStorage* TraceManagerThreadLocal::getStorage() const +{ + // TODO configuration option for stdout/single trace file + if (storage.empty()) + { + TraceStorage* global = getTraceManager().trace_storage.get(); + if (global) + { + const std::string filepath = cv::format("%s-%03d.txt", param_traceLocation.c_str(), threadID).c_str(); + TraceMessage msg; + const char* pos = strrchr(filepath.c_str(), '/'); // extract filename +#ifdef 
_WIN32 + if (!pos) + pos = strrchr(filepath.c_str(), '\\'); // keep the result: fall back to the backslash separator +#endif + if (!pos) + pos = filepath.c_str(); + msg.printf("#thread file: %s\n", pos); + global->put(msg); + storage.reset(new AsyncTraceStorage(filepath)); + } + } + return storage.get(); +} + + + +static bool activated = false; +static bool isInitialized = false; + +TraceManager::TraceManager() +{ + g_zero_timestamp = cv::getTickCount(); + + isInitialized = true; + CV_LOG("TraceManager ctor: " << (void*)this); + + CV_LOG("TraceManager configure()"); + activated = param_traceEnable; + + if (activated) + trace_storage.reset(new SyncTraceStorage(std::string(param_traceLocation) + ".txt")); + +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + activated = true; // force trace pipeline activation (without OpenCV storage) + __itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace")); + } +#endif +} +TraceManager::~TraceManager() +{ + CV_LOG("TraceManager dtor: " << (void*)this); + +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + __itt_region_end(domain, __itt_null); + } +#endif + + std::vector<TraceManagerThreadLocal*> threads_ctx; + tls.gather(threads_ctx); + size_t totalEvents = 0, totalSkippedEvents = 0; + for (size_t i = 0; i < threads_ctx.size(); i++) + { + TraceManagerThreadLocal* ctx = threads_ctx[i]; + if (ctx) + { + totalEvents += ctx->region_counter; + totalSkippedEvents += ctx->totalSkippedEvents; + } + } + if (totalEvents || activated) + { + CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents); + } + if (totalSkippedEvents) + { + CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents); + } + + // This is a global static object, so process starts shutdown here + // Turn off trace + cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH + activated = false; +} + +bool TraceManager::isActivated() +{ + // Check if process starts shutdown, and set earlyExit to true + // to prevent further instrumentation processing earlier. 
+ if (cv::__termination) + { + activated = false; + return false; + } + + if (!isInitialized) + { + TraceManager& m = getTraceManager(); + (void)m; // TODO + } + + return activated; +} + + +static TraceManager* getTraceManagerCallOnce() +{ + static TraceManager globalInstance; + return &globalInstance; +} +TraceManager& getTraceManager() +{ + CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce()) +} + +void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + + if (ctx.dummy_stack_top.region == &rootRegion) // already attached + return; + + CV_Assert(ctx.dummy_stack_top.region == NULL); + ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast(&rootRegion), NULL, -1); + + if (&ctx == &root_ctx) + { + ctx.stat.grab(ctx.parallel_for_stat); + ctx.parallel_for_stat_status = ctx.stat_status; + ctx.parallel_for_stack_size = ctx.stack.size(); + return; + } + + CV_Assert(ctx.stack.empty()); + + ctx.currentActiveRegion = const_cast(&rootRegion); + + ctx.regionDepth = root_ctx.regionDepth; + ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV; + + ctx.parallel_for_stack_size = 0; + + ctx.stat_status.propagateFrom(root_ctx.stat_status); +} + +void parallelForAttachNestedRegion(const Region& rootRegion) +{ + CV_UNUSED(rootRegion); + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + + CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion); + + Region* region = ctx.getCurrentActiveRegion(); + CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion); + if (!region) + return; + +#ifdef OPENCV_WITH_ITT + if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered) + return; + + if (!region->pImpl) + return; + + CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => " + << 
(void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3); + __itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id); +#endif +} + +void parallelForFinalize(const Region& rootRegion) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + + int64 endTimestamp = getTimestamp(); + int64 duration = endTimestamp - ctx.stackTopBeginTimestamp(); + CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion); + + std::vector threads_ctx; + getTraceManager().tls.gather(threads_ctx); + RegionStatistics parallel_for_stat; + int threads = 0; + for (size_t i = 0; i < threads_ctx.size(); i++) + { + TraceManagerThreadLocal* child_ctx = threads_ctx[i]; + + if (child_ctx && child_ctx->stackTopRegion() == &rootRegion) + { + CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat); + threads++; + RegionStatistics child_stat; + child_ctx->stat.grab(child_stat); + parallel_for_stat.append(child_stat); + if (child_ctx != &ctx) + { + child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry(); + } + else + { + ctx.parallel_for_stat.grab(ctx.stat); + ctx.stat_status = ctx.parallel_for_stat_status; + child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry(); + } + } + } + float parallel_coeff = std::min(1.0f, duration / (float)(parallel_for_stat.duration)); + CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff); + CV_LOG_PARALLEL(NULL, parallel_for_stat); + if (parallel_coeff != 1.0f) + { + parallel_for_stat.multiply(parallel_coeff); + CV_LOG_PARALLEL(NULL, parallel_for_stat); + } + parallel_for_stat.duration = 0; + ctx.stat.append(parallel_for_stat); + CV_LOG_PARALLEL(NULL, ctx.stat); +} + +struct TraceArg::ExtraData +{ +#ifdef OPENCV_WITH_ITT + // Special fields for ITT + __itt_string_handle* volatile ittHandle_name; +#endif + ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg) + { + 
CV_UNUSED(ctx); CV_UNUSED(arg); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + // Caching is not required here, because there is builtin cache. + // https://software.intel.com/en-us/node/544203: + // Consecutive calls to __itt_string_handle_create with the same name return the same value. + ittHandle_name = __itt_string_handle_create(arg.name); + } +#endif + } +}; + +static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg) +{ + TraceArg::ExtraData** pExtra = arg.ppExtra; + if (*pExtra == NULL) + { + cv::AutoLock lock(cv::getInitializationMutex()); + if (*pExtra == NULL) + { + *pExtra = new TraceArg::ExtraData(ctx, arg); + } + } +} +void traceArg(const TraceArg& arg, const char* value) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + Region* region = ctx.getCurrentActiveRegion(); + if (!region) + return; + CV_Assert(region->pImpl); + initTraceArg(ctx, arg); + if (!value) + value = ""; +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + __itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value)); + } +#endif +} +void traceArg(const TraceArg& arg, int value) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + Region* region = ctx.getCurrentActiveRegion(); + if (!region) + return; + CV_Assert(region->pImpl); + initTraceArg(ctx, arg); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? 
__itt_metadata_s32 : __itt_metadata_s64, 1, &value); + } +#else + CV_UNUSED(value); +#endif +} +void traceArg(const TraceArg& arg, int64 value) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + Region* region = ctx.getCurrentActiveRegion(); + if (!region) + return; + CV_Assert(region->pImpl); + initTraceArg(ctx, arg); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value); + } +#else + CV_UNUSED(value); +#endif +} +void traceArg(const TraceArg& arg, double value) +{ + TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef(); + Region* region = ctx.getCurrentActiveRegion(); + if (!region) + return; + CV_Assert(region->pImpl); + initTraceArg(ctx, arg); +#ifdef OPENCV_WITH_ITT + if (isITTEnabled()) + { + __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value); + } +#else + CV_UNUSED(value); +#endif +} + +#else // implementation above compiled out: no-op stubs + +Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {} +void Region::destroy() {} + +void traceArg(const TraceArg&, const char*) {} +void traceArg(const TraceArg&, int) {} +void traceArg(const TraceArg&, int64) {} +void traceArg(const TraceArg&, double) {} + +#endif + +}}}} // namespace diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 35ce8ca..24686af 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -168,46 +168,55 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) void cv::namedWindow( const String& winname, int flags ) { + CV_TRACE_FUNCTION(); cvNamedWindow( winname.c_str(), flags ); } void cv::destroyWindow( const String& winname ) { + CV_TRACE_FUNCTION(); cvDestroyWindow( winname.c_str() ); } void cv::destroyAllWindows() { + CV_TRACE_FUNCTION(); cvDestroyAllWindows(); } void cv::resizeWindow( const String& winname, int width, int height ) { + 
CV_TRACE_FUNCTION(); cvResizeWindow( winname.c_str(), width, height ); } void cv::moveWindow( const String& winname, int x, int y ) { + CV_TRACE_FUNCTION(); cvMoveWindow( winname.c_str(), x, y ); } void cv::setWindowProperty(const String& winname, int prop_id, double prop_value) { + CV_TRACE_FUNCTION(); cvSetWindowProperty( winname.c_str(), prop_id, prop_value); } double cv::getWindowProperty(const String& winname, int prop_id) { + CV_TRACE_FUNCTION(); return cvGetWindowProperty(winname.c_str(), prop_id); } int cv::waitKeyEx(int delay) { + CV_TRACE_FUNCTION(); return cvWaitKey(delay); } int cv::waitKey(int delay) { + CV_TRACE_FUNCTION(); int code = waitKeyEx(delay); #ifndef HAVE_WINRT static int use_legacy = -1; @@ -225,42 +234,50 @@ int cv::createTrackbar(const String& trackbarName, const String& winName, int* value, int count, TrackbarCallback callback, void* userdata) { + CV_TRACE_FUNCTION(); return cvCreateTrackbar2(trackbarName.c_str(), winName.c_str(), value, count, callback, userdata); } void cv::setTrackbarPos( const String& trackbarName, const String& winName, int value ) { + CV_TRACE_FUNCTION(); cvSetTrackbarPos(trackbarName.c_str(), winName.c_str(), value ); } void cv::setTrackbarMax(const String& trackbarName, const String& winName, int maxval) { + CV_TRACE_FUNCTION(); cvSetTrackbarMax(trackbarName.c_str(), winName.c_str(), maxval); } void cv::setTrackbarMin(const String& trackbarName, const String& winName, int minval) { + CV_TRACE_FUNCTION(); cvSetTrackbarMin(trackbarName.c_str(), winName.c_str(), minval); } int cv::getTrackbarPos( const String& trackbarName, const String& winName ) { + CV_TRACE_FUNCTION(); return cvGetTrackbarPos(trackbarName.c_str(), winName.c_str()); } void cv::setMouseCallback( const String& windowName, MouseCallback onMouse, void* param) { + CV_TRACE_FUNCTION(); cvSetMouseCallback(windowName.c_str(), onMouse, param); } int cv::getMouseWheelDelta( int flags ) { + CV_TRACE_FUNCTION(); return CV_GET_WHEEL_DELTA(flags); } int 
cv::startWindowThread() { + CV_TRACE_FUNCTION(); return cvStartWindowThread(); } @@ -268,16 +285,19 @@ int cv::startWindowThread() void cv::setOpenGlDrawCallback(const String& name, OpenGlDrawCallback callback, void* userdata) { + CV_TRACE_FUNCTION(); cvSetOpenGlDrawCallback(name.c_str(), callback, userdata); } void cv::setOpenGlContext(const String& windowName) { + CV_TRACE_FUNCTION(); cvSetOpenGlContext(windowName.c_str()); } void cv::updateWindow(const String& windowName) { + CV_TRACE_FUNCTION(); cvUpdateWindow(windowName.c_str()); } @@ -299,6 +319,7 @@ namespace void cv::imshow( const String& winname, InputArray _img ) { + CV_TRACE_FUNCTION(); const Size size = _img.size(); #ifndef HAVE_OPENGL CV_Assert(size.width>0 && size.height>0); @@ -355,6 +376,7 @@ void cv::imshow( const String& winname, InputArray _img ) void cv::imshow(const String& winname, const ogl::Texture2D& _tex) { + CV_TRACE_FUNCTION(); #ifndef HAVE_OPENGL (void) winname; (void) _tex; diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp index 6b15dbe..2614019 100644 --- a/modules/imgcodecs/src/loadsave.cpp +++ b/modules/imgcodecs/src/loadsave.cpp @@ -556,6 +556,8 @@ imreadmulti_(const String& filename, int flags, std::vector& mats) */ Mat imread( const String& filename, int flags ) { + CV_TRACE_FUNCTION(); + /// create the basic container Mat img; @@ -584,6 +586,8 @@ Mat imread( const String& filename, int flags ) */ bool imreadmulti(const String& filename, std::vector& mats, int flags) { + CV_TRACE_FUNCTION(); + return imreadmulti_(filename, flags, mats); } @@ -621,6 +625,8 @@ static bool imwrite_( const String& filename, const Mat& image, bool imwrite( const String& filename, InputArray _img, const std::vector& params ) { + CV_TRACE_FUNCTION(); + Mat img = _img.getMat(); return imwrite_(filename, img, params, false); } @@ -725,6 +731,8 @@ imdecode_( const Mat& buf, int flags, int hdrtype, Mat* mat=0 ) Mat imdecode( InputArray _buf, int flags ) { + 
CV_TRACE_FUNCTION(); + Mat buf = _buf.getMat(), img; imdecode_( buf, flags, LOAD_MAT, &img ); @@ -739,6 +747,8 @@ Mat imdecode( InputArray _buf, int flags ) Mat imdecode( InputArray _buf, int flags, Mat* dst ) { + CV_TRACE_FUNCTION(); + Mat buf = _buf.getMat(), img; dst = dst ? dst : &img; imdecode_( buf, flags, LOAD_MAT, dst ); @@ -755,6 +765,8 @@ Mat imdecode( InputArray _buf, int flags, Mat* dst ) bool imencode( const String& ext, InputArray _image, std::vector& buf, const std::vector& params ) { + CV_TRACE_FUNCTION(); + Mat image = _image.getMat(); int channels = image.channels(); diff --git a/modules/imgproc/perf/opencl/perf_imgproc.cpp b/modules/imgproc/perf/opencl/perf_imgproc.cpp index 6d9b1a0..583f12b 100644 --- a/modules/imgproc/perf/opencl/perf_imgproc.cpp +++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp @@ -47,8 +47,6 @@ #include "../perf_precomp.hpp" #include "opencv2/ts/ocl_perf.hpp" -#ifdef HAVE_OPENCL - namespace cvtest { namespace ocl { @@ -318,11 +316,11 @@ OCL_PERF_TEST_P(CannyFixture, Canny, ::testing::Combine(OCL_TEST_SIZES, OCL_PERF declare.in(img).out(edges); - OCL_TEST_CYCLE() cv::Canny(img, edges, 50.0, 100.0, apertureSize, L2Grad); + PERF_SAMPLE_BEGIN(); + cv::Canny(img, edges, 50.0, 100.0, apertureSize, L2Grad); + PERF_SAMPLE_END(); SANITY_CHECK_NOTHING(); } } } // namespace cvtest::ocl - -#endif // HAVE_OPENCL diff --git a/modules/imgproc/perf/perf_canny.cpp b/modules/imgproc/perf/perf_canny.cpp index 2046556..3a25ff2 100644 --- a/modules/imgproc/perf/perf_canny.cpp +++ b/modules/imgproc/perf/perf_canny.cpp @@ -31,7 +31,9 @@ PERF_TEST_P(Img_Aperture_L2_thresholds, canny, declare.in(img).out(edges); - TEST_CYCLE() Canny(img, edges, thresh_low, thresh_high, aperture, useL2); + PERF_SAMPLE_BEGIN(); + Canny(img, edges, thresh_low, thresh_high, aperture, useL2); + PERF_SAMPLE_END(); SANITY_CHECK(edges); } diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index 9750c27..b4a4ac4 100644 --- 
a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -350,6 +350,8 @@ public: void operator()(const Range &boundaries) const { + CV_TRACE_FUNCTION(); + Mat dx, dy; AutoBuffer dxMax(0), dyMax(0); std::deque stack, borderPeaksLocal; @@ -358,6 +360,7 @@ public: short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL; uchar *_pmap; + CV_TRACE_REGION("gradient"); if(needGradient) { Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); @@ -369,6 +372,7 @@ public: dy = src2.rowRange(rowStart, rowEnd); } + CV_TRACE_REGION_NEXT("magnitude"); if(cn > 1) { dxMax.allocate(2 * dx.cols); @@ -740,6 +744,7 @@ public: uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower); // now track the edges (hysteresis thresholding) + CV_TRACE_REGION_NEXT("hysteresis"); while (!stack.empty()) { uchar *m = stack.back(); @@ -1035,6 +1040,7 @@ void Canny( InputArray _src, OutputArray _dst, parallel_for_(Range(0, src.rows), parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), numOfThreads); + CV_TRACE_REGION("global_hysteresis"); // now track the edges (hysteresis thresholding) ptrdiff_t mapstep = map.cols; @@ -1053,6 +1059,7 @@ void Canny( InputArray _src, OutputArray _dst, if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack); } + CV_TRACE_REGION_NEXT("finalPass"); parallel_for_(Range(0, src.rows), finalPass(map, dst), src.total()/(double)(1<<16)); } @@ -1105,6 +1112,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst, parallel_for_(Range(0, dx.rows), parallelCanny(dx, dy, map, stack, low, high, L2gradient), numOfThreads); + CV_TRACE_REGION("global_hysteresis"); // now track the edges (hysteresis thresholding) ptrdiff_t mapstep = map.cols; @@ -1123,6 +1131,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst, if (!m[mapstep+1]) CANNY_PUSH((m+mapstep+1), stack); } + CV_TRACE_REGION_NEXT("finalPass"); 
parallel_for_(Range(0, dx.rows), finalPass(map, dst), dx.total()/(double)(1<<16)); } diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index ecf8f46..6e554cf 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -271,6 +271,8 @@ public: virtual void operator()(const Range& range) const { + CV_TRACE_FUNCTION(); + const uchar* yS = src_data + static_cast(range.start) * src_step; uchar* yD = dst_data + static_cast(range.start) * dst_step; diff --git a/modules/ml/src/data.cpp b/modules/ml/src/data.cpp index 5e1b6d2..e48030f 100644 --- a/modules/ml/src/data.cpp +++ b/modules/ml/src/data.cpp @@ -1028,6 +1028,7 @@ Ptr TrainData::loadFromCSV(const String& filename, const String& varTypeSpec, char delimiter, char missch) { + CV_TRACE_FUNCTION_SKIP_NESTED(); Ptr td = makePtr(); if(!td->loadCSV(filename, headerLines, responseStartIdx, responseEndIdx, varTypeSpec, delimiter, missch)) td.release(); @@ -1038,6 +1039,7 @@ Ptr TrainData::create(InputArray samples, int layout, InputArray resp InputArray varIdx, InputArray sampleIdx, InputArray sampleWeights, InputArray varType) { + CV_TRACE_FUNCTION_SKIP_NESTED(); Ptr td = makePtr(); td->setData(samples, layout, responses, varIdx, sampleIdx, sampleWeights, varType, noArray()); return td; diff --git a/modules/ml/src/inner_functions.cpp b/modules/ml/src/inner_functions.cpp index 819d409..e9e8d4b 100644 --- a/modules/ml/src/inner_functions.cpp +++ b/modules/ml/src/inner_functions.cpp @@ -45,6 +45,7 @@ namespace cv { namespace ml { ParamGrid::ParamGrid() { minVal = maxVal = 0.; logStep = 1; } ParamGrid::ParamGrid(double _minVal, double _maxVal, double _logStep) { + CV_TRACE_FUNCTION(); minVal = std::min(_minVal, _maxVal); maxVal = std::max(_minVal, _maxVal); logStep = std::max(_logStep, 1.); @@ -60,17 +61,20 @@ int StatModel::getVarCount() const { return 0; } bool StatModel::train( const Ptr&, int ) { + CV_TRACE_FUNCTION(); CV_Error(CV_StsNotImplemented, ""); return false; } bool 
StatModel::train( InputArray samples, int layout, InputArray responses ) { + CV_TRACE_FUNCTION(); return train(TrainData::create(samples, layout, responses)); } float StatModel::calcError( const Ptr& data, bool testerr, OutputArray _resp ) const { + CV_TRACE_FUNCTION_SKIP_NESTED(); Mat samples = data->getSamples(); int layout = data->getLayout(); Mat sidx = testerr ? data->getTestSampleIdx() : data->getTrainSampleIdx(); @@ -119,6 +123,7 @@ float StatModel::calcError( const Ptr& data, bool testerr, OutputArra /* Calculates upper triangular matrix S, where A is a symmetrical matrix A=S'*S */ static void Cholesky( const Mat& A, Mat& S ) { + CV_TRACE_FUNCTION(); CV_Assert(A.type() == CV_32F); S = A.clone(); @@ -133,6 +138,7 @@ static void Cholesky( const Mat& A, Mat& S ) average row vector, - symmetric covariation matrix */ void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples ) { + CV_TRACE_FUNCTION(); // check mean vector and covariance matrix Mat mean = _mean.getMat(), cov = _cov.getMat(); int dim = (int)mean.total(); // dimensionality diff --git a/modules/ml/src/lr.cpp b/modules/ml/src/lr.cpp index f930014..00c092d 100644 --- a/modules/ml/src/lr.cpp +++ b/modules/ml/src/lr.cpp @@ -135,6 +135,7 @@ Ptr LogisticRegression::load(const String& filepath, const S bool LogisticRegressionImpl::train(const Ptr& trainData, int) { + CV_TRACE_FUNCTION_SKIP_NESTED(); // return value bool ok = false; @@ -313,6 +314,7 @@ float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, i Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const { + CV_TRACE_FUNCTION(); Mat dest; exp(-data, dest); return 1.0/(1.0+dest); @@ -320,6 +322,7 @@ Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta) { + CV_TRACE_FUNCTION(); float llambda = 0; /*changed llambda from int to float to solve issue #7924*/ int m; int n; @@ 
-410,6 +413,7 @@ struct LogisticRegressionImpl_ComputeDradient_Impl : ParallelLoopBody void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient ) { + CV_TRACE_FUNCTION(); const int m = _data.rows; Mat pcal_a, pcal_b, pcal_ab; @@ -431,6 +435,7 @@ void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labe Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta) { + CV_TRACE_FUNCTION(); // implements batch gradient descent if(this->params.alpha<=0) { diff --git a/modules/ml/src/rtrees.cpp b/modules/ml/src/rtrees.cpp index fa2a239..4482188 100644 --- a/modules/ml/src/rtrees.cpp +++ b/modules/ml/src/rtrees.cpp @@ -49,6 +49,7 @@ namespace ml { ////////////////////////////////////////////////////////////////////////////////////////// RTreeParams::RTreeParams() { + CV_TRACE_FUNCTION(); calcVarImportance = false; nactiveVars = 0; termCrit = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 50, 0.1); @@ -58,6 +59,7 @@ RTreeParams::RTreeParams(bool _calcVarImportance, int _nactiveVars, TermCriteria _termCrit ) { + CV_TRACE_FUNCTION(); calcVarImportance = _calcVarImportance; nactiveVars = _nactiveVars; termCrit = _termCrit; @@ -69,6 +71,7 @@ class DTreesImplForRTrees : public DTreesImpl public: DTreesImplForRTrees() { + CV_TRACE_FUNCTION(); params.setMaxDepth(5); params.setMinSampleCount(10); params.setRegressionAccuracy(0.f); @@ -83,6 +86,7 @@ public: void clear() { + CV_TRACE_FUNCTION(); DTreesImpl::clear(); oobError = 0.; rng = RNG((uint64)-1); @@ -90,6 +94,7 @@ public: const vector& getActiveVars() { + CV_TRACE_FUNCTION(); int i, nvars = (int)allVars.size(), m = (int)activeVars.size(); for( i = 0; i < nvars; i++ ) { @@ -104,6 +109,7 @@ public: void startTraining( const Ptr& trainData, int flags ) { + CV_TRACE_FUNCTION(); DTreesImpl::startTraining(trainData, flags); int nvars = w->data->getNVars(); int i, m = 
rparams.nactiveVars > 0 ? rparams.nactiveVars : cvRound(std::sqrt((double)nvars)); @@ -116,6 +122,7 @@ public: void endTraining() { + CV_TRACE_FUNCTION(); DTreesImpl::endTraining(); vector a, b; std::swap(allVars, a); @@ -124,6 +131,7 @@ public: bool train( const Ptr& trainData, int flags ) { + CV_TRACE_FUNCTION(); startTraining(trainData, flags); int treeidx, ntrees = (rparams.termCrit.type & TermCriteria::COUNT) != 0 ? rparams.termCrit.maxCount : 10000; @@ -286,12 +294,14 @@ public: void writeTrainingParams( FileStorage& fs ) const { + CV_TRACE_FUNCTION(); DTreesImpl::writeTrainingParams(fs); fs << "nactive_vars" << rparams.nactiveVars; } void write( FileStorage& fs ) const { + CV_TRACE_FUNCTION(); if( roots.empty() ) CV_Error( CV_StsBadArg, "RTrees have not been trained" ); @@ -319,6 +329,7 @@ public: void readParams( const FileNode& fn ) { + CV_TRACE_FUNCTION(); DTreesImpl::readParams(fn); FileNode tparams_node = fn["training_params"]; @@ -327,6 +338,7 @@ public: void read( const FileNode& fn ) { + CV_TRACE_FUNCTION(); clear(); //int nclasses = (int)fn["nclasses"]; @@ -351,6 +363,7 @@ public: void getVotes( InputArray input, OutputArray output, int flags ) const { + CV_TRACE_FUNCTION(); CV_Assert( !roots.empty() ); int nclasses = (int)classLabels.size(), ntrees = (int)roots.size(); Mat samples = input.getMat(), results; @@ -435,6 +448,7 @@ public: bool train( const Ptr& trainData, int flags ) { + CV_TRACE_FUNCTION(); if (impl.getCVFolds() != 0) CV_Error(Error::StsBadArg, "Cross validation for RTrees is not implemented"); return impl.train(trainData, flags); @@ -442,22 +456,26 @@ public: float predict( InputArray samples, OutputArray results, int flags ) const { + CV_TRACE_FUNCTION(); return impl.predict(samples, results, flags); } void write( FileStorage& fs ) const { + CV_TRACE_FUNCTION(); impl.write(fs); } void read( const FileNode& fn ) { + CV_TRACE_FUNCTION(); impl.read(fn); } void getVotes_( InputArray samples, OutputArray results, int flags ) const { - 
impl.getVotes(samples, results, flags); + CV_TRACE_FUNCTION(); + impl.getVotes(samples, results, flags); } Mat getVarImportance() const { return Mat_(impl.varImportance, true); } @@ -477,17 +495,20 @@ public: Ptr RTrees::create() { + CV_TRACE_FUNCTION(); return makePtr(); } //Function needed for Python and Java wrappers Ptr RTrees::load(const String& filepath, const String& nodeName) { + CV_TRACE_FUNCTION(); return Algorithm::load(filepath, nodeName); } void RTrees::getVotes(InputArray input, OutputArray output, int flags) const { + CV_TRACE_FUNCTION(); const RTreesImpl* this_ = dynamic_cast(this); if(!this_) CV_Error(Error::StsNotImplemented, "the class is not RTreesImpl"); diff --git a/modules/ml/test/test_emknearestkmeans.cpp b/modules/ml/test/test_emknearestkmeans.cpp index 2764876..eaed926 100644 --- a/modules/ml/test/test_emknearestkmeans.cpp +++ b/modules/ml/test/test_emknearestkmeans.cpp @@ -50,6 +50,7 @@ using cv::ml::KNearest; static void defaultDistribs( Mat& means, vector& covs, int type=CV_32FC1 ) { + CV_TRACE_FUNCTION(); float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f}; float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f}; float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f}; @@ -76,6 +77,7 @@ void defaultDistribs( Mat& means, vector& covs, int type=CV_32FC1 ) static void generateData( Mat& data, Mat& labels, const vector& sizes, const Mat& _means, const vector& covs, int dataType, int labelType ) { + CV_TRACE_FUNCTION(); vector::const_iterator sit = sizes.begin(); int total = 0; for( ; sit != sizes.end(); ++sit ) @@ -226,6 +228,7 @@ protected: void CV_KMeansTest::run( int /*start_from*/ ) { + CV_TRACE_FUNCTION(); const int iters = 100; int sizesArr[] = { 5000, 7000, 8000 }; int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2]; diff --git a/modules/ml/test/test_lr.cpp b/modules/ml/test/test_lr.cpp index 125fd8b..130c155 100644 --- a/modules/ml/test/test_lr.cpp +++ b/modules/ml/test/test_lr.cpp @@ -64,6 +64,7 @@ using 
namespace cv::ml; static bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error) { + CV_TRACE_FUNCTION(); error = 0.0f; float accuracy = 0.0f; Mat _p_labels_temp; @@ -91,6 +92,7 @@ protected: void CV_LRTest::run( int /*start_from*/ ) { + CV_TRACE_FUNCTION(); // initialize varibles from the popular Iris Dataset string dataFileName = ts->get_data_path() + "iris.data"; Ptr tdata = TrainData::loadFromCSV(dataFileName, 0); @@ -150,6 +152,7 @@ protected: void CV_LRTest_SaveLoad::run( int /*start_from*/ ) { + CV_TRACE_FUNCTION(); int code = cvtest::TS::OK; // initialize varibles from the popular Iris Dataset diff --git a/modules/ml/test/test_mltests.cpp b/modules/ml/test/test_mltests.cpp index 7193331..5e863f6 100644 --- a/modules/ml/test/test_mltests.cpp +++ b/modules/ml/test/test_mltests.cpp @@ -51,6 +51,7 @@ CV_AMLTest::CV_AMLTest( const char* _modelName ) : CV_MLBaseTest( _modelName ) int CV_AMLTest::run_test_case( int testCaseIdx ) { + CV_TRACE_FUNCTION(); int code = cvtest::TS::OK; code = prepare_test_case( testCaseIdx ); @@ -91,6 +92,7 @@ int CV_AMLTest::run_test_case( int testCaseIdx ) int CV_AMLTest::validate_test_results( int testCaseIdx ) { + CV_TRACE_FUNCTION(); int iters; float mean, sigma; // read validation params diff --git a/modules/ml/test/test_mltests2.cpp b/modules/ml/test/test_mltests2.cpp index 15ae200..b823b84 100644 --- a/modules/ml/test/test_mltests2.cpp +++ b/modules/ml/test/test_mltests2.cpp @@ -87,6 +87,7 @@ int str_to_ann_train_method( String& str ) void ann_check_data( Ptr _data ) { + CV_TRACE_FUNCTION(); Mat values = _data->getSamples(); Mat var_idx = _data->getVarIdx(); int nvars = (int)var_idx.total(); @@ -99,6 +100,7 @@ void ann_check_data( Ptr _data ) // unroll the categorical responses to binary vectors Mat ann_get_new_responses( Ptr _data, map& cls_map ) { + CV_TRACE_FUNCTION(); Mat train_sidx = _data->getTrainSampleIdx(); int* train_sidx_ptr = train_sidx.ptr(); Mat responses = _data->getResponses(); @@ -130,6 
+132,7 @@ Mat ann_get_new_responses( Ptr _data, map& cls_map ) float ann_calc_error( Ptr ann, Ptr _data, map& cls_map, int type, vector *resp_labels ) { + CV_TRACE_FUNCTION(); float err = 0; Mat samples = _data->getSamples(); Mat responses = _data->getResponses(); @@ -241,6 +244,7 @@ CV_MLBaseTest::~CV_MLBaseTest() int CV_MLBaseTest::read_params( CvFileStorage* __fs ) { + CV_TRACE_FUNCTION(); FileStorage _fs(__fs, false); if( !_fs.isOpened() ) test_case_count = -1; @@ -265,6 +269,7 @@ int CV_MLBaseTest::read_params( CvFileStorage* __fs ) void CV_MLBaseTest::run( int ) { + CV_TRACE_FUNCTION(); string filename = ts->get_data_path(); filename += get_validation_filename(); validationFS.open( filename, FileStorage::READ ); @@ -273,6 +278,7 @@ void CV_MLBaseTest::run( int ) int code = cvtest::TS::OK; for (int i = 0; i < test_case_count; i++) { + CV_TRACE_REGION("iteration"); int temp_code = run_test_case( i ); if (temp_code == cvtest::TS::OK) temp_code = validate_test_results( i ); @@ -289,6 +295,7 @@ void CV_MLBaseTest::run( int ) int CV_MLBaseTest::prepare_test_case( int test_case_idx ) { + CV_TRACE_FUNCTION(); clear(); string dataPath = ts->get_data_path(); @@ -331,6 +338,7 @@ string& CV_MLBaseTest::get_validation_filename() int CV_MLBaseTest::train( int testCaseIdx ) { + CV_TRACE_FUNCTION(); bool is_trained = false; FileNode modelParamsNode = validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["model_params"]; @@ -489,6 +497,7 @@ int CV_MLBaseTest::train( int testCaseIdx ) float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector *resp ) { + CV_TRACE_FUNCTION(); int type = CV_TEST_ERROR; float err = 0; Mat _resp; @@ -506,11 +515,13 @@ float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector *resp ) void CV_MLBaseTest::save( const char* filename ) { + CV_TRACE_FUNCTION(); model->save( filename ); } void CV_MLBaseTest::load( const char* filename ) { + CV_TRACE_FUNCTION(); if( modelName == CV_NBAYES ) model = 
Algorithm::load( filename ); else if( modelName == CV_KNEAREST ) diff --git a/modules/python/common.cmake b/modules/python/common.cmake index e014d0e..f1d4ba1 100644 --- a/modules/python/common.cmake +++ b/modules/python/common.cmake @@ -37,6 +37,7 @@ ocv_list_filterout(opencv_hdrs "modules/core/.*/cuda") ocv_list_filterout(opencv_hdrs "modules/cuda.*") ocv_list_filterout(opencv_hdrs "modules/cudev") ocv_list_filterout(opencv_hdrs "modules/core/.*/hal/") +ocv_list_filterout(opencv_hdrs "modules/.+/utils/.*") ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker.hpp") # Conditional compilation set(cv2_generated_hdrs diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 382a087..41a76b1 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -1,7 +1,22 @@ #ifndef OPENCV_TS_HPP #define OPENCV_TS_HPP -#include "opencv2/core/cvdef.h" +#ifndef __OPENCV_TESTS +#define __OPENCV_TESTS 1 +#endif + +#include "opencv2/opencv_modules.hpp" + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/imgcodecs.hpp" +#include "opencv2/videoio.hpp" +#include "opencv2/highgui.hpp" + +#include "opencv2/core/utility.hpp" + +#include "opencv2/core/utils/trace.hpp" + #include // for va_list #include "cvconfig.h" @@ -46,9 +61,6 @@ #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > #define GET_PARAM(k) std::tr1::get< k >(GetParam()) -#include "opencv2/core.hpp" -#include "opencv2/core/utility.hpp" - namespace cvtest { @@ -615,6 +627,8 @@ void parseCustomOptions(int argc, char **argv); #define CV_TEST_MAIN_EX(resourcesubdir, INIT0, ...) 
\ int main(int argc, char **argv) \ { \ + CV_TRACE_FUNCTION(); \ + { CV_TRACE_REGION("INIT"); \ using namespace cvtest; \ TS* ts = TS::ptr(); \ ts->init(resourcesubdir); \ @@ -624,6 +638,7 @@ int main(int argc, char **argv) \ TEST_DUMP_OCL_INFO \ __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ parseCustomOptions(argc, argv); \ + } \ return RUN_ALL_TESTS(); \ } diff --git a/modules/ts/include/opencv2/ts/cuda_perf.hpp b/modules/ts/include/opencv2/ts/cuda_perf.hpp index 672b9ff..3c162ad 100644 --- a/modules/ts/include/opencv2/ts/cuda_perf.hpp +++ b/modules/ts/include/opencv2/ts/cuda_perf.hpp @@ -43,12 +43,9 @@ #ifndef OPENCV_CUDA_PERF_UTILITY_HPP #define OPENCV_CUDA_PERF_UTILITY_HPP -#include "opencv2/core.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/videoio.hpp" -#include "opencv2/imgproc.hpp" +#include "opencv2/ts.hpp" + #include "opencv2/ts/ts_perf.hpp" -#include "cvconfig.h" namespace perf { diff --git a/modules/ts/include/opencv2/ts/cuda_test.hpp b/modules/ts/include/opencv2/ts/cuda_test.hpp index b107761..4100d29 100644 --- a/modules/ts/include/opencv2/ts/cuda_test.hpp +++ b/modules/ts/include/opencv2/ts/cuda_test.hpp @@ -43,14 +43,10 @@ #ifndef OPENCV_CUDA_TEST_UTILITY_HPP #define OPENCV_CUDA_TEST_UTILITY_HPP +#include "opencv2/ts.hpp" + #include -#include "cvconfig.h" -#include "opencv2/core.hpp" #include "opencv2/core/cuda.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/ts.hpp" namespace cvtest { diff --git a/modules/ts/include/opencv2/ts/ocl_perf.hpp b/modules/ts/include/opencv2/ts/ocl_perf.hpp index 58091f3..4d57aaa 100644 --- a/modules/ts/include/opencv2/ts/ocl_perf.hpp +++ b/modules/ts/include/opencv2/ts/ocl_perf.hpp @@ -42,6 +42,8 @@ #ifndef OPENCV_TS_OCL_PERF_HPP #define OPENCV_TS_OCL_PERF_HPP +#include "opencv2/ts.hpp" + #include "ocl_test.hpp" #include "ts_perf.hpp" @@ -67,7 +69,7 @@ using std::tr1::tuple; protected: \ virtual void PerfTestBody(); \ }; \ - 
TEST_F(OCL##_##fixture##_##name, name) { declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \ + TEST_F(OCL##_##fixture##_##name, name) { CV_TRACE_REGION("PERF_TEST: " #fixture "_" #name); declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \ void OCL##_##fixture##_##name::PerfTestBody() #define SIMPLE_PERF_TEST_P(fixture, name, params) \ @@ -79,7 +81,7 @@ using std::tr1::tuple; protected: \ virtual void PerfTestBody(); \ }; \ - TEST_P(OCL##_##fixture##_##name, name) { declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \ + TEST_P(OCL##_##fixture##_##name, name) { CV_TRACE_REGION("PERF_TEST_P: " #fixture "_" #name); declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \ INSTANTIATE_TEST_CASE_P(/*none*/, OCL##_##fixture##_##name, params); \ void OCL##_##fixture##_##name::PerfTestBody() @@ -95,17 +97,27 @@ using std::tr1::tuple; #define OCL_PERF_ENUM ::testing::Values -// TODO Replace finish call to dstUMat.wait() +//! deprecated #define OCL_TEST_CYCLE() \ for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) - +//! deprecated #define OCL_TEST_CYCLE_N(n) \ for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) - +//! 
deprecated #define OCL_TEST_CYCLE_MULTIRUN(runsNum) \ for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \ for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r) +#undef PERF_SAMPLE_BEGIN +#undef PERF_SAMPLE_END +#define PERF_SAMPLE_BEGIN() \ + cvtest::ocl::perf::safeFinish(); \ + for(; next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \ + { \ + CV_TRACE_REGION("iteration"); +#define PERF_SAMPLE_END() \ + } + namespace perf { diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 8c94ede..54b33ec 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -42,8 +42,6 @@ #ifndef OPENCV_TS_OCL_TEST_HPP #define OPENCV_TS_OCL_TEST_HPP -#include "opencv2/opencv_modules.hpp" - #include "opencv2/ts.hpp" #include "opencv2/imgcodecs.hpp" diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index 05ccc63..0bdd346 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -16,8 +16,9 @@ void checkIppStatus(); cv::ipp::setIppStatus(0); \ cv::theRNG().state = cvtest::param_seed; #define CV_TEST_CLEANUP ::cvtest::checkIppStatus(); -#define CV_TEST_BODY_IMPL \ +#define CV_TEST_BODY_IMPL(name) \ { \ + CV__TRACE_APP_FUNCTION_NAME(name); \ try { \ CV_TEST_INIT \ Body(); \ @@ -53,7 +54,7 @@ void checkIppStatus(); ::testing::Test::TearDownTestCase, \ new ::testing::internal::TestFactoryImpl<\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ - void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body() #undef TEST_F @@ -79,7 +80,7 @@ void checkIppStatus(); 
test_fixture::TearDownTestCase, \ new ::testing::internal::TestFactoryImpl<\ GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\ - void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV_TEST_BODY_IMPL \ + void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_fixture "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() #undef TEST_P @@ -111,7 +112,7 @@ void checkIppStatus(); int GTEST_TEST_CLASS_NAME_(test_case_name, \ test_name)::gtest_registering_dummy_ = \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ - void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body() #endif // OPENCV_TS_EXT_HPP diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index bfa1811..5bbf191 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -1,7 +1,8 @@ #ifndef OPENCV_TS_PERF_HPP #define OPENCV_TS_PERF_HPP -#include "opencv2/core.hpp" +#include "opencv2/ts.hpp" + #include "ts_gtest.h" #include "ts_ext.hpp" @@ -537,7 +538,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); protected:\ virtual void PerfTestBody();\ };\ - TEST_F(test_case_name, test_name){ RunPerfTestBody(); }\ + TEST_F(test_case_name, test_name){ CV_TRACE_REGION("PERF_TEST: " #test_case_name "_" #test_name); RunPerfTestBody(); }\ }\ void PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name)::test_case_name::PerfTestBody() @@ -575,7 +576,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); protected:\ virtual void PerfTestBody();\ };\ - TEST_F(fixture, testname){ RunPerfTestBody(); }\ + TEST_F(fixture, testname){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #testname); RunPerfTestBody(); }\ }\ 
void PERF_PROXY_NAMESPACE_NAME_(fixture, testname)::fixture::PerfTestBody() @@ -608,7 +609,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); protected:\ virtual void PerfTestBody();\ };\ - TEST_P(fixture##_##name, name /*perf*/){ RunPerfTestBody(); }\ + TEST_P(fixture##_##name, name /*perf*/){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #name); RunPerfTestBody(); }\ INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ void fixture##_##name::PerfTestBody() @@ -631,7 +632,10 @@ void dumpOpenCLDevice(); #define TEST_DUMP_OCL_INFO #endif + #define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) \ + CV_TRACE_FUNCTION(); \ + { CV_TRACE_REGION("INIT"); \ ::perf::Regression::Init(#modulename); \ ::perf::TestBase::Init(std::vector(impls, impls + sizeof impls / sizeof *impls), \ argc, argv); \ @@ -641,6 +645,7 @@ void dumpOpenCLDevice(); ::perf::TestBase::RecordRunParameters(); \ __CV_TEST_EXEC_ARGS(__VA_ARGS__) \ TEST_DUMP_OCL_INFO \ + } \ return RUN_ALL_TESTS(); // impls must be an array, not a pointer; "plain" should always be one of the implementations @@ -657,10 +662,20 @@ int main(int argc, char **argv)\ CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\ } +//! deprecated #define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer()) +//! deprecated #define TEST_CYCLE() for(; next() && startTimer(); stopTimer()) +//! 
deprecated #define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r) +#define PERF_SAMPLE_BEGIN() \ + for(; next() && startTimer(); stopTimer()) \ + { \ + CV_TRACE_REGION("iteration"); +#define PERF_SAMPLE_END() \ + } + namespace perf { namespace comparators diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py index ad6a38d..0f2116e 100755 --- a/modules/ts/misc/run.py +++ b/modules/ts/misc/run.py @@ -49,6 +49,9 @@ if __name__ == "__main__": parser.add_argument("--serial", metavar="serial number", default="", help="Android: directs command to the USB device or emulator with the given serial number") parser.add_argument("--package", metavar="package", default="", help="Android: run jUnit tests for specified package") + parser.add_argument("--trace", action="store_true", default=False, help="Trace: enable OpenCV tracing") + parser.add_argument("--trace_dump", metavar="trace_dump", default=-1, help="Trace: dump highlight calls (specify max entries count, 0 - dump all)") + args, other_args = parser.parse_known_args() log.setLevel(logging.DEBUG if args.verbose else logging.INFO) diff --git a/modules/ts/misc/run_suite.py b/modules/ts/misc/run_suite.py index ca0841d..311f415 100644 --- a/modules/ts/misc/run_suite.py +++ b/modules/ts/misc/run_suite.py @@ -4,6 +4,8 @@ import datetime from run_utils import * from run_long import LONG_TESTS_DEBUG_VALGRIND, longTestFilter +timestamp = datetime.datetime.now() + class TestSuite(object): def __init__(self, options, cache): self.options = options @@ -20,7 +22,8 @@ class TestSuite(object): res.append("CUDA") return res - def getLogName(self, app, timestamp): + def getLogBaseName(self, app): + global timestamp app = self.getAlias(app) rev = self.cache.getGitVersion() if isinstance(timestamp, datetime.datetime): @@ -34,7 +37,10 @@ class TestSuite(object): lname = "_".join([p for p in pieces if p]) lname = re.sub(r'[\(\)\[\]\s,]', '_', lname) l = 
re.sub(r'_+', '_', lname) - return l + ".xml" + return l + + def getLogName(self, app): + return self.getLogBaseName(app) + '.xml' def listTests(self, short = False, main = False): if len(self.tests) == 0: @@ -138,10 +144,25 @@ class TestSuite(object): if isColorEnabled(args): args.append("--gtest_color=yes") cmd = self.wrapInValgrind([exe] + args) + env = {} + if not self.options.valgrind and self.options.trace: + env['OPENCV_TRACE'] = '1' + env['OPENCV_TRACE_LOCATION'] = 'OpenCVTrace-{}'.format(self.getLogBaseName(exe)) + env['OPENCV_TRACE_SYNC_OPENCL'] = '1' tempDir = TempEnvDir('OPENCV_TEMP_PATH', "__opencv_temp.") tempDir.init() log.warning("Run: %s" % " ".join(cmd)) - ret = execute(cmd, cwd = workingDir) + ret = execute(cmd, cwd = workingDir, env=env) + try: + if not self.options.valgrind and self.options.trace and int(self.options.trace_dump) >= 0: + import trace_profiler + trace = trace_profiler.Trace(env['OPENCV_TRACE_LOCATION']+'.txt') + trace.process() + trace.dump(max_entries=int(self.options.trace_dump)) + except: + import traceback + traceback.print_exc() + pass tempDir.clean() hostlogpath = os.path.join(workingDir, logfile) if os.path.isfile(hostlogpath): @@ -157,7 +178,6 @@ class TestSuite(object): args = args[:] logs = [] test_list = self.getTestList(tests, black) - date = datetime.datetime.now() if len(test_list) != 1: args = [a for a in args if not a.startswith("--gtest_output=")] ret = 0 @@ -170,7 +190,7 @@ class TestSuite(object): else: userlog = [a for a in args if a.startswith("--gtest_output=")] if len(userlog) == 0: - logname = self.getLogName(exe, date) + logname = self.getLogName(exe) more_args.append("--gtest_output=xml:" + logname) else: logname = userlog[0][userlog[0].find(":")+1:] diff --git a/modules/ts/misc/run_utils.py b/modules/ts/misc/run_utils.py index 2e9a66f..8c07763 100644 --- a/modules/ts/misc/run_utils.py +++ b/modules/ts/misc/run_utils.py @@ -25,10 +25,12 @@ class Err(Exception): def execute(cmd, silent = False, cwd = ".", 
env = None): try: log.debug("Run: %s", cmd) - if env: + if env is not None: for k in env: log.debug(" Environ: %s=%s", k, env[k]) - env = os.environ.update(env) + new_env = os.environ.copy() + new_env.update(env) + env = new_env if silent: return check_output(cmd, stderr = STDOUT, cwd = cwd, env = env).decode("latin-1") else: diff --git a/modules/ts/misc/trace_profiler.py b/modules/ts/misc/trace_profiler.py new file mode 100644 index 0000000..1d10890 --- /dev/null +++ b/modules/ts/misc/trace_profiler.py @@ -0,0 +1,435 @@ +from __future__ import print_function + +import os +import sys +import csv +from pprint import pprint +from collections import deque + +# trace.hpp +REGION_FLAG_IMPL_MASK = 15 << 16; +REGION_FLAG_IMPL_IPP = 1 << 16; +REGION_FLAG_IMPL_OPENCL = 2 << 16; + +DEBUG = False + +if DEBUG: + dprint = print + dpprint = pprint +else: + def dprint(args, **kwargs): + pass + def dpprint(args, **kwargs): + pass + +def tryNum(s): + if s.startswith('0x'): + try: + return int(s, 16) + except ValueError: + pass + try: + return int(s) + except ValueError: + pass + if sys.version_info[0] < 3: + try: + return long(s) + except ValueError: + pass + return s + +def formatTimestamp(t): + return "%.3f" % (t * 1e-6) + +try: + from statistics import median +except ImportError: + def median(lst): + sortedLst = sorted(lst) + lstLen = len(lst) + index = (lstLen - 1) // 2 + if (lstLen % 2): + return sortedLst[index] + else: + return (sortedLst[index] + sortedLst[index + 1]) * 0.5 + +def getCXXFunctionName(spec): + def dropParams(spec): + pos = len(spec) - 1 + depth = 0 + while pos >= 0: + if spec[pos] == ')': + depth = depth + 1 + elif spec[pos] == '(': + depth = depth - 1 + if depth == 0: + if pos == 0 or spec[pos - 1] in ['#', ':']: + res = dropParams(spec[pos+1:-1]) + return (spec[:pos] + res[0], res[1]) + return (spec[:pos], spec[pos:]) + pos = pos - 1 + return (spec, '') + + def extractName(spec): + pos = len(spec) - 1 + inName = False + while pos >= 0: + if spec[pos] == ' 
': + if inName: + return spec[pos+1:] + elif spec[pos].isalnum(): + inName = True + pos = pos - 1 + return spec + + if spec.startswith('IPP') or spec.startswith('OpenCL'): + prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL') + prefix = spec[:prefix_size] + if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']: + prefix = prefix + spec[prefix_size] + prefix_size = prefix_size + 1 + begin = prefix_size + while begin < len(spec): + if spec[begin].isalnum() or spec[begin] in ['_', ':']: + break + begin = begin + 1 + if begin == len(spec): + return spec + end = begin + while end < len(spec): + if not (spec[end].isalnum() or spec[end] in ['_', ':']): + break + end = end + 1 + return prefix + spec[begin:end] + + spec = spec.replace(') const', ')') # const methods + (ret_type_name, params) = dropParams(spec) + name = extractName(ret_type_name) + if 'operator' in name: + return name + params + if name.startswith('&'): + return name[1:] + return name + +stack_size = 10 + +class Trace: + def __init__(self, filename=None): + self.tasks = {} + self.tasks_list = [] + self.locations = {} + self.threads_stack = {} + self.pending_files = deque() + if filename: + self.load(filename) + + class TraceTask: + def __init__(self, threadID, taskID, locationID, beginTimestamp): + self.threadID = threadID + self.taskID = taskID + self.locationID = locationID + self.beginTimestamp = beginTimestamp + self.endTimestamp = None + self.parentTaskID = None + self.parentThreadID = None + self.childTask = [] + self.selfTimeIPP = 0 + self.selfTimeOpenCL = 0 + self.totalTimeIPP = 0 + self.totalTimeOpenCL = 0 + + def __repr__(self): + return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format( + self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID, + self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL) + + + class TraceLocation: + def __init__(self, 
locationID, filename, line, name, flags): + self.locationID = locationID + self.filename = os.path.split(filename)[1] + self.line = line + self.name = getCXXFunctionName(name) + self.flags = flags + + def __str__(self): + return "{}#{}:{}".format(self.name, self.filename, self.line) + + def __repr__(self): + return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name) + + def parse_file(self, filename): + dprint("Process file: '{}'".format(filename)) + with open(filename) as infile: + for line in infile: + line = str(line).strip() + if line[0] == "#": + if line.startswith("#thread file:"): + name = str(line.split(':', 1)[1]).strip() + self.pending_files.append(os.path.join(os.path.split(filename)[0], name)) + continue + self.parse_line(line) + + def parse_line(self, line): + opts = line.split(',') + dpprint(opts) + if opts[0] == 'l': + opts = list(csv.reader([line]))[0] # process quote more + locationID = int(opts[1]) + filename = str(opts[2]) + line = int(opts[3]) + name = opts[4] + flags = tryNum(opts[5]) + self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags) + return + extra_opts = {} + for e in opts[5:]: + if not '=' in e: + continue + (k, v) = e.split('=') + extra_opts[k] = tryNum(v) + if extra_opts: + dpprint(extra_opts) + threadID = None + taskID = None + locationID = None + ts = None + if opts[0] in ['b', 'e']: + threadID = int(opts[1]) + taskID = int(opts[4]) + locationID = int(opts[3]) + ts = tryNum(opts[2]) + thread_stack = None + currentTask = (None, None) + if threadID is not None: + if not threadID in self.threads_stack: + thread_stack = deque() + self.threads_stack[threadID] = thread_stack + else: + thread_stack = self.threads_stack[threadID] + currentTask = None if not thread_stack else thread_stack[-1] + t = (threadID, taskID) + if opts[0] == 'b': + assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t]) + task = self.TraceTask(threadID, taskID, locationID, ts) + 
self.tasks[t] = task + self.tasks_list.append(task) + thread_stack.append((threadID, taskID)) + if currentTask: + task.parentThreadID = currentTask[0] + task.parentTaskID = currentTask[1] + if 'parentThread' in extra_opts: + task.parentThreadID = extra_opts['parentThread'] + if 'parent' in extra_opts: + task.parentTaskID = extra_opts['parent'] + if opts[0] == 'e': + task = self.tasks[t] + task.endTimestamp = ts + if 'tIPP' in extra_opts: + task.selfTimeIPP = extra_opts['tIPP'] + if 'tOCL' in extra_opts: + task.selfTimeOpenCL = extra_opts['tOCL'] + thread_stack.pop() + + def load(self, filename): + self.pending_files.append(filename) + if DEBUG: + with open(filename, 'r') as f: + print(f.read(), end='') + while self.pending_files: + self.parse_file(self.pending_files.pop()) + + def getParentTask(self, task): + return self.tasks.get((task.parentThreadID, task.parentTaskID), None) + + def process(self): + self.tasks_list.sort(key=lambda x: x.beginTimestamp) + + parallel_for_location = None + for (id, l) in self.locations.items(): + if l.name == 'parallel_for': + parallel_for_location = l.locationID + break + + for task in self.tasks_list: + try: + task.duration = task.endTimestamp - task.beginTimestamp + task.selfDuration = task.duration + except: + task.duration = None + task.selfDuration = None + task.totalTimeIPP = task.selfTimeIPP + task.totalTimeOpenCL = task.selfTimeOpenCL + + dpprint(self.tasks) + dprint("Calculate total times") + + for task in self.tasks_list: + parentTask = self.getParentTask(task) + if parentTask: + parentTask.selfDuration = parentTask.selfDuration - task.duration + parentTask.childTask.append(task) + timeIPP = task.selfTimeIPP + timeOpenCL = task.selfTimeOpenCL + while parentTask: + if parentTask.locationID == parallel_for_location: # TODO parallel_for + break + parentLocation = self.locations[parentTask.locationID] + if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP: + parentTask.selfTimeIPP = parentTask.selfTimeIPP 
- timeIPP + timeIPP = 0 + else: + parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP + if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL: + parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL + timeOpenCL = 0 + else: + parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL + parentTask = self.getParentTask(parentTask) + + dpprint(self.tasks) + dprint("Calculate total times (parallel_for)") + + for task in self.tasks_list: + if task.locationID == parallel_for_location: + task.selfDuration = 0 + childDuration = sum([t.duration for t in task.childTask]) + if task.duration == 0 or childDuration == 0: + continue + timeCoef = task.duration / float(childDuration) + childTimeIPP = sum([t.totalTimeIPP for t in task.childTask]) + childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask]) + if childTimeIPP == 0 and childTimeOpenCL == 0: + continue + timeIPP = childTimeIPP * timeCoef + timeOpenCL = childTimeOpenCL * timeCoef + parentTask = task + while parentTask: + parentLocation = self.locations[parentTask.locationID] + if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP: + parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP + timeIPP = 0 + else: + parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP + if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL: + parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL + timeOpenCL = 0 + else: + parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL + parentTask = self.getParentTask(parentTask) + + dpprint(self.tasks) + dprint("Done") + + def dump(self, max_entries): + assert isinstance(max_entries, int) + + class CallInfo(): + def __init__(self, callID): + self.callID = callID + self.totalTimes = [] + self.selfTimes = [] + self.threads = set() + self.selfTimesIPP = [] + self.selfTimesOpenCL = [] + self.totalTimesIPP = [] + self.totalTimesOpenCL = [] + + calls = {} + + for 
currentTask in self.tasks_list: + task = currentTask + callID = [] + for i in range(stack_size): + callID.append(task.locationID) + task = self.getParentTask(task) + if not task: + break + callID = tuple(callID) + if not callID in calls: + call = CallInfo(callID) + calls[callID] = call + else: + call = calls[callID] + call.totalTimes.append(currentTask.duration) + call.selfTimes.append(currentTask.selfDuration) + call.threads.add(currentTask.threadID) + call.selfTimesIPP.append(currentTask.selfTimeIPP) + call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL) + call.totalTimesIPP.append(currentTask.totalTimeIPP) + call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL) + + dpprint(self.tasks) + dpprint(self.locations) + dpprint(calls) + + calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()} + calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()} + calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()} + calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True) + + calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()} + calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()} + + calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()} + calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()} + + if max_entries > 0 and len(calls_sorted) > max_entries: + calls_sorted = calls_sorted[:max_entries] + + def formatPercents(p): + if p is not None: + return "{:>3d}".format(int(p*100)) + return '' + + name_width = 70 + timestamp_width = 12 + def fmtTS(): + return '{:>' + str(timestamp_width) + '}' + fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2) + fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2) + print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%")) + 
print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%")) + for (index, callID) in enumerate(calls_sorted): + call_self_times = calls[callID].selfTimes + loc0 = self.locations[callID[0]] + loc_array = [] # [str(callID)] + for (i, l) in enumerate(callID): + loc = self.locations[l] + loc_array.append(loc.name if i > 0 else str(loc)) + loc_str = '|'.join(loc_array) + if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...' + print(fmt.format(index + 1, loc_str, len(call_self_times), + len(calls[callID].threads), + formatTimestamp(min(call_self_times)), + formatTimestamp(max(call_self_times)), + formatTimestamp(calls_median[callID]), + formatTimestamp(sum(call_self_times)/float(len(call_self_times))), + formatTimestamp(sum(call_self_times)), + formatTimestamp(calls_self_sum_IPP[callID]), + formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None), + formatTimestamp(calls_self_sum_OpenCL[callID]), + formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None), + )) + call_total_times = calls[callID].totalTimes + print(fmt2.format("", "", "", "", + formatTimestamp(min(call_total_times)), + formatTimestamp(max(call_total_times)), + formatTimestamp(median(call_total_times)), + formatTimestamp(sum(call_total_times)/float(len(call_total_times))), + formatTimestamp(sum(call_total_times)), + formatTimestamp(calls_total_sum_IPP[callID]), + formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None), + formatTimestamp(calls_total_sum_OpenCL[callID]), + formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None), + )) + print() + +if __name__ == "__main__": + tracefile = sys.argv[1] if len(sys.argv) > 1 else 
'OpenCVTrace.txt' + count = int(sys.argv[2]) if len(sys.argv) > 2 else 10 + trace = Trace(tracefile) + trace.process() + trace.dump(max_entries = count) + print("OK") diff --git a/modules/ts/src/precomp.hpp b/modules/ts/src/precomp.hpp index fbb13ec..155c377 100644 --- a/modules/ts/src/precomp.hpp +++ b/modules/ts/src/precomp.hpp @@ -1,7 +1,5 @@ -#include "opencv2/core/utility.hpp" -#include "opencv2/core/private.hpp" #include "opencv2/ts.hpp" -#include "cvconfig.h" +#include "opencv2/core/private.hpp" #ifdef GTEST_LINKED_AS_SHARED_LIBRARY #error ts module should not have GTEST_LINKED_AS_SHARED_LIBRARY defined diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index b2763d4..db6de8e 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -225,6 +225,7 @@ bool BaseTest::can_do_fast_forward() void BaseTest::safe_run( int start_from ) { + CV_TRACE_FUNCTION(); read_params( ts->get_file_storage() ); ts->update_context( 0, -1, true ); ts->update_context( this, -1, true ); diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index e5a5e31..159437c 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -954,6 +954,8 @@ void TestBase::Init(int argc, const char* const argv[]) void TestBase::Init(const std::vector & availableImpls, int argc, const char* const argv[]) { + CV_TRACE_FUNCTION(); + available_impls = availableImpls; const std::string command_line_keys = @@ -1182,6 +1184,7 @@ enum PERF_STRATEGY TestBase::getCurrentModulePerformanceStrategy() int64 TestBase::_calibrate() { + CV_TRACE_FUNCTION(); class _helper : public ::perf::TestBase { public: @@ -1248,6 +1251,7 @@ void TestBase::declareArray(SizeVector& sizes, cv::InputOutputArray a, WarmUpTyp void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype) { + CV_TRACE_FUNCTION(); if (a.empty()) return; else if (a.isUMat()) @@ -1419,6 +1423,7 @@ bool TestBase::next() median_ms > perf_validation_time_threshold_ms && (grow || metrics.stddev > 
perf_stability_criteria * fabs(metrics.mean))) { + CV_TRACE_REGION("idle_delay"); printf("Performance is unstable, it may be a result of overheat problems\n"); printf("Idle delay for %d ms... \n", perf_validation_idle_delay_ms); #if defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64 @@ -1682,8 +1687,17 @@ void TestBase::validateMetrics() void TestBase::reportMetrics(bool toJUnitXML) { + CV_TRACE_FUNCTION(); + performance_metrics& m = calcMetrics(); + CV_TRACE_ARG_VALUE(samples, "samples", (int64)m.samples); + CV_TRACE_ARG_VALUE(outliers, "outliers", (int64)m.outliers); + CV_TRACE_ARG_VALUE(median, "mean_ms", (double)(m.mean * 1000.0f / metrics.frequency)); + CV_TRACE_ARG_VALUE(median, "median_ms", (double)(m.median * 1000.0f / metrics.frequency)); + CV_TRACE_ARG_VALUE(stddev, "stddev_ms", (double)(m.stddev * 1000.0f / metrics.frequency)); + CV_TRACE_ARG_VALUE(stddev_percents, "stddev_percents", (double)(m.stddev / (double)m.mean * 100.0f)); + if (m.terminationReason == performance_metrics::TERM_SKIP_TEST) { if (toJUnitXML) diff --git a/modules/videoio/src/cap.cpp b/modules/videoio/src/cap.cpp index b6f991b..97c3a64 100644 --- a/modules/videoio/src/cap.cpp +++ b/modules/videoio/src/cap.cpp @@ -590,28 +590,33 @@ VideoCapture::VideoCapture() VideoCapture::VideoCapture(const String& filename, int apiPreference) { + CV_TRACE_FUNCTION(); open(filename, apiPreference); } VideoCapture::VideoCapture(const String& filename) { + CV_TRACE_FUNCTION(); open(filename, CAP_ANY); } VideoCapture::VideoCapture(int index) { + CV_TRACE_FUNCTION(); open(index); } VideoCapture::~VideoCapture() { + CV_TRACE_FUNCTION(); + icap.release(); cap.release(); } bool VideoCapture::open(const String& filename, int apiPreference) { - CV_INSTRUMENT_REGION() + CV_TRACE_FUNCTION(); if (isOpened()) release(); icap = IVideoCapture_create(filename); @@ -624,14 +629,14 @@ bool VideoCapture::open(const String& filename, int apiPreference) bool VideoCapture::open(const String& filename) { - 
CV_INSTRUMENT_REGION() + CV_TRACE_FUNCTION(); return open(filename, CAP_ANY); } bool VideoCapture::open(int index) { - CV_INSTRUMENT_REGION() + CV_TRACE_FUNCTION(); if (isOpened()) release(); icap = IVideoCapture_create(index); @@ -642,6 +647,8 @@ bool VideoCapture::open(int index) } bool VideoCapture::open(int cameraNum, int apiPreference) { + CV_TRACE_FUNCTION(); + cameraNum = cameraNum + apiPreference; return open(cameraNum); } @@ -653,6 +660,7 @@ bool VideoCapture::isOpened() const void VideoCapture::release() { + CV_TRACE_FUNCTION(); icap.release(); cap.release(); } diff --git a/samples/cpp/application_trace.cpp b/samples/cpp/application_trace.cpp new file mode 100644 index 0000000..ddddd51 --- /dev/null +++ b/samples/cpp/application_trace.cpp @@ -0,0 +1,92 @@ +/* OpenCV Application Tracing support demo. */ +#include + +#include +#include + +using namespace cv; +using namespace std; + +static void process_frame(const cv::UMat& frame) +{ + CV_TRACE_FUNCTION(); // OpenCV Trace macro for function + + imshow("Live", frame); + + UMat gray, processed; + cv::cvtColor(frame, gray, COLOR_BGR2GRAY); + Canny(gray, processed, 32, 64, 3); + imshow("Processed", processed); +} + +int main(int argc, char** argv) +{ + CV_TRACE_FUNCTION(); + + cv::CommandLineParser parser(argc, argv, + "{help h ? 
| | help message}" + "{n | 100 | number of frames to process }" + "{@video | 0 | video filename or cameraID }" + ); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + VideoCapture capture; + std::string video = parser.get("@video"); + if (video.size() == 1 && isdigit(video[0])) + capture.open(parser.get("@video")); + else + capture.open(video); + int nframes = 0; + if (capture.isOpened()) + { + nframes = (int)capture.get(CAP_PROP_FRAME_COUNT); + cout << "Video " << video << + ": width=" << capture.get(CAP_PROP_FRAME_WIDTH) << + ", height=" << capture.get(CAP_PROP_FRAME_HEIGHT) << + ", nframes=" << nframes << endl; + } + else + { + cout << "Could not initialize video capturing...\n"; + return -1; + } + + int N = parser.get("n"); + if (nframes > 0 && N > nframes) + N = nframes; + + cout << "Start processing..." << endl + << "Press ESC key to terminate" << endl; + + UMat frame; + for (int i = 0; N > 0 ? (i < N) : true; i++) + { + CV_TRACE_REGION("FRAME"); // OpenCV Trace macro for named "scope" region + { + CV_TRACE_REGION("read"); + capture.read(frame); + + if (frame.empty()) + { + cerr << "Can't capture frame: " << i << std::endl; + break; + } + + // OpenCV Trace macro for NEXT named region in the same C++ scope + // Previous "read" region will be marked complete on this line. + // Use this to eliminate unnecessary curly braces. + CV_TRACE_REGION_NEXT("process"); + process_frame(frame); + + CV_TRACE_REGION_NEXT("delay"); + if (waitKey(1) == 27/*ESC*/) + break; + } + } + + return 0; +}