trace: initial support for code trace
authorAlexander Alekhin <alexander.alekhin@intel.com>
Thu, 25 May 2017 15:59:01 +0000 (18:59 +0300)
committerAlexander Alekhin <alexander.alekhin@intel.com>
Mon, 26 Jun 2017 14:07:13 +0000 (17:07 +0300)
58 files changed:
3rdparty/ittnotify/CMakeLists.txt
CMakeLists.txt
apps/CMakeLists.txt
apps/version/opencv_version.cpp
cmake/OpenCVDetectTrace.cmake [new file with mode: 0644]
cmake/OpenCVModule.cmake
cmake/templates/cvconfig.h.in
modules/calib3d/test/test_stereomatching.cpp
modules/core/CMakeLists.txt
modules/core/include/opencv2/core/cvstd.inl.hpp
modules/core/include/opencv2/core/mat.inl.hpp
modules/core/include/opencv2/core/private.hpp
modules/core/include/opencv2/core/utility.hpp
modules/core/include/opencv2/core/utils/logger.hpp [new file with mode: 0644]
modules/core/include/opencv2/core/utils/trace.hpp [new file with mode: 0644]
modules/core/include/opencv2/core/utils/trace.private.hpp [new file with mode: 0644]
modules/core/src/algorithm.cpp
modules/core/src/copy.cpp
modules/core/src/kmeans.cpp
modules/core/src/matrix.cpp
modules/core/src/ocl.cpp
modules/core/src/parallel.cpp
modules/core/src/parallel_pthreads.cpp
modules/core/src/persistence.cpp
modules/core/src/precomp.hpp
modules/core/src/system.cpp
modules/core/src/trace.cpp [new file with mode: 0644]
modules/highgui/src/window.cpp
modules/imgcodecs/src/loadsave.cpp
modules/imgproc/perf/opencl/perf_imgproc.cpp
modules/imgproc/perf/perf_canny.cpp
modules/imgproc/src/canny.cpp
modules/imgproc/src/color.cpp
modules/ml/src/data.cpp
modules/ml/src/inner_functions.cpp
modules/ml/src/lr.cpp
modules/ml/src/rtrees.cpp
modules/ml/test/test_emknearestkmeans.cpp
modules/ml/test/test_lr.cpp
modules/ml/test/test_mltests.cpp
modules/ml/test/test_mltests2.cpp
modules/python/common.cmake
modules/ts/include/opencv2/ts.hpp
modules/ts/include/opencv2/ts/cuda_perf.hpp
modules/ts/include/opencv2/ts/cuda_test.hpp
modules/ts/include/opencv2/ts/ocl_perf.hpp
modules/ts/include/opencv2/ts/ocl_test.hpp
modules/ts/include/opencv2/ts/ts_ext.hpp
modules/ts/include/opencv2/ts/ts_perf.hpp
modules/ts/misc/run.py
modules/ts/misc/run_suite.py
modules/ts/misc/run_utils.py
modules/ts/misc/trace_profiler.py [new file with mode: 0644]
modules/ts/src/precomp.hpp
modules/ts/src/ts.cpp
modules/ts/src/ts_perf.cpp
modules/videoio/src/cap.cpp
samples/cpp/application_trace.cpp [new file with mode: 0644]

index 0b2e859..a164d8a 100644 (file)
@@ -8,6 +8,13 @@ if(NOT ITT_LIBRARY)
 endif()
 project(${ITT_LIBRARY} C)
 
+if(NOT WIN32)
+  include(CheckLibraryExists)
+  if(COMMAND CHECK_LIBRARY_EXISTS)
+    CHECK_LIBRARY_EXISTS(dl dlerror "" HAVE_DL_LIBRARY)
+  endif()
+endif()
+
 ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
 set(ITT_INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include")
 
@@ -30,6 +37,12 @@ set(ITT_SRCS
 
 add_library(${ITT_LIBRARY} STATIC ${ITT_SRCS} ${ITT_PUBLIC_HDRS} ${ITT_PRIVATE_HDRS})
 
+if(NOT WIN32)
+  if(HAVE_DL_LIBRARY)
+    target_link_libraries(${ITT_LIBRARY} dl)
+  endif()
+endif()
+
 if(UNIX)
   if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
index d4fe1c3..1dbe5d5 100644 (file)
@@ -264,6 +264,7 @@ OCV_OPTION(WITH_MFX            "Include Intel Media SDK support"             OFF
 OCV_OPTION(WITH_GDAL           "Include GDAL Support"                        OFF  IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
 OCV_OPTION(WITH_GPHOTO2        "Include gPhoto2 library support"             ON   IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_LAPACK         "Include Lapack library support"              ON   IF (NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_ITT            "Include Intel ITT support"                   ON   IF (NOT APPLE_FRAMEWORK) )
 
 # OpenCV build components
 # ===================================================
@@ -291,6 +292,7 @@ OCV_OPTION(BUILD_PNG                "Build libpng from source"           WIN32 O
 OCV_OPTION(BUILD_OPENEXR            "Build openexr from source"          (WIN32 OR ANDROID OR APPLE) AND NOT WINRT)
 OCV_OPTION(BUILD_TBB                "Download and build TBB from source" ANDROID )
 OCV_OPTION(BUILD_IPP_IW             "Build IPP IW from source"           NOT MINGW IF (X86_64 OR X86) AND NOT WINRT )
+OCV_OPTION(BUILD_ITT                "Build Intel ITT from source"        NOT MINGW IF (X86_64 OR X86) AND NOT WINRT AND NOT APPLE_FRAMEWORK )
 
 # OpenCV installation options
 # ===================================================
@@ -324,7 +326,7 @@ OCV_OPTION(ENABLE_BUILD_HARDENING     "Enable hardening of the resulting binarie
 OCV_OPTION(GENERATE_ABI_DESCRIPTOR    "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
 OCV_OPTION(CV_ENABLE_INTRINSICS       "Use intrinsic-based optimized code" ON )
 OCV_OPTION(CV_DISABLE_OPTIMIZATION    "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
-
+OCV_OPTION(CV_TRACE                   "Enable OpenCV code trace" ON)
 
 
 if(ENABLE_IMPL_COLLECTION)
@@ -733,6 +735,16 @@ if(HAVE_CUDA)
     endif()
   endforeach()
 endif()
+
+
+# ----------------------------------------------------------------------------
+# Code trace support
+# ----------------------------------------------------------------------------
+if(CV_TRACE)
+  include(cmake/OpenCVDetectTrace.cmake)
+endif()
+
+
 # ----------------------------------------------------------------------------
 # Solution folders:
 # ----------------------------------------------------------------------------
@@ -1278,6 +1290,14 @@ endif()
 status("")
 status("  Parallel framework:" TRUE THEN "${CV_PARALLEL_FRAMEWORK}" ELSE NO)
 
+if(CV_TRACE OR OPENCV_TRACE)
+  set(__msg "")
+  if(HAVE_ITT)
+    set(__msg "with Intel ITT")
+  endif()
+  status("")
+  status("  Trace: " OPENCV_TRACE THEN "YES (${__msg})" ELSE NO)
+endif()
 
 # ========================== Other third-party libraries ==========================
 status("")
index f2cdc87..ed66375 100644 (file)
@@ -1,4 +1,6 @@
 add_definitions(-D__OPENCV_BUILD=1)
+add_definitions(-D__OPENCV_APPS=1)
+
 link_libraries(${OPENCV_LINKER_LIBS})
 
 add_subdirectory(traincascade)
index 78f2810..9ad4bac 100644 (file)
@@ -5,9 +5,15 @@
 #include <iostream>
 
 #include <opencv2/core.hpp>
+#include <opencv2/core/utils/trace.hpp>
 
 int main(int argc, const char** argv)
 {
+    CV_TRACE_FUNCTION();
+    CV_TRACE_ARG(argc);
+    CV_TRACE_ARG_VALUE(argv0, "argv0", argv[0]);
+    CV_TRACE_ARG_VALUE(argv1, "argv1", argv[1]);
+
     cv::CommandLineParser parser(argc, argv,
         "{ help h usage ? |      | show this help message }"
         "{ verbose v      |      | show build configuration log }"
diff --git a/cmake/OpenCVDetectTrace.cmake b/cmake/OpenCVDetectTrace.cmake
new file mode 100644 (file)
index 0000000..07a8f64
--- /dev/null
@@ -0,0 +1,13 @@
+if(WITH_ITT)
+  if(BUILD_ITT)
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/ittnotify")
+    set(ITT_INCLUDE_DIR "${OpenCV_SOURCE_DIR}/3rdparty/ittnotify/include")
+    set(ITT_INCLUDE_DIRS "${ITT_INCLUDE_DIR}")
+    set(ITT_LIBRARIES "ittnotify")
+    set(HAVE_ITT 1)
+  else()
+    #TODO
+  endif()
+endif()
+
+set(OPENCV_TRACE 1)
index 9f0e24a..4bcf633 100644 (file)
@@ -683,6 +683,8 @@ macro(ocv_glob_module_sources)
        "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.h"
        "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.hpp"
        "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/hal/*.h"
+       "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/utils/*.hpp"
+       "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/utils/*.h"
   )
   file(GLOB lib_hdrs_detail
        "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/detail/*.hpp"
@@ -927,7 +929,7 @@ macro(_ocv_create_module)
     if(OPENCV_MODULE_${m}_HEADERS AND ";${OPENCV_MODULES_PUBLIC};" MATCHES ";${m};")
       foreach(hdr ${OPENCV_MODULE_${m}_HEADERS})
         string(REGEX REPLACE "^.*opencv2/" "opencv2/" hdr2 "${hdr}")
-        if(NOT hdr2 MATCHES "opencv2/${m}/private.*" AND hdr2 MATCHES "^(opencv2/?.*)/[^/]+.h(..)?$" )
+        if(NOT hdr2 MATCHES "private" AND hdr2 MATCHES "^(opencv2/?.*)/[^/]+.h(..)?$" )
           install(FILES ${hdr} OPTIONAL DESTINATION "${OPENCV_INCLUDE_INSTALL_PATH}/${CMAKE_MATCH_1}" COMPONENT dev)
         endif()
       endforeach()
@@ -1158,6 +1160,8 @@ function(ocv_add_accuracy_tests)
         RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
       )
 
+      ocv_append_target_property(${the_target} COMPILE_DEFINITIONS "__OPENCV_TESTS=1")
+
       if(ENABLE_SOLUTION_FOLDERS)
         set_target_properties(${the_target} PROPERTIES FOLDER "tests accuracy")
       endif()
index 5c5e96e..76bb431 100644 (file)
 #define HAVE_VIDEO_OUTPUT
 #endif
 
+/* OpenCV trace utilities */
+#cmakedefine OPENCV_TRACE
+
 
 #endif // OPENCV_CVCONFIG_H_INCLUDED
index d4f20b1..3806650 100644 (file)
@@ -789,8 +789,11 @@ TEST(Calib3d_StereoSGBM_HH4, regression)
 {
     String path = cvtest::TS::ptr()->get_data_path() + "cv/stereomatching/datasets/teddy/";
     Mat leftImg = imread(path + "im2.png", 0);
+    ASSERT_FALSE(leftImg.empty());
     Mat rightImg = imread(path + "im6.png", 0);
+    ASSERT_FALSE(rightImg.empty());
     Mat testData = imread(path + "disp2_hh4.png",-1);
+    ASSERT_FALSE(testData.empty());
     Mat leftDisp;
     Mat toCheck;
     {
index 3e69dda..6de15ba 100644 (file)
@@ -21,6 +21,10 @@ if(HAVE_CUDA)
   ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function -Wshadow)
 endif()
 
+if(CV_TRACE AND HAVE_ITT AND BUILD_ITT)
+  add_definitions(-DOPENCV_WITH_ITT=1)
+endif()
+
 file(GLOB lib_cuda_hdrs        "include/opencv2/${name}/cuda/*.hpp"        "include/opencv2/${name}/cuda/*.h")
 file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h")
 
@@ -37,9 +41,16 @@ if(ANDROID AND HAVE_CPUFEATURES)
   ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_CPUFEATURES=1")
   ocv_module_include_directories(${CPUFEATURES_INCLUDE_DIRS})
 endif()
+if(ITT_INCLUDE_DIRS)
+  ocv_module_include_directories(${ITT_INCLUDE_DIRS})
+endif()
 ocv_create_module(${extra_libs})
 
-ocv_target_link_libraries(${the_module} ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}" "${LAPACK_LIBRARIES}" "${CPUFEATURES_LIBRARIES}" "${HALIDE_LIBRARIES}")
+ocv_target_link_libraries(${the_module}
+    "${ZLIB_LIBRARIES}" "${OPENCL_LIBRARIES}" "${VA_LIBRARIES}"
+    "${LAPACK_LIBRARIES}" "${CPUFEATURES_LIBRARIES}" "${HALIDE_LIBRARIES}"
+    "${ITT_LIBRARIES}"
+)
 
 ocv_add_accuracy_tests()
 ocv_add_perf_tests()
index 874364e..c8c7ba9 100644 (file)
 
 //! @cond IGNORED
 
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
 namespace cv
 {
 #ifndef OPENCV_NOSTL
@@ -233,14 +238,7 @@ template<typename _Tp, int n> static inline
 std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
 {
     out << "[";
-#ifdef _MSC_VER
-#pragma warning( push )
-#pragma warning( disable: 4127 )
-#endif
     if(Vec<_Tp, n>::depth < CV_32F)
-#ifdef _MSC_VER
-#pragma warning( pop )
-#endif
     {
         for (int i = 0; i < n - 1; ++i) {
             out << (int)vec[i] << ", ";
@@ -285,6 +283,10 @@ static inline std::ostream& operator << (std::ostream& out, const MatSize& msize
 #endif // OPENCV_NOSTL
 } // cv
 
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
 //! @endcond
 
 #endif // OPENCV_CORE_CVSTDINL_HPP
index cf3b3a0..332accf 100644 (file)
 #  error mat.inl.hpp header must be compiled as C++
 #endif
 
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
 namespace cv
 {
 
@@ -3855,4 +3860,8 @@ inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
 
 } //cv
 
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
 #endif
index dbe8bb6..1028505 100644 (file)
@@ -51,6 +51,8 @@
 #include "opencv2/core.hpp"
 #include "cvconfig.h"
 
+#include <opencv2/core/utils/trace.hpp>
+
 #ifdef HAVE_EIGEN
 #  if defined __GNUC__ && defined __APPLE__
 #    pragma GCC diagnostic ignored "-Wshadow"
@@ -548,6 +550,7 @@ static struct __IppInitializer__ __ipp_initializer__;
     {                                                                       \
         if (cv::ipp::useIPP() && (condition))                               \
         {                                                                   \
+            CV__TRACE_REGION_("IPP:" #func, CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP) \
             if(func)                                                        \
             {                                                               \
                 CV_IMPL_ADD(CV_IMPL_IPP);                                   \
@@ -562,23 +565,21 @@ static struct __IppInitializer__ __ipp_initializer__;
     }
 #else
 #define CV_IPP_RUN_(condition, func, ...)                                   \
-    if (cv::ipp::useIPP() && (condition) && (func))                         \
-    {                                                                       \
-        CV_IMPL_ADD(CV_IMPL_IPP);                                           \
-        return __VA_ARGS__;                                                 \
-    }
+        if (cv::ipp::useIPP() && (condition))                               \
+        {                                                                   \
+            CV__TRACE_REGION_("IPP:" #func, CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP) \
+            if(func)                                                        \
+            {                                                               \
+                CV_IMPL_ADD(CV_IMPL_IPP);                                   \
+                return __VA_ARGS__;                                         \
+            }                                                               \
+        }
 #endif
-#define CV_IPP_RUN_FAST(func, ...)                                          \
-    if (cv::ipp::useIPP() && (func))                                        \
-    {                                                                       \
-        CV_IMPL_ADD(CV_IMPL_IPP);                                           \
-        return __VA_ARGS__;                                                 \
-    }
 #else
 #define CV_IPP_RUN_(condition, func, ...)
-#define CV_IPP_RUN_FAST(func, ...)
 #endif
 
+#define CV_IPP_RUN_FAST(func, ...) CV_IPP_RUN_(true, func, __VA_ARGS__)
 #define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
 
 
@@ -768,15 +769,15 @@ CV_EXPORTS InstrNode*   getCurrentNode();
 #else
 #define CV_INSTRUMENT_REGION_META(...)
 
-#define CV_INSTRUMENT_REGION_()
-#define CV_INSTRUMENT_REGION_NAME(...)
+#define CV_INSTRUMENT_REGION_()                            CV_TRACE_FUNCTION()
+#define CV_INSTRUMENT_REGION_NAME(...)                     CV_TRACE_REGION(__VA_ARGS__)
 #define CV_INSTRUMENT_REGION_MT_FORK()
 
-#define CV_INSTRUMENT_REGION_IPP()
+#define CV_INSTRUMENT_REGION_IPP()                         CV__TRACE_REGION_("IPP", CV_TRACE_NS::details::REGION_FLAG_IMPL_IPP)
 #define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
 #define CV_INSTRUMENT_MARK_IPP(...)
 
-#define CV_INSTRUMENT_REGION_OPENCL()
+#define CV_INSTRUMENT_REGION_OPENCL()                      CV__TRACE_REGION_("OpenCL", CV_TRACE_NS::details::REGION_FLAG_IMPL_OPENCL)
 #define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...)
 #define CV_INSTRUMENT_REGION_OPENCL_RUN(...)
 #define CV_INSTRUMENT_MARK_OPENCL(...)
index 87c20f1..8a923c9 100644 (file)
@@ -641,6 +641,7 @@ public:
     inline TLSData()        {}
     inline ~TLSData()       { release();            } // Release key and delete associated data
     inline T* get() const   { return (T*)getData(); } // Get data associated with key
+    inline T& getRef() const { T* ptr = (T*)getData(); CV_Assert(ptr); return *ptr; } // Get reference to data associated with key (asserts that the data pointer is non-NULL)
 
     // Get data from all threads
     inline void gather(std::vector<T*> &data) const
@@ -1168,6 +1169,12 @@ static inline void    setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
 CV_EXPORTS FLAGS      getFlags();
 }
 
+namespace utils {
+
+CV_EXPORTS int getThreadID();
+
+} // namespace
+
 } //namespace cv
 
 #ifndef DISABLE_OPENCV_24_COMPATIBILITY
diff --git a/modules/core/include/opencv2/core/utils/logger.hpp b/modules/core/include/opencv2/core/utils/logger.hpp
new file mode 100644 (file)
index 0000000..d7e73de
--- /dev/null
@@ -0,0 +1,84 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGING_HPP
+#define OPENCV_LOGGING_HPP
+
+#include <iostream>
+#include <sstream>
+#include <limits.h> // INT_MAX
+
+// TODO This file contains just interface part with implementation stubs.
+
+//! @addtogroup core_logging
+// This section describes OpenCV logging utilities.
+//
+//! @{
+
+namespace utils {
+namespace logging {
+
+// Supported logging levels and their semantics
+#define CV_LOG_LEVEL_SILENT 0          //!< for use in setLogLevel() call
+#define CV_LOG_LEVEL_FATAL 1           //!< Fatal (critical) error (unrecoverable internal error)
+#define CV_LOG_LEVEL_ERROR 2           //!< Error message
+#define CV_LOG_LEVEL_WARN 3            //!< Warning message
+#define CV_LOG_LEVEL_INFO 4            //!< Info message
+#define CV_LOG_LEVEL_DEBUG 5           //!< Debug message. Disabled in the "Release" build.
+#define CV_LOG_LEVEL_VERBOSE 6         //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+
+//! Supported logging levels and their semantics (numeric values match the CV_LOG_LEVEL_* macros)
+enum LogLevel {
+    LOG_LEVEL_SILENT = 0,              //!< for use in setLogLevel() call
+    LOG_LEVEL_FATAL = 1,               //!< Fatal (critical) error (unrecoverable internal error)
+    LOG_LEVEL_ERROR = 2,               //!< Error message
+    LOG_LEVEL_WARNING = 3,             //!< Warning message
+    LOG_LEVEL_INFO = 4,                //!< Info message
+    LOG_LEVEL_DEBUG = 5,               //!< Debug message. Disabled in the "Release" build.
+    LOG_LEVEL_VERBOSE = 6,             //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+#ifndef CV_DOXYGEN
+    ENUM_LOG_LEVEL_FORCE_INT = INT_MAX // not a logging level; presumably forces int-sized enum storage - TODO confirm
+#endif
+};
+
+
+/**
+ * \def CV_LOG_STRIP_LEVEL
+ *
+ * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[VERBOSE|DEBUG|INFO|WARN|ERROR|FATAL|SILENT] to compile out messages at that logging level and at all more verbose levels
+ */
+#ifndef CV_LOG_STRIP_LEVEL
+# if defined NDEBUG
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG
+# else
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE
+# endif
+#endif
+
+
+#define CV_LOG_FATAL(tag, ...)   for(;;) { std::stringstream ss; ss << "[FATAL:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cerr << ss.str(); break; }
+#define CV_LOG_ERROR(tag, ...)   for(;;) { std::stringstream ss; ss << "[ERROR:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cerr << ss.str(); break; }
+#define CV_LOG_WARNING(tag, ...) for(;;) { std::stringstream ss; ss << "[ WARN:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; }
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO
+#define CV_LOG_INFO(tag, ...)
+#else
+#define CV_LOG_INFO(tag, ...)    for(;;) { std::stringstream ss; ss << "[ INFO:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; }
+#endif
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG
+#define CV_LOG_DEBUG(tag, ...)
+#else
+#define CV_LOG_DEBUG(tag, ...)   for(;;) { std::stringstream ss; ss << "[DEBUG:" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; }
+#endif
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE
+#define CV_LOG_VERBOSE(tag, v, ...)
+#else
+#define CV_LOG_VERBOSE(tag, v, ...) for(;;) { std::stringstream ss; ss << "[VERB" << v << ":" << cv::utils::getThreadID() << "] " << __VA_ARGS__ << std::endl; std::cout << ss.str(); break; }
+#endif
+
+
+}} // namespace
+
+//! @}
+
+#endif // OPENCV_LOGGING_HPP
diff --git a/modules/core/include/opencv2/core/utils/trace.hpp b/modules/core/include/opencv2/core/utils/trace.hpp
new file mode 100644 (file)
index 0000000..1539fb9
--- /dev/null
@@ -0,0 +1,250 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_TRACE_HPP
+#define OPENCV_TRACE_HPP
+
+#include <opencv2/core/cvdef.h>
+
+//! @addtogroup core_logging
+// This section describes OpenCV tracing utilities.
+//
+//! @{
+
+namespace cv {
+namespace utils {
+namespace trace {
+
+//! Macro to trace function
+#define CV_TRACE_FUNCTION()
+
+#define CV_TRACE_FUNCTION_SKIP_NESTED()
+
+//! Trace code scope.
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize".
+#define CV_TRACE_REGION(name_as_static_string_literal)
+//! Mark the currently opened region as completed and create a new one
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1".
+#define CV_TRACE_REGION_NEXT(name_as_static_string_literal)
+
+//! Macro to trace argument value
+#define CV_TRACE_ARG(arg_id)
+
+//! Macro to trace argument value (expanded version)
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value)
+
+//! @cond IGNORED
+#define CV_TRACE_NS cv::utils::trace
+
+namespace details {
+
+#ifndef __OPENCV_TRACE
+# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS
+#   define __OPENCV_TRACE 1
+# else
+#   define __OPENCV_TRACE 0
+# endif
+#endif
+
+#ifndef CV_TRACE_FILENAME
+# define CV_TRACE_FILENAME __FILE__
+#endif
+
+#ifndef CV__TRACE_FUNCTION
+# if defined _MSC_VER
+#   define CV__TRACE_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__TRACE_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__TRACE_FUNCTION "<unknown>"
+# endif
+#endif
+
+//! Thread-local instance (usually allocated on stack); the trace macros create one object per traced scope
+class CV_EXPORTS Region
+{
+public:
+    struct LocationExtraData;          // opaque here; only pointers to it are used in this header
+    struct LocationStaticStorage       // per-call-site descriptor; CV__TRACE_DEFINE_LOCATION_ instantiates it as a static const object
+    {
+        LocationExtraData** ppExtra;   //< implementation specific data
+        const char* name;              //< region name (function name or other custom name)
+        const char* filename;          //< source code filename
+        int line;                      //< source code line
+        int flags;                     //< flags (implementation code path: Plain, IPP, OpenCL)
+    };
+
+    Region(const LocationStaticStorage& location); // defined out of line; takes the call-site descriptor by reference
+    inline ~Region()
+    {
+        if (implFlags != 0)            // region was not ignored: delegate cleanup to destroy()
+            destroy();
+        CV_DbgAssert(implFlags == 0);  // debug-only check: both fields are expected to be reset at this point
+        CV_DbgAssert(pImpl == NULL);
+    }
+
+    class Impl;
+    Impl* pImpl; // NULL if current region is not active
+    int implFlags; // see RegionFlag, 0 if region is ignored
+
+    bool isActive() const { return pImpl != NULL; } // used by CV_TRACE_ARG_VALUE to skip argument tracing for inactive regions
+
+    void destroy(); // defined out of line; called by the destructor when implFlags != 0
+private:
+    Region(const Region&); // disabled
+    Region& operator= (const Region&); // disabled
+};
+
+//! Specify region flags (stored in Region::LocationStaticStorage::flags)
+enum RegionLocationFlag {
+    REGION_FLAG_FUNCTION = (1 << 0),             //< region is function (=1) / nested named region (=0)
+    REGION_FLAG_APP_CODE = (1 << 1),             //< region is Application code (=1) / OpenCV library code (=0)
+    REGION_FLAG_SKIP_NESTED = (1 << 2),          //< avoid processing of nested regions
+
+    REGION_FLAG_IMPL_IPP = (1 << 16),            //< region is part of IPP code path
+    REGION_FLAG_IMPL_OPENCL = (2 << 16),         //< region is part of OpenCL code path
+    REGION_FLAG_IMPL_OPENVX = (3 << 16),         //< region is part of OpenVX code path
+
+    REGION_FLAG_IMPL_MASK = (15 << 16),          //< mask for bits 16..19; the REGION_FLAG_IMPL_* values above are enumerated values in this field, not independent bits
+
+    REGION_FLAG_REGION_FORCE = (1 << 30),        //< NOTE(review): meaning is not visible here; presumably forces the region to be processed - confirm
+    REGION_FLAG_REGION_NEXT = (1 << 31),         //< close previous region (see #CV_TRACE_REGION_NEXT macro). NOTE(review): shifts into the sign bit of a 32-bit int - confirm this is intended
+
+    ENUM_REGION_FLAG_FORCE_INT = INT_MAX         // not a flag; presumably forces int-sized enum storage - TODO confirm
+};
+
+struct CV_EXPORTS TraceArg { // static descriptor of a traced argument; instantiated by CV__TRACE_DEFINE_ARG_
+public:
+    struct ExtraData;       // opaque here; only pointers to it are used in this header
+    ExtraData** ppExtra;    // address of an ExtraData* slot (CV__TRACE_DEFINE_ARG_ points it at a static pointer initialized to 0)
+    const char* name;       // argument name (CV__TRACE_ARG passes the stringized argument identifier)
+    int flags;              // CV__TRACE_ARG_VALUE always passes 0 here
+};
+/** @brief Add meta information to current region (function)
+ * See CV_TRACE_ARG macro
+ * @param arg argument information structure (global static cache)
+ * @param value argument value (can be a dynamically allocated string in case of string values; static allocation is not required)
+ */
+CV_EXPORTS void traceArg(const TraceArg& arg, const char* value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int64 value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, double value);
+
+#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__)
+#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__)
+
+#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \
+    static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \
+    static const CV_TRACE_NS::details::Region::LocationStaticStorage \
+        CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags};
+
+#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, (flags | CV_TRACE_NS::details::REGION_FLAG_FUNCTION))
+
+
+#define CV__TRACE_OPENCV_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+
+#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+
+#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \
+    CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \
+    CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region));
+
+#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0)
+#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT)
+
+#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__)
+#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__)
+
+#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \
+    static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \
+    static const CV_TRACE_NS::details::TraceArg \
+        CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags };
+
+#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \
+        CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value);
+
+#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id))
+
+} // namespace
+
+#ifndef OPENCV_DISABLE_TRACE
+#undef CV_TRACE_FUNCTION
+#undef CV_TRACE_FUNCTION_SKIP_NESTED
+#if __OPENCV_TRACE
+#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED
+#else
+#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED
+#endif
+
+#undef CV_TRACE_REGION
+#define CV_TRACE_REGION CV__TRACE_REGION
+
+#undef CV_TRACE_REGION_NEXT
+#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT
+
+#undef CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        if (__region_fn.isActive()) \
+        { \
+            CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \
+        }
+
+#undef CV_TRACE_ARG
+#define CV_TRACE_ARG CV__TRACE_ARG
+
+#endif // OPENCV_DISABLE_TRACE
+
+#ifdef OPENCV_TRACE_VERBOSE
+#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION
+#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION
+#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT
+#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG
+#else
+#define CV_TRACE_FUNCTION_VERBOSE(...)
+#define CV_TRACE_REGION_VERBOSE(...)
+#define CV_TRACE_REGION_NEXT_VERBOSE(...)
+#define CV_TRACE_ARG_VALUE_VERBOSE(...)
+#define CV_TRACE_ARG_VERBOSE(...)
+#endif
+
+//! @endcond
+
+}}} // namespace
+
+//! @}
+
+#endif // OPENCV_TRACE_HPP
diff --git a/modules/core/include/opencv2/core/utils/trace.private.hpp b/modules/core/include/opencv2/core/utils/trace.private.hpp
new file mode 100644 (file)
index 0000000..1798166
--- /dev/null
@@ -0,0 +1,419 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_TRACE_PRIVATE_HPP
+#define OPENCV_TRACE_PRIVATE_HPP
+
+#ifdef OPENCV_TRACE
+
+#include <opencv2/core/utils/logger.hpp>
+
+#include "trace.hpp"
+
+//! @cond IGNORED
+
+#include <deque>
+#include <ostream>
+
+#define INTEL_ITTNOTIFY_API_PRIVATE 1
+#ifdef OPENCV_WITH_ITT
+#include "ittnotify.h"
+#endif
+
+#ifndef DEBUG_ONLY
+#ifdef _DEBUG
+#define DEBUG_ONLY(...) __VA_ARGS__
+#else
+#define DEBUG_ONLY(...) (void)0
+#endif
+#endif
+
+#ifndef DEBUG_ONLY_
+#ifdef _DEBUG
+#define DEBUG_ONLY_(...) __VA_ARGS__
+#else
+#define DEBUG_ONLY_(...)
+#endif
+#endif
+
+
+namespace cv {
+namespace utils {
+namespace trace {
+namespace details {
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME_(name, flags) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, flags); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+
+enum RegionFlag { // bit-mask values: every real flag occupies its own bit
+    REGION_FLAG__NEED_STACK_POP = (1 << 0), // bit 0
+    REGION_FLAG__ACTIVE = (1 << 1), // bit 1
+
+    ENUM_REGION_FLAG_IMPL_FORCE_INT = INT_MAX // not a flag; presumably widens the enum's value range to int (per its name)
+};
+
+
+class TraceMessage;
+
+class TraceStorage // abstract interface: a destination that trace messages are put into
+{
+public:
+    TraceStorage() { }
+    virtual ~TraceStorage() { } // virtual so implementations can be destroyed through this interface
+    virtual bool put(const TraceMessage& msg) const = 0; // hand over one message; the meaning of the result is up to the implementation
+};
+
+struct RegionStatistics // counters collected for a region: skipped-region count plus total and per-backend durations
+{
+    int currentSkippedRegions;
+
+    int64 duration; // total duration; the per-backend fields below exist only when that backend is compiled in
+#ifdef HAVE_IPP
+    int64 durationImplIPP;
+#endif
+#ifdef HAVE_OPENCL
+    int64 durationImplOpenCL;
+#endif
+#ifdef HAVE_OPENVX
+    int64 durationImplOpenVX;
+#endif
+
+    RegionStatistics() : // all counters start at zero
+        currentSkippedRegions(0),
+        duration(0)
+#ifdef HAVE_IPP
+        ,durationImplIPP(0)
+#endif
+#ifdef HAVE_OPENCL
+        ,durationImplOpenCL(0)
+#endif
+#ifdef HAVE_OPENVX
+        ,durationImplOpenVX(0)
+#endif
+    {}
+
+    void grab(RegionStatistics& result) // move-out: copy every counter into `result`, then zero it here
+    {
+        result.currentSkippedRegions = currentSkippedRegions; currentSkippedRegions = 0;
+        result.duration = duration; duration = 0;
+#ifdef HAVE_IPP
+        result.durationImplIPP = durationImplIPP; durationImplIPP = 0;
+#endif
+#ifdef HAVE_OPENCL
+        result.durationImplOpenCL = durationImplOpenCL; durationImplOpenCL = 0;
+#endif
+#ifdef HAVE_OPENVX
+        result.durationImplOpenVX = durationImplOpenVX; durationImplOpenVX = 0;
+#endif
+    }
+
+    void append(const RegionStatistics& stat) // add every counter of `stat` to this object; `stat` is only read
+    {
+        currentSkippedRegions += stat.currentSkippedRegions;
+        duration += stat.duration;
+#ifdef HAVE_IPP
+        durationImplIPP += stat.durationImplIPP;
+#endif
+#ifdef HAVE_OPENCL
+        durationImplOpenCL += stat.durationImplOpenCL;
+#endif
+#ifdef HAVE_OPENVX
+        durationImplOpenVX += stat.durationImplOpenVX;
+#endif
+    }
+
+    void multiply(const float c) // scale the durations by `c` (currentSkippedRegions is left untouched)
+    {
+        duration = (int64)(duration * (double)c); // double keeps 53 significant bits; int64*float would round to ~24
+#ifdef HAVE_IPP
+        durationImplIPP = (int64)(durationImplIPP * (double)c);
+#endif
+#ifdef HAVE_OPENCL
+        durationImplOpenCL = (int64)(durationImplOpenCL * (double)c);
+#endif
+#ifdef HAVE_OPENVX
+        durationImplOpenVX = (int64)(durationImplOpenVX * (double)c);
+#endif
+    }
+};
+
+static inline
+std::ostream& operator<<(std::ostream& out, const RegionStatistics& stat)
+{
+    // Prints "skip=<n> duration=<t>", then one " durationImpl<X>=<t>" field per compiled-in backend.
+    out << "skip=" << stat.currentSkippedRegions;
+    out << " duration=" << stat.duration;
+#ifdef HAVE_IPP
+    out << " durationImplIPP=" << stat.durationImplIPP;
+#endif
+#ifdef HAVE_OPENCL
+    out << " durationImplOpenCL=" << stat.durationImplOpenCL;
+#endif
+#ifdef HAVE_OPENVX
+    out << " durationImplOpenVX=" << stat.durationImplOpenVX;
+#endif
+    return out;
+}
+
+struct RegionStatisticsStatus // skip-depth and per-backend ignore-depth state
+{
+    int _skipDepth; // -1 after reset(); values >= 0 are what operator<< reports as "SKIP=<depth>"
+#ifdef HAVE_IPP
+    int ignoreDepthImplIPP;
+#endif
+#ifdef HAVE_OPENCL
+    int ignoreDepthImplOpenCL;
+#endif
+#ifdef HAVE_OPENVX
+    int ignoreDepthImplOpenVX;
+#endif
+
+    RegionStatisticsStatus() { reset(); }
+
+    void reset() // initial state: skip depth -1, every ignore depth 0
+    {
+        _skipDepth = -1;
+#ifdef HAVE_IPP
+        ignoreDepthImplIPP = 0;
+#endif
+#ifdef HAVE_OPENCL
+        ignoreDepthImplOpenCL = 0;
+#endif
+#ifdef HAVE_OPENVX
+        ignoreDepthImplOpenVX = 0;
+#endif
+    }
+
+    void propagateFrom(const RegionStatisticsStatus& src) // rebuild this status from `src`, keeping only on/off information
+    {
+        _skipDepth = -1;
+        if (src._skipDepth >= 0)
+            enableSkipMode(0); // src has a skip depth set: enable skip mode here with depth 0 (defined out of line)
+#ifdef HAVE_IPP
+        ignoreDepthImplIPP = src.ignoreDepthImplIPP ? 1 : 0; // non-zero depth collapses to 1
+#endif
+#ifdef HAVE_OPENCL
+        ignoreDepthImplOpenCL = src.ignoreDepthImplOpenCL ? 1 : 0;
+#endif
+#ifdef HAVE_OPENVX
+        ignoreDepthImplOpenVX = src.ignoreDepthImplOpenVX ? 1 : 0;
+#endif
+    }
+
+    void enableSkipMode(int depth); // declared only; implementation is not in this header
+    void checkResetSkipMode(int leaveDepth); // declared only; implementation is not in this header
+};
+
+static inline
+std::ostream& operator<<(std::ostream& out, const RegionStatisticsStatus& s)
+{
+    out << "ignore={"; // overall shape: ignore={ SKIP=<n> IPP=<n> OpenCL=<n> OpenVX=<n>}
+    if (s._skipDepth >= 0) // negative skip depth is not printed
+        out << " SKIP=" << s._skipDepth;
+#ifdef HAVE_IPP
+    if (s.ignoreDepthImplIPP) // zero ignore depths are not printed
+        out << " IPP=" << s.ignoreDepthImplIPP;
+#endif
+#ifdef HAVE_OPENCL
+    if (s.ignoreDepthImplOpenCL)
+        out << " OpenCL=" << s.ignoreDepthImplOpenCL;
+#endif
+#ifdef HAVE_OPENVX
+    if (s.ignoreDepthImplOpenVX)
+        out << " OpenVX=" << s.ignoreDepthImplOpenVX;
+#endif
+    out << "}";
+    return out;
+}
+
+//! TraceManager state kept per thread: region stack, depth counters, statistics and a storage handle
+struct TraceManagerThreadLocal
+{
+    const int threadID; // taken from cv::utils::getThreadID() at construction
+    int region_counter;
+
+    size_t totalSkippedEvents;
+
+    Region* currentActiveRegion; // NULL until set; exposed through getCurrentActiveRegion()
+
+    struct StackEntry // one element of the region stack
+    {
+        Region* region;
+        const Region::LocationStaticStorage* location;
+        int64 beginTimestamp;
+        StackEntry(Region* region_, const Region::LocationStaticStorage* location_, int64 beginTimestamp_) :
+            region(region_), location(location_), beginTimestamp(beginTimestamp_)
+        {}
+        StackEntry() : region(NULL), location(NULL), beginTimestamp(-1) {} // "empty" entry: no region, no location, timestamp -1
+    };
+    std::deque<StackEntry> stack; // back() is the top of the stack
+
+    int regionDepth;                   // functions only (no named regions)
+    int regionDepthOpenCV;             // functions from OpenCV library
+
+    RegionStatistics stat;
+    RegionStatisticsStatus stat_status;
+
+    StackEntry dummy_stack_top;        // parallel_for root region
+    RegionStatistics parallel_for_stat;
+    RegionStatisticsStatus parallel_for_stat_status;
+    size_t parallel_for_stack_size;
+
+
+    mutable cv::Ptr<TraceStorage> storage; // mutable: can be assigned from the const getStorage()
+
+    TraceManagerThreadLocal() : // counters and depths start at zero, no active region
+        threadID(cv::utils::getThreadID()),
+        region_counter(0), totalSkippedEvents(0),
+        currentActiveRegion(NULL),
+        regionDepth(0),
+        regionDepthOpenCV(0),
+        parallel_for_stack_size(0)
+    {
+    }
+
+    ~TraceManagerThreadLocal();
+
+    TraceStorage* getStorage() const;
+
+    void recordLocation(const Region::LocationStaticStorage& location);
+    void recordRegionEnter(const Region& region);
+    void recordRegionLeave(const Region& region, const RegionStatistics& result);
+    void recordRegionArg(const Region& region, const TraceArg& arg, const char& value);
+
+    inline void stackPush(Region* region, const Region::LocationStaticStorage* location, int64 beginTimestamp)
+    {
+        stack.push_back(StackEntry(region, location, beginTimestamp));
+    }
+    inline Region* stackTopRegion() const // region of the top entry; dummy_stack_top's region when the stack is empty
+    {
+        if (stack.empty())
+            return dummy_stack_top.region;
+        return stack.back().region;
+    }
+    inline const Region::LocationStaticStorage* stackTopLocation() const // same empty-stack fallback as stackTopRegion()
+    {
+        if (stack.empty())
+            return dummy_stack_top.location;
+        return stack.back().location;
+    }
+    inline int64 stackTopBeginTimestamp() const // same empty-stack fallback as stackTopRegion()
+    {
+        if (stack.empty())
+            return dummy_stack_top.beginTimestamp;
+        return stack.back().beginTimestamp;
+    }
+    inline void stackPop() // caller must ensure the stack is not empty
+    {
+        CV_DbgAssert(!stack.empty());
+        stack.pop_back();
+    }
+    void dumpStack(std::ostream& out, bool onlyFunctions) const;
+
+    inline Region* getCurrentActiveRegion()
+    {
+        return currentActiveRegion;
+    }
+
+    inline int getCurrentDepth() const { return (int)stack.size(); } // counts real stack entries only, never dummy_stack_top
+};
+
+class CV_EXPORTS TraceManager // shared trace state; callers reach it through getTraceManager()
+{
+public:
+    TraceManager();
+    ~TraceManager();
+
+    static bool isActivated();
+
+    Mutex mutexCreate;
+    Mutex mutexCount;
+
+    TLSData<TraceManagerThreadLocal> tls; // per-thread TraceManagerThreadLocal, fetched with tls.get()
+
+    cv::Ptr<TraceStorage> trace_storage;
+private:
+    // disable copying
+    TraceManager(const TraceManager&); // declared only, never defined
+    TraceManager& operator=(const TraceManager&); // declared only, never defined
+};
+
+CV_EXPORTS TraceManager& getTraceManager();
+inline Region* getCurrentActiveRegion() { return getTraceManager().tls.get()->getCurrentActiveRegion(); } // calling thread's currentActiveRegion
+inline Region* getCurrentRegion() { return getTraceManager().tls.get()->stackTopRegion(); } // top of the calling thread's region stack
+// Hooks called from the parallel_for_ wrapper code; their implementations are not part of this header.
+void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx);
+void parallelForAttachNestedRegion(const Region& rootRegion);
+void parallelForFinalize(const Region& rootRegion);
+
+
+
+
+
+
+
+struct Region::LocationExtraData // extra data attached to a LocationStaticStorage
+{
+    int global_location_id; // 0 - region is disabled
+#ifdef OPENCV_WITH_ITT
+    // Special fields for ITT
+    __itt_string_handle* volatile ittHandle_name;
+    __itt_string_handle* volatile ittHandle_filename;
+#endif
+    LocationExtraData(const LocationStaticStorage& location);
+
+    static Region::LocationExtraData* init(const Region::LocationStaticStorage& location); // defined out of line; presumably creates the data on first use — confirm in trace.cpp
+};
+
+class Region::Impl // implementation object behind a Region
+{
+public:
+    const LocationStaticStorage& location;
+
+    Region& region; // the Region this Impl belongs to
+    Region* const parentRegion;
+
+    const int threadID;
+    const int global_region_id;
+
+    const int64 beginTimestamp; // fixed at construction (constructor parameter beginTimestamp_)
+    int64 endTimestamp;
+
+    int directChildrenCount;
+
+    enum OptimizationPath { // PLAIN plus one value per backend name: IPP, OpenCL, OpenVX
+        CODE_PATH_PLAIN = 0,
+        CODE_PATH_IPP,
+        CODE_PATH_OPENCL,
+        CODE_PATH_OPENVX
+    };
+
+#ifdef OPENCV_WITH_ITT
+    bool itt_id_registered;
+    __itt_id itt_id;
+#endif
+
+    Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_);
+
+    void enterRegion(TraceManagerThreadLocal& ctx);
+    void leaveRegion(TraceManagerThreadLocal& ctx);
+
+    void registerRegion(TraceManagerThreadLocal& ctx);
+
+    void release(); // public teardown entry point; presumably destroys the object — confirm in trace.cpp
+protected:
+    ~Impl(); // protected: outside code cannot delete an Impl directly
+};
+
+
+
+}}}} // namespace
+
+//! @endcond
+
+#endif
+
+#endif // OPENCV_TRACE_PRIVATE_HPP
index 4e7701a..24f4dfb 100644 (file)
@@ -47,14 +47,17 @@ namespace cv
 
 Algorithm::Algorithm()
 {
+    CV_TRACE_FUNCTION();
 }
 
 Algorithm::~Algorithm()
 {
+    CV_TRACE_FUNCTION();
 }
 
 void Algorithm::save(const String& filename) const
 {
+    CV_TRACE_FUNCTION();
     FileStorage fs(filename, FileStorage::WRITE);
     fs << getDefaultName() << "{";
     write(fs);
@@ -63,11 +66,13 @@ void Algorithm::save(const String& filename) const
 
 String Algorithm::getDefaultName() const
 {
+    CV_TRACE_FUNCTION();
     return String("my_object");
 }
 
 void Algorithm::writeFormat(FileStorage& fs) const
 {
+    CV_TRACE_FUNCTION();
     fs << "format" << (int)3;
 }
 
index ec039d4..6e41bb8 100644 (file)
@@ -907,7 +907,7 @@ Mat repeat(const Mat& src, int ny, int nx)
  */
 int cv::borderInterpolate( int p, int len, int borderType )
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_FUNCTION_VERBOSE();
 
     if( (unsigned)p < (unsigned)len )
         ;
index 584efcf..5439933 100644 (file)
@@ -74,6 +74,7 @@ public:
 
     void operator()( const cv::Range& range ) const
     {
+        CV_TRACE_FUNCTION();
         const int begin = range.start;
         const int end = range.end;
 
@@ -101,6 +102,7 @@ Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
 static void generateCentersPP(const Mat& _data, Mat& _out_centers,
                               int K, RNG& rng, int trials)
 {
+    CV_TRACE_FUNCTION();
     int i, j, k, dims = _data.cols, N = _data.rows;
     const float* data = _data.ptr<float>(0);
     size_t step = _data.step/sizeof(data[0]);
index c732dab..724d54e 100644 (file)
@@ -3130,7 +3130,7 @@ void cv::hconcat(InputArray _src, OutputArray dst)
 
 void cv::vconcat(const Mat* src, size_t nsrc, OutputArray _dst)
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_FUNCTION_SKIP_NESTED()
 
     if( nsrc == 0 || !src )
     {
index c519ad8..7d7c24e 100644 (file)
 # endif
 #endif
 
-
-// TODO Move to some common place
-static bool getBoolParameter(const char* name, bool defaultValue)
-{
-/*
- * If your system doesn't support getenv(), define NO_GETENV to disable
- * this feature.
- */
-#ifdef NO_GETENV
-    const char* envValue = NULL;
-#else
-    const char* envValue = getenv(name);
-#endif
-    if (envValue == NULL)
-    {
-        return defaultValue;
-    }
-    cv::String value = envValue;
-    if (value == "1" || value == "True" || value == "true" || value == "TRUE")
-    {
-        return true;
-    }
-    if (value == "0" || value == "False" || value == "false" || value == "FALSE")
-    {
-        return false;
-    }
-    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
-}
-
-
-// TODO Move to some common place
-static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
-{
-#ifdef NO_GETENV
-    const char* envValue = NULL;
-#else
-    const char* envValue = getenv(name);
-#endif
-    if (envValue == NULL)
-    {
-        return defaultValue;
-    }
-    cv::String value = envValue;
-    size_t pos = 0;
-    for (; pos < value.size(); pos++)
-    {
-        if (!isdigit(value[pos]))
-            break;
-    }
-    cv::String valueStr = value.substr(0, pos);
-    cv::String suffixStr = value.substr(pos, value.length() - pos);
-    int v = atoi(valueStr.c_str());
-    if (suffixStr.length() == 0)
-        return v;
-    else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
-        return v * 1024 * 1024;
-    else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
-        return v * 1024;
-    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
-}
-
 #if CV_OPENCL_SHOW_SVM_LOG
 // TODO add timestamp logging
 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
@@ -159,7 +98,7 @@ static bool isRaiseError()
     static bool value = false;
     if (!initialized)
     {
-        value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
+        value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR", false);
         initialized = true;
     }
     return value;
@@ -1232,7 +1171,7 @@ static bool checkForceSVMUmatUsage()
     static bool force = false;
     if (!initialized)
     {
-        force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
+        force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
         initialized = true;
     }
     return force;
@@ -1243,7 +1182,7 @@ static bool checkDisableSVMUMatUsage()
     static bool force = false;
     if (!initialized)
     {
-        force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
+        force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
         initialized = true;
     }
     return force;
@@ -1254,7 +1193,7 @@ static bool checkDisableSVM()
     static bool force = false;
     if (!initialized)
     {
-        force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false);
+        force = utils::getConfigurationParameterBool("OPENCV_OPENCL_SVM_DISABLE", false);
         initialized = true;
     }
     return force;
@@ -1285,7 +1224,7 @@ static size_t getProgramCountLimit()
     static size_t count = 0;
     if (!initialized)
     {
-        count = getConfigurationParameterForSize("OPENCV_OPENCL_PROGRAM_CACHE", 0);
+        count = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_PROGRAM_CACHE", 0);
         initialized = true;
     }
     return count;
@@ -3195,12 +3134,12 @@ public:
     {
         size_t defaultPoolSize, poolSize;
         defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
-        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
+        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
         bufferPool.setMaxReservedSize(poolSize);
-        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
+        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
         bufferPoolHostPtr.setMaxReservedSize(poolSize);
 #ifdef HAVE_OPENCL_SVM
-        poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
+        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
         bufferPoolSVM.setMaxReservedSize(poolSize);
 #endif
 
@@ -4980,7 +4919,7 @@ bool internal::isOpenCLForced()
     static bool value = false;
     if (!initialized)
     {
-        value = getBoolParameter("OPENCV_OPENCL_FORCE", false);
+        value = utils::getConfigurationParameterBool("OPENCV_OPENCL_FORCE", false);
         initialized = true;
     }
     return value;
@@ -4992,7 +4931,7 @@ bool internal::isPerformanceCheckBypassed()
     static bool value = false;
     if (!initialized)
     {
-        value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
+        value = utils::getConfigurationParameterBool("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
         initialized = true;
     }
     return value;
index 3bbf028..71f6b11 100644 (file)
@@ -42,6 +42,8 @@
 
 #include "precomp.hpp"
 
+#include <opencv2/core/utils/trace.private.hpp>
+
 #if defined WIN32 || defined WINCE
     #include <windows.h>
     #undef small
@@ -163,10 +165,10 @@ namespace
     }
 #endif
 
-    class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
+    class ParallelLoopBodyWrapperContext
     {
     public:
-        ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) :
+        ParallelLoopBodyWrapperContext(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) :
             is_rng_used(false)
         {
 
@@ -178,11 +180,16 @@ namespace
             // propagate main thread state
             rng = cv::theRNG();
 
+#ifdef OPENCV_TRACE
+            traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
+            traceRootContext = CV_TRACE_NS::details::getTraceManager().tls.get();
+#endif
+
 #ifdef ENABLE_INSTRUMENTATION
             pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
 #endif
         }
-        ~ParallelLoopBodyWrapper()
+        ~ParallelLoopBodyWrapperContext()
         {
 #ifdef ENABLE_INSTRUMENTATION
             for(size_t i = 0; i < pThreadRoot->m_childs.size(); i++)
@@ -198,49 +205,91 @@ namespace
                 // Note: this behaviour is not equal to single-threaded mode.
                 cv::theRNG().next();
             }
+#ifdef OPENCV_TRACE
+            if (traceRootRegion)
+                CV_TRACE_NS::details::parallelForFinalize(*traceRootRegion);
+#endif
+        }
+
+        const cv::ParallelLoopBody* body;
+        cv::Range wholeRange;
+        int nstripes;
+        cv::RNG rng;
+        mutable bool is_rng_used;
+#ifdef OPENCV_TRACE
+        CV_TRACE_NS::details::Region* traceRootRegion;
+        CV_TRACE_NS::details::TraceManagerThreadLocal* traceRootContext;
+#endif
+#ifdef ENABLE_INSTRUMENTATION
+        cv::instr::InstrNode *pThreadRoot;
+#endif
+    private:
+        ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
+        ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
+    };
+
+    class ParallelLoopBodyWrapper : public cv::ParallelLoopBody
+    {
+    public:
+        ParallelLoopBodyWrapper(ParallelLoopBodyWrapperContext& ctx_) :
+            ctx(ctx_)
+        {
+        }
+        ~ParallelLoopBodyWrapper()
+        {
         }
         void operator()(const cv::Range& sr) const
         {
+#ifdef OPENCV_TRACE
+            // TODO CV_TRACE_NS::details::setCurrentRegion(rootRegion);
+            if (ctx.traceRootRegion && ctx.traceRootContext)
+                CV_TRACE_NS::details::parallelForSetRootRegion(*ctx.traceRootRegion, *ctx.traceRootContext);
+            CV__TRACE_OPENCV_FUNCTION_NAME("parallel_for_body");
+            if (ctx.traceRootRegion)
+                CV_TRACE_NS::details::parallelForAttachNestedRegion(*ctx.traceRootRegion);
+#endif
+
 #ifdef ENABLE_INSTRUMENTATION
             {
                 cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
-                pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
+                pInstrTLS->pCurrentNode = ctx.pThreadRoot; // Initialize TLS node for thread
             }
-#endif
             CV_INSTRUMENT_REGION()
+#endif
 
             // propagate main thread state
-            cv::theRNG() = rng;
+            cv::theRNG() = ctx.rng;
 
             cv::Range r;
+            cv::Range wholeRange = ctx.wholeRange;
+            int nstripes = ctx.nstripes;
             r.start = (int)(wholeRange.start +
                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
                             ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
-            (*body)(r);
 
-            if (!is_rng_used && !(cv::theRNG() == rng))
-                is_rng_used = true;
+#ifdef OPENCV_TRACE
+            CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)r.start);
+            CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)r.end);
+#endif
+
+            (*ctx.body)(r);
+
+            if (!ctx.is_rng_used && !(cv::theRNG() == ctx.rng))
+                ctx.is_rng_used = true;
         }
-        cv::Range stripeRange() const { return cv::Range(0, nstripes); }
+        cv::Range stripeRange() const { return cv::Range(0, ctx.nstripes); }
 
     protected:
-        const cv::ParallelLoopBody* body;
-        cv::Range wholeRange;
-        int nstripes;
-        cv::RNG rng;
-        mutable bool is_rng_used;
-#ifdef ENABLE_INSTRUMENTATION
-        cv::instr::InstrNode *pThreadRoot;
-#endif
+        ParallelLoopBodyWrapperContext& ctx;
     };
 
 #if defined HAVE_TBB
     class ProxyLoopBody : public ParallelLoopBodyWrapper
     {
     public:
-        ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
-        : ParallelLoopBodyWrapper(_body, _r, _nstripes)
+        ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx_)
+        : ParallelLoopBodyWrapper(ctx_)
         {}
 
         void operator ()(const tbb::blocked_range<int>& range) const
@@ -261,8 +310,8 @@ namespace
     class ProxyLoopBody : public ParallelLoopBodyWrapper
     {
     public:
-        ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
-        : ParallelLoopBodyWrapper(_body, _r, _nstripes)
+        ProxyLoopBody(ParallelLoopBodyWrapperContext& ctx)
+        : ParallelLoopBodyWrapper(ctx)
         {}
 
         void operator ()(int i) const
@@ -316,19 +365,30 @@ static SchedPtr pplScheduler;
 
 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
 {
+#ifdef OPENCV_TRACE
+    CV__TRACE_OPENCV_FUNCTION_NAME_("parallel_for", 0);
+    CV_TRACE_ARG_VALUE(range_start, "range.start", (int64)range.start);
+    CV_TRACE_ARG_VALUE(range_end, "range.end", (int64)range.end);
+    CV_TRACE_ARG_VALUE(nstripes, "nstripes", (int64)nstripes);
+#endif
+
     CV_INSTRUMENT_REGION_MT_FORK()
     if (range.empty())
         return;
 
 #ifdef CV_PARALLEL_FRAMEWORK
 
-    if(numThreads != 0)
+    static int flagNestedParallelFor = 0;
+    bool isNotNesterParallelFor = CV_XADD(&flagNestedParallelFor, 1) == 0;
+    if(numThreads != 0 && isNotNesterParallelFor)
     {
-        ProxyLoopBody pbody(body, range, nstripes);
+        ParallelLoopBodyWrapperContext ctx(body, range, nstripes);
+        ProxyLoopBody pbody(ctx);
         cv::Range stripeRange = pbody.stripeRange();
         if( stripeRange.end - stripeRange.start == 1 )
         {
             body(range);
+            flagNestedParallelFor = 0;
             return;
         }
 
@@ -384,7 +444,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
 #error You have hacked and compiling with unsupported parallel framework
 
 #endif
-
+        flagNestedParallelFor = 0;
     }
     else
 
index 4aeceeb..d17d3cb 100644 (file)
@@ -309,6 +309,8 @@ void ForThread::execute()
 
 void ForThread::thread_body()
 {
+    (void)cv::utils::getThreadID(); // notify OpenCV about new thread
+
     m_parent->m_is_work_thread.get()->value = true;
 
     pthread_mutex_lock(&m_thread_mutex);
index e7ab982..6c947e2 100644 (file)
@@ -6921,7 +6921,7 @@ FileNode FileStorage::root(int streamidx) const
 
 FileStorage& operator << (FileStorage& fs, const String& str)
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_REGION_VERBOSE();
 
     enum { NAME_EXPECTED = FileStorage::NAME_EXPECTED,
         VALUE_EXPECTED = FileStorage::VALUE_EXPECTED,
index b17ad2b..e752808 100644 (file)
@@ -299,6 +299,12 @@ TLSData<CoreTLSData>& getCoreTlsData();
 #define CL_RUNTIME_EXPORT
 #endif
 
+namespace utils {
+bool getConfigurationParameterBool(const char* name, bool defaultValue);
+size_t getConfigurationParameterSizeT(const char* name, size_t defaultValue);
+cv::String getConfigurationParameterString(const char* name, const char* defaultValue);
+}
+
 extern bool __termination; // skip some cleanups, because process is terminating
                            // (for example, if ExitProcess() was already called)
 
index c3317dd..db653ab 100644 (file)
@@ -44,6 +44,8 @@
 #include "precomp.hpp"
 #include <iostream>
 
+#include <opencv2/core/utils/trace.private.hpp>
+
 namespace cv {
 
 static Mutex* __initialization_mutex = NULL;
@@ -1490,6 +1492,7 @@ void TLSDataContainer::cleanup()
 
 void* TLSDataContainer::getData() const
 {
+    CV_Assert(key_ != -1 && "Can't fetch data from terminated TLS container.");
     void* pData = getTlsStorage().getData(key_); // Check if data was already allocated
     if(!pData)
     {
@@ -1534,6 +1537,99 @@ BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
 }
 #endif
 
+
+namespace {
+static int g_threadNum = 0; // bumped by one for every ThreadID constructed
+class ThreadID { // holds an id that is assigned once, at construction
+public:
+    const int id;
+    ThreadID() :
+        id(CV_XADD(&g_threadNum, 1)) // assuming CV_XADD is an atomic fetch-and-add, ids start at 0 — confirm
+    {
+#ifdef OPENCV_WITH_ITT
+        __itt_thread_set_name(cv::format("OpenCVThread-%03d", id).c_str()); // label the thread for ITT using its id
+#endif
+    }
+};
+
+static TLSData<ThreadID>& getThreadIDTLS() // accessor for the TLSData<ThreadID> container created by the macro below
+{
+    CV_SINGLETON_LAZY_INIT_REF(TLSData<ThreadID>, new TLSData<ThreadID>());
+}
+
+} // namespace
+int utils::getThreadID() { return getThreadIDTLS().get()->id; } // id stored in the calling thread's ThreadID
+
+bool utils::getConfigurationParameterBool(const char* name, bool defaultValue)
+{
+    // Reads environment variable `name` as an on/off switch.
+    // Not set -> defaultValue; "1"/"True"/"true"/"TRUE" -> true; "0"/"False"/"false"/"FALSE" -> false;
+    // any other text raises cv::Error::StsBadArg.
+#ifndef NO_GETENV
+    const char* const rawText = getenv(name);
+#else
+    const char* const rawText = NULL; // built without getenv(): behave as if the variable is not set
+#endif
+    if (!rawText)
+        return defaultValue;
+    const cv::String text = rawText;
+    const bool isTrue = text == "1" || text == "True" || text == "true" || text == "TRUE";
+    const bool isFalse = text == "0" || text == "False" || text == "false" || text == "FALSE";
+    if (isTrue)
+        return true;
+    if (isFalse)
+        return false;
+
+    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, text.c_str()));
+}
+
+
+size_t utils::getConfigurationParameterSizeT(const char* name, size_t defaultValue)
+{
+    // Reads environment variable `name` as a size: decimal digits plus an optional KB/Kb/kb (x1024)
+    // or MB/Mb/mb (x1024*1024) suffix. Not set -> defaultValue; any other suffix raises StsBadArg.
+#ifdef NO_GETENV
+    const char* envValue = NULL;
+#else
+    const char* envValue = getenv(name);
+#endif
+    if (envValue == NULL)
+        return defaultValue;
+    cv::String value = envValue;
+    size_t pos = 0, v = 0; // v is accumulated as size_t: the former atoi()/int path overflowed for e.g. "4096MB"
+    for (; pos < value.size(); pos++)
+    {
+        const char c = value[pos];
+        if (c < '0' || c > '9') // explicit range test: well-defined for negative char values, unlike isdigit(char)
+            break;
+        v = v * 10 + (size_t)(c - '0');
+    }
+    cv::String suffixStr = value.substr(pos, value.length() - pos);
+    if (suffixStr.length() == 0)
+        return v;
+    else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
+        return v * 1024 * 1024;
+    else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
+        return v * 1024;
+    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
+}
+
+cv::String utils::getConfigurationParameterString(const char* name, const char* defaultValue)
+{
+    // Returns the text of environment variable `name`,
+    // or defaultValue when the variable is not set.
+#ifndef NO_GETENV
+    const char* const rawText = getenv(name);
+#else
+    const char* const rawText = NULL; // built without getenv(): always fall back to defaultValue
+#endif
+
+    // A variable that is set but empty yields an empty string, not defaultValue.
+    const char* const chosen = (rawText != NULL) ? rawText : defaultValue;
+    return cv::String(chosen);
+}
+
+
 #ifdef CV_COLLECT_IMPL_DATA
 ImplCollector& getImplData()
 {
diff --git a/modules/core/src/trace.cpp b/modules/core/src/trace.cpp
new file mode 100644 (file)
index 0000000..157023e
--- /dev/null
@@ -0,0 +1,1115 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include <precomp.hpp>
+
+#include <opencv2/core/utils/trace.hpp>
+#include <opencv2/core/utils/trace.private.hpp>
+
+#include <cstdarg> // va_start
+
+#include <sstream>
+#include <ostream>
+#include <fstream>
+
+// Compile-time switches for the internal debug log channels of this file.
+// Each channel is either forwarded to CV_LOG_INFO (its "#if 1" branch) or
+// compiled out to an empty block (its "#else" branch).
+
+// General region/location tracing messages
+#if 0
+#define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__)
+#else
+#define CV_LOG(...) {}
+#endif
+
+// Messages about the ITT backend
+#if 0
+#define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__)
+#else
+#define CV_LOG_ITT(...) {}
+#endif
+
+// Messages emitted when a region is not recorded ("Bailout"); the only channel enabled here
+#if 1
+#define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
+#else
+#define CV_LOG_TRACE_BAILOUT(...) {}
+#endif
+
+// Messages from the parallel_for support functions
+#if 0
+#define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
+#else
+#define CV_LOG_PARALLEL(...) {}
+#endif
+
+// Dumps of the per-thread statistics objects
+#if 0
+#define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
+#else
+#define CV_LOG_CTX_STAT(...) {}
+#endif
+
+// Messages about entering/leaving skip mode
+#if 0
+#define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
+#else
+#define CV_LOG_SKIP(...) {}
+#endif
+
+namespace cv {
+namespace utils {
+namespace trace {
+namespace details {
+
+#ifdef OPENCV_TRACE
+
+static int64 g_zero_timestamp = 0;
+
+// Returns the time elapsed since g_zero_timestamp, converted from ticks to nanoseconds.
+static int64 getTimestamp()
+{
+    const int64 now = getTickCount();
+    // Tick-to-nanosecond factor is computed once, on the first call
+    static double tick_to_ns = 1e9 / getTickFrequency();
+    const int64 elapsedTicks = now - g_zero_timestamp;
+    return (int64)(elapsedTicks * tick_to_ns);
+}
+
+// TODO lazy configuration flags
+// Runtime settings of the trace engine. Each one is read from the named
+// environment variable when this translation unit is initialized.
+
+// Enables writing of the OpenCV trace files (default: off)
+static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false);
+
+// Limit for the nesting depth of OpenCV (non-application) function regions; 0 disables the check
+static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1);
+// Limit for direct children of an OpenCV region whose parent is also OpenCV code; values <= 0 disable the check
+static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000);
+// Limit for direct children of any region; values <= 0 disable the check
+static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000);
+// Path prefix of the trace files: "<prefix>.txt" plus one "<prefix>-NNN.txt" per thread
+static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace");
+
+#ifdef HAVE_OPENCL
+// When set, cv::ocl::finish() is called before the end timestamp of an OpenCL region is taken
+static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false);
+#endif
+
+#ifdef OPENCV_WITH_ITT
+// When set, ITT tasks are started with the parent region's ITT id as their parent
+static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_PARENT", false);
+#endif
+
+// Returns a C string made of (count & 63) space characters, used to indent log output.
+// The pointer refers into a static buffer, so it must not be freed or modified.
+static const char* _spaces(int count)
+{
+    static const char buf[64] =
+"                                                               ";
+    const int width = count & 63; // only the low 6 bits are used: 0..63 spaces
+    return buf + (63 - width);
+}
+
+/**
+ * Text-based trace messages
+ */
+class TraceMessage
+{
+public:
+    char buffer[1024];
+    size_t len;
+    bool hasError;
+
+    TraceMessage() :
+        len(0),
+        hasError(false)
+    {}
+
+    bool printf(const char* format, ...)
+    {
+        char* buf = &buffer[len];
+        size_t sz = sizeof(buffer) - len;
+        va_list ap;
+        va_start(ap, format);
+        int n = cv_vsnprintf(buf, (int)sz, format, ap);
+        va_end(ap);
+        if (n < 0 || (size_t)n > sz)
+        {
+            hasError = true;
+            return false;
+        }
+        len += n;
+        return true;
+    }
+
+    bool formatlocation(const Region::LocationStaticStorage& location)
+    {
+        return this->printf("l,%lld,\"%s\",%d,\"%s\",0x%llX\n",
+                (long long int)(*location.ppExtra)->global_location_id,
+                location.filename,
+                location.line,
+                location.name,
+                (long long int)(location.flags & ~0xF0000000));
+    }
+    bool formatRegionEnter(const Region& region)
+    {
+        bool ok = this->printf("b,%d,%lld,%lld,%lld",
+                (int)region.pImpl->threadID,
+                (long long int)region.pImpl->beginTimestamp,
+                (long long int)((*region.pImpl->location.ppExtra)->global_location_id),
+                (long long int)region.pImpl->global_region_id);
+        if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl)
+        {
+            if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID)
+                ok &= this->printf(",parentThread=%d,parent=%lld",
+                        (int)region.pImpl->parentRegion->pImpl->threadID,
+                        (long long int)region.pImpl->parentRegion->pImpl->global_region_id);
+        }
+        ok &= this->printf("\n");
+        return ok;
+    }
+    bool formatRegionLeave(const Region& region, const RegionStatistics& result)
+    {
+        CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration);
+        bool ok = this->printf("e,%d,%lld,%lld,%lld,%lld",
+                (int)region.pImpl->threadID,
+                (long long int)region.pImpl->endTimestamp,
+                (long long int)(*region.pImpl->location.ppExtra)->global_location_id,
+                (long long int)region.pImpl->global_region_id,
+                (long long int)result.duration);
+        if (result.currentSkippedRegions)
+            ok &= this->printf(",skip=%d", (int)result.currentSkippedRegions);
+#ifdef HAVE_IPP
+        if (result.durationImplIPP)
+            ok &= this->printf(",tIPP=%lld", (long long int)result.durationImplIPP);
+#endif
+#ifdef HAVE_OPENCL
+        if (result.durationImplOpenCL)
+            ok &= this->printf(",tOCL=%lld", (long long int)result.durationImplOpenCL);
+#endif
+#ifdef HAVE_OPENVX
+        if (result.durationImplOpenVX)
+            ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX));
+#endif
+        ok &= this->printf("\n");
+        return ok;
+    }
+    bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value)
+    {
+        return this->printf("a,%d,%lld,%lld,\"%s\",\"%s\"\n",
+                region.pImpl->threadID,
+                (long long int)region.pImpl->beginTimestamp,
+                (long long int)region.pImpl->global_region_id,
+                arg.name,
+                value);
+    }
+};
+
+
+#ifdef OPENCV_WITH_ITT
+static __itt_domain* domain = NULL;
+
+// Reports whether the ITT backend is available.
+// The first call queries __itt_api_version() and creates the "OpenCVTrace" ITT domain;
+// later calls return the cached answer.
+static bool isITTEnabled()
+{
+    static bool probed = false;
+    static bool enabled = false;
+    if (probed)
+        return enabled;
+
+    enabled = !!(__itt_api_version());
+    CV_LOG_ITT("ITT is " << (enabled ? "enabled" : "disabled"));
+    domain = __itt_domain_create("OpenCVTrace");
+    probed = true;
+    return enabled;
+}
+#endif
+
+
+/**
+ * Assigns the next location id from a function-local counter and, when ITT is
+ * enabled, creates the ITT string handles for the location's name and file name.
+ */
+Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location)
+{
+    CV_UNUSED(location);
+    static int g_location_id_counter = 0;
+    // NOTE(review): presumably CV_XADD returns the value before the addition, which makes ids start at 1 - confirm
+    global_location_id = CV_XADD(&g_location_id_counter, 1) + 1;
+    CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")"
+            << std::endl << "    file: " << location.filename
+            << std::endl << "    line: " << location.line
+            << std::endl << "    name: " << location.name);
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        // Caching is not required here, because there is builtin cache.
+        // https://software.intel.com/en-us/node/544203:
+        //     Consecutive calls to __itt_string_handle_create with the same name return the same value.
+        ittHandle_name = __itt_string_handle_create(location.name);
+        ittHandle_filename = __itt_string_handle_create(location.filename);
+    }
+#endif
+}
+
+/**
+ * Returns the extra data attached to a static location, creating it on first use.
+ *
+ * On creation, an "l" record describing the location is written to the global
+ * trace storage, if one exists.
+ */
+/*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location)
+{
+    LocationExtraData** pLocationExtra = location.ppExtra;
+    CV_DbgAssert(pLocationExtra);
+    // Unlocked check first; the pointer is re-checked after taking the initialization mutex
+    if (*pLocationExtra == NULL)
+    {
+        cv::AutoLock lock(cv::getInitializationMutex());
+        if (*pLocationExtra == NULL)
+        {
+            // The object is never deleted in this function
+            *pLocationExtra = new Region::LocationExtraData(location);
+            TraceStorage* s = getTraceManager().trace_storage.get();
+            if (s)
+            {
+                TraceMessage msg;
+                msg.formatlocation(location);
+                s->put(msg);
+            }
+        }
+    }
+    return *pLocationExtra;
+}
+
+
+/**
+ * Creates the implementation object of an active region.
+ *
+ * Takes the next region id from the thread context, attaches itself to
+ * region_ (region.pImpl), registers the region and enters it.
+ *
+ * @param ctx             trace context of the current thread
+ * @param parentRegion_   region that was active before this one (may be NULL)
+ * @param region_         public region object that owns this implementation
+ * @param location_       static description of the traced code location
+ * @param beginTimestamp_ timestamp taken by the caller when the region started
+ */
+Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) :
+    location(location_),
+    region(region_),
+    parentRegion(parentRegion_),
+    threadID(ctx.threadID),
+    global_region_id(++ctx.region_counter),
+    beginTimestamp(beginTimestamp_),
+    endTimestamp(0),
+    directChildrenCount(0)
+#ifdef OPENCV_WITH_ITT
+    ,itt_id_registered(false)
+    ,itt_id(__itt_null)
+#endif
+{
+    CV_DbgAssert(ctx.currentActiveRegion == parentRegion);
+    // Publish this object through the owning Region before anything reads region.pImpl
+    region.pImpl = this;
+
+    registerRegion(ctx);
+
+    enterRegion(ctx);
+}
+
+// Destroys the ITT id (if one was registered) and detaches this object from its Region.
+Region::Impl::~Impl()
+{
+#ifdef OPENCV_WITH_ITT
+    if (itt_id_registered)
+    {
+        CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this);
+        __itt_id_destroy(domain, itt_id);
+        itt_id_registered = false;
+    }
+#endif
+    region.pImpl = NULL;
+}
+
+/**
+ * Makes this region the active one of the thread context, updates the depth
+ * counters, writes the "b" record to the thread's storage and starts the ITT task.
+ */
+void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx)
+{
+    ctx.currentActiveRegion = &region;
+
+    // Depth counters only track function regions; regionDepthOpenCV excludes application code
+    if (location.flags & REGION_FLAG_FUNCTION)
+    {
+        if ((location.flags & REGION_FLAG_APP_CODE) == 0)
+        {
+            ctx.regionDepthOpenCV++;
+        }
+        ctx.regionDepth++;
+    }
+
+    TraceStorage* s = ctx.getStorage();
+    if (s)
+    {
+        TraceMessage msg;
+        msg.formatRegionEnter(region);
+        s->put(msg);
+    }
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        // The parent's ITT id is passed only when requested by configuration,
+        // the parent has a registered id and the region is not a forced one
+        __itt_id parentID = __itt_null;
+        if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0)
+            parentID = parentRegion->pImpl->itt_id;
+        __itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name);
+    }
+#endif
+}
+
+/**
+ * Finishes the region: takes the accumulated statistics out of the thread
+ * context, reports them to ITT and to the thread's storage ("e" record),
+ * restores the depth counters and makes the parent region active again.
+ *
+ * endTimestamp must be set by the caller before this call.
+ */
+void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx)
+{
+    // Not used below; the duration that gets reported comes from 'result'
+    int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration);
+    RegionStatistics result;
+    ctx.stat.grab(result);
+    ctx.totalSkippedEvents += result.currentSkippedRegions;
+    CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result);
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        // Non-zero statistics are attached to the ITT task as metadata before the task ends
+        if (result.currentSkippedRegions)
+        {
+            __itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions);
+        }
+#ifdef HAVE_IPP
+        if (result.durationImplIPP)
+            __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP);
+#endif
+#ifdef HAVE_OPENCL
+        if (result.durationImplOpenCL)
+            __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL);
+#endif
+#ifdef HAVE_OPENVX
+        if (result.durationImplOpenVX)
+            __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX);
+#endif
+        __itt_task_end(domain);
+    }
+#endif
+    TraceStorage* s = ctx.getStorage();
+    if (s)
+    {
+        TraceMessage msg;
+        msg.formatRegionLeave(region, result);
+        s->put(msg);
+    }
+
+    // Mirror of the counter updates done in enterRegion()
+    if (location.flags & REGION_FLAG_FUNCTION)
+    {
+        if ((location.flags & REGION_FLAG_APP_CODE) == 0)
+        {
+            ctx.regionDepthOpenCV--;
+        }
+        ctx.regionDepth--;
+    }
+
+    ctx.currentActiveRegion = parentRegion;
+}
+
+// Deletes this object; it must not be used after the call.
+void Region::Impl::release()
+{
+    delete this;
+}
+
+/**
+ * Creates the ITT id of this region (once) when ITT is enabled.
+ * Without ITT support the function does nothing.
+ */
+void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx)
+{
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        if (!itt_id_registered)
+        {
+            CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id);
+#if 1 // workaround for some ITT backends
+            // First id component combines (threadID + 1) in the upper 32 bits with the region id
+            itt_id = __itt_id_make((void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), global_region_id);
+#else
+            itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id);
+#endif
+            __itt_id_create(domain, itt_id);
+            itt_id_registered = true;
+        }
+    }
+#else
+    CV_UNUSED(ctx);
+#endif
+}
+
+// Turns skip mode on by storing the given depth in _skipDepth.
+// Skip mode is expected to be off (negative _skipDepth) when this is called.
+void RegionStatisticsStatus::enableSkipMode(int depth)
+{
+    CV_DbgAssert(_skipDepth < 0);
+    CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth);
+    _skipDepth = depth;
+}
+// Turns skip mode off (_skipDepth = -1) when a region at depth 'leaveDepth'
+// is left and that depth is not deeper than the stored skip depth.
+void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth)
+{
+    if (leaveDepth > _skipDepth)
+        return; // still below the depth where skipping started (or skip mode is off)
+
+    CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth);
+    _skipDepth = -1;
+}
+
+/**
+ * Opens a trace region for the given static location on the calling thread.
+ *
+ * When tracing is activated, an entry is always pushed on the thread's region
+ * stack (REGION_FLAG__NEED_STACK_POP). The region itself is recorded, i.e. an
+ * Impl is created and REGION_FLAG__ACTIVE is set, only if none of the bailout
+ * checks below rejects it; rejected regions are counted as skipped.
+ */
+Region::Region(const LocationStaticStorage& location) :
+    pImpl(NULL),
+    implFlags(0)
+{
+    // Checks:
+    // - global enable flag
+    // - parent region is disabled
+    // - children count threshold
+    // - region location
+    // - depth (opencv nested calls)
+    if (!TraceManager::isActivated())
+    {
+        CV_LOG("Trace is disabled. Bailout");
+        return;
+    }
+
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name);
+
+    Region* parentRegion = ctx.stackTopRegion();
+    const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation();
+
+    // A REGION_NEXT location first closes the recorded region on top of the stack,
+    // then continues with the new stack top as its parent
+    if (location.flags & REGION_FLAG_REGION_NEXT)
+    {
+        if (parentRegion && parentRegion->pImpl)
+        {
+            CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0);
+            parentRegion->destroy(); parentRegion->implFlags = 0;
+            parentRegion = ctx.stackTopRegion();
+            parentLocation = ctx.stackTopLocation();
+        }
+    }
+
+    // Count this region as a direct child of its (recorded) parent
+    int parentChildren = 0;
+    if (parentRegion && parentRegion->pImpl)
+    {
+        if (parentLocation == NULL)
+        {
+            // parallel_for_body code path
+            parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1;
+        }
+        else
+        {
+            parentChildren = ++parentRegion->pImpl->directChildrenCount;
+        }
+    }
+
+    int64 beginTimestamp = getTimestamp();
+
+    // For IPP/OpenCL/OpenVX regions remember the depth of the first such region;
+    // the field is only written while it is still 0
+    int currentDepth = ctx.getCurrentDepth() + 1;
+    switch (location.flags & REGION_FLAG_IMPL_MASK)
+    {
+#ifdef HAVE_IPP
+    case REGION_FLAG_IMPL_IPP:
+        if (!ctx.stat_status.ignoreDepthImplIPP)
+            ctx.stat_status.ignoreDepthImplIPP = currentDepth;
+        break;
+#endif
+#ifdef HAVE_OPENCL
+    case REGION_FLAG_IMPL_OPENCL:
+        if (!ctx.stat_status.ignoreDepthImplOpenCL)
+            ctx.stat_status.ignoreDepthImplOpenCL = currentDepth;
+        break;
+#endif
+#ifdef HAVE_OPENVX
+    case REGION_FLAG_IMPL_OPENVX:
+        if (!ctx.stat_status.ignoreDepthImplOpenVX)
+            ctx.stat_status.ignoreDepthImplOpenVX = currentDepth;
+        break;
+#endif
+    default:
+        break;
+    }
+
+    // From here on destroy() has to pop the stack entry, even if the region is not recorded
+    ctx.stackPush(this, &location, beginTimestamp);
+    implFlags |= REGION_FLAG__NEED_STACK_POP;
+
+    // Skip-mode and children-count checks do not apply to forced regions
+    if ((location.flags & REGION_FLAG_REGION_FORCE) == 0)
+    {
+        if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth)
+        {
+            CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. Bailout");
+            ctx.stat.currentSkippedRegions++;
+            return;
+        }
+
+        if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0)
+        {
+            if (parentChildren >= param_maxRegionChildrenOpenCV)
+            {
+                CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout");
+                ctx.stat_status.enableSkipMode(currentDepth - 1);
+                ctx.stat.currentSkippedRegions++;
+                DEBUG_ONLY(ctx.dumpStack(std::cout, false));
+                return;
+            }
+        }
+        if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren)
+        {
+            CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout");
+            ctx.stat_status.enableSkipMode(currentDepth - 1);
+            ctx.stat.currentSkippedRegions++;
+            DEBUG_ONLY(ctx.dumpStack(std::cout, false));
+            return;
+        }
+    }
+
+    LocationExtraData::init(location);
+
+    // A location id of 0 is treated as "location disabled"
+    if ((*location.ppExtra)->global_location_id == 0)
+    {
+        CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout");
+        ctx.stat_status.enableSkipMode(currentDepth);
+        ctx.stat.currentSkippedRegions++;
+        return;
+    }
+
+    if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED))
+    {
+        CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout");
+        ctx.stat_status.enableSkipMode(currentDepth);
+        ctx.stat.currentSkippedRegions++;
+        return;
+    }
+
+    // Depth limit for nested OpenCV (non-application) regions; 0 disables the check
+    if (param_maxRegionDepthOpenCV)
+    {
+        if ((location.flags & REGION_FLAG_APP_CODE) == 0)
+        {
+            if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV)
+            {
+                CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". Bailout");
+                if (ctx.stat.currentSkippedRegions == 0)
+                {
+                    DEBUG_ONLY(ctx.dumpStack(std::cout, false));
+                }
+                ctx.stat_status.enableSkipMode(currentDepth);
+                ctx.stat.currentSkippedRegions++;
+                return;
+            }
+        }
+    }
+
+    // The Impl constructor stores itself into this->pImpl, so the result of 'new' is not kept here
+    new Impl(ctx, parentRegion, *this, location, beginTimestamp);
+    CV_DbgAssert(pImpl != NULL);
+    implFlags |= REGION_FLAG__ACTIVE;
+
+    // parallel_for path
+    if (parentRegion && parentRegion->pImpl)
+    {
+        if (parentLocation == NULL)
+        {
+            pImpl->directChildrenCount = parentChildren;
+        }
+    }
+}
+
+/**
+ * Closes the region opened by the constructor.
+ *
+ * Computes the region duration from the timestamp stored on the stack,
+ * updates the per-thread statistics (total and per-implementation durations),
+ * leaves and releases the Impl of a recorded region, and pops the stack entry.
+ */
+void Region::destroy()
+{
+    CV_DbgAssert(implFlags != 0);
+
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "<unknown>"));
+
+    CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP);
+    const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth);
+
+    CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status);
+
+    // Classify the region by the implementation flag of its location
+    const Region::LocationStaticStorage* location = ctx.stackTopLocation();
+    Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN;
+    if (location)
+    {
+        switch (location->flags & REGION_FLAG_IMPL_MASK)
+        {
+#ifdef HAVE_IPP
+        case REGION_FLAG_IMPL_IPP:
+            myCodePath = Impl::CODE_PATH_IPP;
+            break;
+#endif
+#ifdef HAVE_OPENCL
+        case REGION_FLAG_IMPL_OPENCL:
+            // Optional synchronization, done before the end timestamp is taken
+            if (param_synchronizeOpenCL && cv::ocl::useOpenCL())
+                cv::ocl::finish();
+            myCodePath = Impl::CODE_PATH_OPENCL;
+            break;
+#endif
+#ifdef HAVE_OPENVX
+        case REGION_FLAG_IMPL_OPENVX:
+            myCodePath = Impl::CODE_PATH_OPENVX;
+            break;
+#endif
+        default:
+            break;
+        }
+    }
+
+    int64 endTimestamp = getTimestamp();
+    int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
+
+    bool active = isActive();
+
+    // A recorded region sets the duration; a non-recorded region adds to it only
+    // when it sits directly above the parallel_for stack base
+    if (active)
+        ctx.stat.duration = duration;
+    else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1)
+        ctx.stat.duration += duration;
+
+    // Per-implementation durations: accumulate when this region is the one whose depth
+    // was remembered by the constructor (and clear that depth), otherwise overwrite
+    // for a recorded region
+    switch (myCodePath) {
+        case Impl::CODE_PATH_PLAIN:
+            // nothing
+            break;
+#ifdef HAVE_IPP
+        case Impl::CODE_PATH_IPP:
+            if (ctx.stat_status.ignoreDepthImplIPP == currentDepth)
+            {
+                ctx.stat.durationImplIPP += duration;
+                ctx.stat_status.ignoreDepthImplIPP = 0;
+            }
+            else if (active)
+            {
+                ctx.stat.durationImplIPP = duration;
+            }
+            break;
+#endif
+#ifdef HAVE_OPENCL
+        case Impl::CODE_PATH_OPENCL:
+            if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth)
+            {
+                ctx.stat.durationImplOpenCL += duration;
+                ctx.stat_status.ignoreDepthImplOpenCL = 0;
+            }
+            else if (active)
+            {
+                ctx.stat.durationImplOpenCL = duration;
+            }
+            break;
+#endif
+#ifdef HAVE_OPENVX
+        case Impl::CODE_PATH_OPENVX:
+            if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth)
+            {
+                ctx.stat.durationImplOpenVX += duration;
+                ctx.stat_status.ignoreDepthImplOpenVX = 0;
+            }
+            else if (active)
+            {
+                ctx.stat.durationImplOpenVX = duration;
+            }
+            break;
+#endif
+        default:
+            break;
+    }
+
+    // Recorded region: emit the leave record and free the Impl
+    if (pImpl)
+    {
+        CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP));
+        CV_DbgAssert(ctx.stackTopRegion() == this);
+        pImpl->endTimestamp = endTimestamp;
+        pImpl->leaveRegion(ctx);
+        pImpl->release();
+        pImpl = NULL;
+        DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE);
+    }
+    else
+    {
+        CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth);
+    }
+
+    // Remove the stack entry pushed by the constructor and possibly leave skip mode
+    if (implFlags & REGION_FLAG__NEED_STACK_POP)
+    {
+        CV_DbgAssert(ctx.stackTopRegion() == this);
+        ctx.stackPop();
+        ctx.stat_status.checkResetSkipMode(currentDepth);
+        DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP);
+    }
+    CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status);
+}
+
+
+// Nothing to do explicitly here; members are destroyed by their own destructors.
+TraceManagerThreadLocal::~TraceManagerThreadLocal()
+{
+}
+
+void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const
+{
+    std::stringstream ss;
+    std::deque<StackEntry>::const_iterator it = stack.begin();
+    std::deque<StackEntry>::const_iterator end = stack.end();
+    int depth = 0;
+    for (; it != end; ++it)
+    {
+        const Region::LocationStaticStorage* location = it->location;
+        if (location)
+        {
+            if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION))
+            {
+                ss << _spaces(4*depth) << location->name << std::endl;
+                depth++;
+            }
+        }
+        else
+        {
+            ss << _spaces(4*depth) << "<unknown>" << std::endl;
+            depth++;
+        }
+    }
+    out << ss.str();
+}
+
+class AsyncTraceStorage : public TraceStorage
+{
+    mutable std::ofstream out;
+public:
+    const std::string name;
+
+    AsyncTraceStorage(const std::string& filename) :
+        out(filename.c_str(), std::ios::trunc),
+        name(filename)
+    {
+        out << "#description: OpenCV trace file" << std::endl;
+        out << "#version: 1.0" << std::endl;
+    }
+    ~AsyncTraceStorage()
+    {
+        out.close();
+    }
+
+    bool put(const TraceMessage& msg) const
+    {
+        if (msg.hasError)
+            return false;
+        out << msg.buffer;
+        //DEBUG_ONLY(std::flush(out)); // TODO configure flag
+        return true;
+    }
+};
+
+class SyncTraceStorage : public TraceStorage
+{
+    mutable std::ofstream out;
+    mutable cv::Mutex mutex;
+public:
+    const std::string name;
+
+    SyncTraceStorage(const std::string& filename) :
+        out(filename.c_str(), std::ios::trunc),
+        name(filename)
+    {
+        out << "#description: OpenCV trace file" << std::endl;
+        out << "#version: 1.0" << std::endl;
+    }
+    ~SyncTraceStorage()
+    {
+        cv::AutoLock l(mutex);
+        out.close();
+    }
+
+    bool put(const TraceMessage& msg) const
+    {
+        if (msg.hasError)
+            return false;
+        {
+            cv::AutoLock l(mutex);
+            out << msg.buffer;
+            std::flush(out); // TODO configure flag
+        }
+        return true;
+    }
+};
+
+
+TraceStorage* TraceManagerThreadLocal::getStorage() const
+{
+    // TODO configuration option for stdout/single trace file
+    if (storage.empty())
+    {
+        TraceStorage* global = getTraceManager().trace_storage.get();
+        if (global)
+        {
+            const std::string filepath = cv::format("%s-%03d.txt", param_traceLocation.c_str(), threadID).c_str();
+            TraceMessage msg;
+            const char* pos = strrchr(filepath.c_str(), '/'); // extract filename
+#ifdef _WIN32
+            if (!pos)
+                strrchr(filepath.c_str(), '\\');
+#endif
+            if (!pos)
+                pos = filepath.c_str();
+            msg.printf("#thread file: %s\n", pos);
+            global->put(msg);
+            storage.reset(new AsyncTraceStorage(filepath));
+        }
+    }
+    return storage.get();
+}
+
+
+
+static bool activated = false;
+static bool isInitialized = false;
+
+/**
+ * Initializes the trace engine: records the zero timestamp, applies the
+ * OPENCV_TRACE setting and opens the main trace file "<trace location>.txt"
+ * when tracing is enabled. With ITT available, tracing is activated regardless
+ * of that setting (but without an OpenCV trace file).
+ */
+TraceManager::TraceManager()
+{
+    g_zero_timestamp = cv::getTickCount();
+
+    isInitialized = true;
+    CV_LOG("TraceManager ctor: " << (void*)this);
+
+    CV_LOG("TraceManager configure()");
+    activated = param_traceEnable;
+
+    if (activated)
+        trace_storage.reset(new SyncTraceStorage(std::string(param_traceLocation) + ".txt"));
+
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        activated = true; // force trace pipeline activation (without OpenCV storage)
+        __itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace"));
+    }
+#endif
+}
+/**
+ * Shuts the trace engine down: closes the ITT region, logs the total number of
+ * events and skipped events summed over all thread contexts, and switches
+ * tracing off for the remainder of the process.
+ */
+TraceManager::~TraceManager()
+{
+    CV_LOG("TraceManager dtor: " << (void*)this);
+
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        __itt_region_end(domain, __itt_null);
+    }
+#endif
+
+    // Sum the per-thread counters of every thread context known to the TLS container
+    std::vector<TraceManagerThreadLocal*> threads_ctx;
+    tls.gather(threads_ctx);
+    size_t totalEvents = 0, totalSkippedEvents = 0;
+    for (size_t i = 0; i < threads_ctx.size(); i++)
+    {
+        TraceManagerThreadLocal* ctx = threads_ctx[i];
+        if (ctx)
+        {
+            totalEvents += ctx->region_counter;
+            totalSkippedEvents += ctx->totalSkippedEvents;
+        }
+    }
+    if (totalEvents || activated)
+    {
+        CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents);
+    }
+    if (totalSkippedEvents)
+    {
+        CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents);
+    }
+
+    // This is a global static object, so process starts shutdown here
+    // Turn off trace
+    cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH
+    activated = false;
+}
+
+/**
+ * Tells whether regions should be traced right now.
+ * The first call made before the TraceManager exists triggers its creation.
+ */
+bool TraceManager::isActivated()
+{
+    // Once process shutdown has begun (cv::__termination is set), switch tracing
+    // off and report it as inactive, so that no further instrumentation is processed.
+    if (cv::__termination)
+    {
+        activated = false;
+        return false;
+    }
+
+    // The TraceManager constructor sets both 'isInitialized' and 'activated'
+    if (!isInitialized)
+    {
+        TraceManager& m = getTraceManager();
+        (void)m; // TODO
+    }
+
+    return activated;
+}
+
+
+// Returns the address of the single TraceManager object, a function-local static
+// that is constructed the first time this function runs.
+static TraceManager* getTraceManagerCallOnce()
+{
+    static TraceManager globalInstance;
+    return &globalInstance;
+}
+// Accessor for the global TraceManager.
+// NOTE(review): the return statement comes from CV_SINGLETON_LAZY_INIT_REF, defined outside this file;
+// presumably it caches the pointer from getTraceManagerCallOnce() - confirm against the macro.
+TraceManager& getTraceManager()
+{
+    CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce())
+}
+
+/**
+ * Attaches the calling thread's trace context to the root region of a parallel_for.
+ *
+ * @param rootRegion  region that was active when the parallel_for was started
+ * @param root_ctx    trace context of the thread that started the parallel_for
+ *
+ * On the starting thread itself the current statistics, status and stack size
+ * are saved into the parallel_for_* fields. On any other thread (whose stack
+ * must be empty) the depth counters and status are copied from root_ctx.
+ */
+void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+
+    if (ctx.dummy_stack_top.region == &rootRegion) // already attached
+        return;
+
+    CV_Assert(ctx.dummy_stack_top.region == NULL);
+    // The dummy entry carries no location (NULL) and a begin timestamp of -1
+    ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast<Region*>(&rootRegion), NULL, -1);
+
+    if (&ctx == &root_ctx)
+    {
+        ctx.stat.grab(ctx.parallel_for_stat);
+        ctx.parallel_for_stat_status = ctx.stat_status;
+        ctx.parallel_for_stack_size = ctx.stack.size();
+        return;
+    }
+
+    CV_Assert(ctx.stack.empty());
+
+    ctx.currentActiveRegion = const_cast<Region*>(&rootRegion);
+
+    ctx.regionDepth = root_ctx.regionDepth;
+    ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV;
+
+    ctx.parallel_for_stack_size = 0;
+
+    ctx.stat_status.propagateFrom(root_ctx.stat_status);
+}
+
+/**
+ * Links the calling thread's currently active region to the parallel_for root
+ * region in ITT ("is child of" relation). Without ITT support, or when either
+ * region is not recorded, nothing is added.
+ */
+void parallelForAttachNestedRegion(const Region& rootRegion)
+{
+    CV_UNUSED(rootRegion);
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+
+    CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion);
+
+    Region* region = ctx.getCurrentActiveRegion();
+    CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion);
+    if (!region)
+        return;
+
+#ifdef OPENCV_WITH_ITT
+    // Both ends of the relation need ITT ids
+    if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered)
+        return;
+
+    if (!region->pImpl)
+        return;
+
+    CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => "
+                                 << (void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3);
+    __itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id);
+#endif
+}
+
+/**
+ * Completes a parallel_for: collects the statistics of every thread context
+ * attached to rootRegion, detaches those contexts, scales the collected
+ * statistics by the ratio of wall time to summed thread time (capped at 1)
+ * and appends the result to the calling thread's statistics.
+ */
+void parallelForFinalize(const Region& rootRegion)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+
+    int64 endTimestamp = getTimestamp();
+    int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
+    CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion);
+
+    std::vector<TraceManagerThreadLocal*> threads_ctx;
+    getTraceManager().tls.gather(threads_ctx);
+    RegionStatistics parallel_for_stat;
+    int threads = 0;
+    for (size_t i = 0; i < threads_ctx.size(); i++)
+    {
+        TraceManagerThreadLocal* child_ctx = threads_ctx[i];
+
+        // Only contexts whose stack top is the root region took part in this parallel_for
+        if (child_ctx && child_ctx->stackTopRegion() == &rootRegion)
+        {
+            CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat);
+            threads++;
+            RegionStatistics child_stat;
+            child_ctx->stat.grab(child_stat);
+            parallel_for_stat.append(child_stat);
+            if (child_ctx != &ctx)
+            {
+                child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
+            }
+            else
+            {
+                // Calling thread: restore what parallelForSetRootRegion() saved
+                ctx.parallel_for_stat.grab(ctx.stat);
+                ctx.stat_status = ctx.parallel_for_stat_status;
+                child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
+            }
+        }
+    }
+    // NOTE(review): parallel_for_stat.duration may be 0 here, making this a floating-point
+    // division by zero - confirm that the resulting inf/NaN is acceptable
+    float parallel_coeff = std::min(1.0f, duration / (float)(parallel_for_stat.duration));
+    CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff);
+    CV_LOG_PARALLEL(NULL, parallel_for_stat);
+    if (parallel_coeff != 1.0f)
+    {
+        parallel_for_stat.multiply(parallel_coeff);
+        CV_LOG_PARALLEL(NULL, parallel_for_stat);
+    }
+    // The total duration of the children is not added to the caller's duration
+    parallel_for_stat.duration = 0;
+    ctx.stat.append(parallel_for_stat);
+    CV_LOG_PARALLEL(NULL, ctx.stat);
+}
+
+// Per-argument data created on first use of a TraceArg.
+// It only carries state in builds with ITT support.
+struct TraceArg::ExtraData
+{
+#ifdef OPENCV_WITH_ITT
+    // Special fields for ITT
+    // Only assigned by the constructor when ITT is enabled
+    __itt_string_handle* volatile ittHandle_name;
+#endif
+    ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg)
+    {
+        CV_UNUSED(ctx); CV_UNUSED(arg);
+#ifdef OPENCV_WITH_ITT
+        if (isITTEnabled())
+        {
+            // Caching is not required here, because there is builtin cache.
+            // https://software.intel.com/en-us/node/544203:
+            //     Consecutive calls to __itt_string_handle_create with the same name return the same value.
+            ittHandle_name = __itt_string_handle_create(arg.name);
+        }
+#endif
+    }
+};
+
+// Creates the ExtraData of a trace argument on first use.
+// The pointer is checked without the lock first and re-checked under the initialization mutex.
+static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg)
+{
+    TraceArg::ExtraData** pExtra = arg.ppExtra;
+    if (*pExtra == NULL)
+    {
+        cv::AutoLock lock(cv::getInitializationMutex());
+        if (*pExtra == NULL)
+        {
+            // The object is never deleted in this function
+            *pExtra = new TraceArg::ExtraData(ctx, arg);
+        }
+    }
+}
+/**
+ * Attaches a string argument to the currently active region of the calling thread.
+ * Does nothing when there is no active region. A NULL value is reported as "<null>".
+ * In this implementation the value is only forwarded to ITT (when enabled).
+ */
+void traceArg(const TraceArg& arg, const char* value)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    Region* region = ctx.getCurrentActiveRegion();
+    if (!region)
+        return;
+    CV_Assert(region->pImpl);
+    initTraceArg(ctx, arg);
+    if (!value)
+        value = "<null>";
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        __itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value));
+    }
+#endif
+}
+/**
+ * Attaches an int argument to the currently active region of the calling thread.
+ * Does nothing when there is no active region.
+ * In this implementation the value is only forwarded to ITT (when enabled).
+ */
+void traceArg(const TraceArg& arg, int value)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    Region* region = ctx.getCurrentActiveRegion();
+    if (!region)
+        return;
+    CV_Assert(region->pImpl);
+    initTraceArg(ctx, arg);
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        // The ITT metadata type is chosen to match the size of int on this platform
+        __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? __itt_metadata_s32 : __itt_metadata_s64, 1, &value);
+    }
+#else
+    CV_UNUSED(value);
+#endif
+}
+/**
+ * Attaches a 64-bit integer argument to the currently active region of the calling thread.
+ * Does nothing when there is no active region.
+ * In this implementation the value is only forwarded to ITT (when enabled).
+ */
+void traceArg(const TraceArg& arg, int64 value)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    Region* region = ctx.getCurrentActiveRegion();
+    if (!region)
+        return;
+    CV_Assert(region->pImpl);
+    initTraceArg(ctx, arg);
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value);
+    }
+#else
+    CV_UNUSED(value);
+#endif
+}
+/**
+ * Attaches a double argument to the currently active region of the calling thread.
+ * Does nothing when there is no active region.
+ * In this implementation the value is only forwarded to ITT (when enabled).
+ */
+void traceArg(const TraceArg& arg, double value)
+{
+    TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
+    Region* region = ctx.getCurrentActiveRegion();
+    if (!region)
+        return;
+    CV_Assert(region->pImpl);
+    initTraceArg(ctx, arg);
+#ifdef OPENCV_WITH_ITT
+    if (isITTEnabled())
+    {
+        __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value);
+    }
+#else
+    CV_UNUSED(value);
+#endif
+}
+
+#else
+
+// Build without OPENCV_TRACE: regions are never recorded and destroy() has nothing to do.
+Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {}
+void Region::destroy() {}
+
+// Build without OPENCV_TRACE: recording of region arguments is a no-op.
+// (No ';' after the function bodies: it would be an empty declaration at namespace scope.)
+void traceArg(const TraceArg&, const char*) {}
+void traceArg(const TraceArg&, int) {}
+void traceArg(const TraceArg&, int64) {}
+void traceArg(const TraceArg&, double) {}
+
+#endif
+
+}}}} // namespace
index 35ce8ca..24686af 100644 (file)
@@ -168,46 +168,55 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id)
 
 void cv::namedWindow( const String& winname, int flags )
 {
+    CV_TRACE_FUNCTION();
     cvNamedWindow( winname.c_str(), flags );
 }
 
 void cv::destroyWindow( const String& winname )
 {
+    CV_TRACE_FUNCTION();
     cvDestroyWindow( winname.c_str() );
 }
 
 void cv::destroyAllWindows()
 {
+    CV_TRACE_FUNCTION();
     cvDestroyAllWindows();
 }
 
 void cv::resizeWindow( const String& winname, int width, int height )
 {
+    CV_TRACE_FUNCTION();
     cvResizeWindow( winname.c_str(), width, height );
 }
 
 void cv::moveWindow( const String& winname, int x, int y )
 {
+    CV_TRACE_FUNCTION();
     cvMoveWindow( winname.c_str(), x, y );
 }
 
 void cv::setWindowProperty(const String& winname, int prop_id, double prop_value)
 {
+    CV_TRACE_FUNCTION();
     cvSetWindowProperty( winname.c_str(), prop_id, prop_value);
 }
 
 double cv::getWindowProperty(const String& winname, int prop_id)
 {
+    CV_TRACE_FUNCTION();
     return cvGetWindowProperty(winname.c_str(), prop_id);
 }
 
 int cv::waitKeyEx(int delay)
 {
+    CV_TRACE_FUNCTION();
     return cvWaitKey(delay);
 }
 
 int cv::waitKey(int delay)
 {
+    CV_TRACE_FUNCTION();
     int code = waitKeyEx(delay);
 #ifndef HAVE_WINRT
     static int use_legacy = -1;
@@ -225,42 +234,50 @@ int cv::createTrackbar(const String& trackbarName, const String& winName,
                    int* value, int count, TrackbarCallback callback,
                    void* userdata)
 {
+    CV_TRACE_FUNCTION();
     return cvCreateTrackbar2(trackbarName.c_str(), winName.c_str(),
                              value, count, callback, userdata);
 }
 
 void cv::setTrackbarPos( const String& trackbarName, const String& winName, int value )
 {
+    CV_TRACE_FUNCTION();
     cvSetTrackbarPos(trackbarName.c_str(), winName.c_str(), value );
 }
 
 void cv::setTrackbarMax(const String& trackbarName, const String& winName, int maxval)
 {
+    CV_TRACE_FUNCTION();
     cvSetTrackbarMax(trackbarName.c_str(), winName.c_str(), maxval);
 }
 
 void cv::setTrackbarMin(const String& trackbarName, const String& winName, int minval)
 {
+    CV_TRACE_FUNCTION();
     cvSetTrackbarMin(trackbarName.c_str(), winName.c_str(), minval);
 }
 
 int cv::getTrackbarPos( const String& trackbarName, const String& winName )
 {
+    CV_TRACE_FUNCTION();
     return cvGetTrackbarPos(trackbarName.c_str(), winName.c_str());
 }
 
 void cv::setMouseCallback( const String& windowName, MouseCallback onMouse, void* param)
 {
+    CV_TRACE_FUNCTION();
     cvSetMouseCallback(windowName.c_str(), onMouse, param);
 }
 
 int cv::getMouseWheelDelta( int flags )
 {
+    CV_TRACE_FUNCTION();
     return CV_GET_WHEEL_DELTA(flags);
 }
 
 int cv::startWindowThread()
 {
+    CV_TRACE_FUNCTION();
     return cvStartWindowThread();
 }
 
@@ -268,16 +285,19 @@ int cv::startWindowThread()
 
 void cv::setOpenGlDrawCallback(const String& name, OpenGlDrawCallback callback, void* userdata)
 {
+    CV_TRACE_FUNCTION();
     cvSetOpenGlDrawCallback(name.c_str(), callback, userdata);
 }
 
 void cv::setOpenGlContext(const String& windowName)
 {
+    CV_TRACE_FUNCTION();
     cvSetOpenGlContext(windowName.c_str());
 }
 
 void cv::updateWindow(const String& windowName)
 {
+    CV_TRACE_FUNCTION();
     cvUpdateWindow(windowName.c_str());
 }
 
@@ -299,6 +319,7 @@ namespace
 
 void cv::imshow( const String& winname, InputArray _img )
 {
+    CV_TRACE_FUNCTION();
     const Size size = _img.size();
 #ifndef HAVE_OPENGL
     CV_Assert(size.width>0 && size.height>0);
@@ -355,6 +376,7 @@ void cv::imshow( const String& winname, InputArray _img )
 
 void cv::imshow(const String& winname, const ogl::Texture2D& _tex)
 {
+    CV_TRACE_FUNCTION();
 #ifndef HAVE_OPENGL
     (void) winname;
     (void) _tex;
index 6b15dbe..2614019 100644 (file)
@@ -556,6 +556,8 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats)
 */
 Mat imread( const String& filename, int flags )
 {
+    CV_TRACE_FUNCTION();
+
     /// create the basic container
     Mat img;
 
@@ -584,6 +586,8 @@ Mat imread( const String& filename, int flags )
 */
 bool imreadmulti(const String& filename, std::vector<Mat>& mats, int flags)
 {
+    CV_TRACE_FUNCTION();
+
     return imreadmulti_(filename, flags, mats);
 }
 
@@ -621,6 +625,8 @@ static bool imwrite_( const String& filename, const Mat& image,
 bool imwrite( const String& filename, InputArray _img,
               const std::vector<int>& params )
 {
+    CV_TRACE_FUNCTION();
+
     Mat img = _img.getMat();
     return imwrite_(filename, img, params, false);
 }
@@ -725,6 +731,8 @@ imdecode_( const Mat& buf, int flags, int hdrtype, Mat* mat=0 )
 
 Mat imdecode( InputArray _buf, int flags )
 {
+    CV_TRACE_FUNCTION();
+
     Mat buf = _buf.getMat(), img;
     imdecode_( buf, flags, LOAD_MAT, &img );
 
@@ -739,6 +747,8 @@ Mat imdecode( InputArray _buf, int flags )
 
 Mat imdecode( InputArray _buf, int flags, Mat* dst )
 {
+    CV_TRACE_FUNCTION();
+
     Mat buf = _buf.getMat(), img;
     dst = dst ? dst : &img;
     imdecode_( buf, flags, LOAD_MAT, dst );
@@ -755,6 +765,8 @@ Mat imdecode( InputArray _buf, int flags, Mat* dst )
 bool imencode( const String& ext, InputArray _image,
                std::vector<uchar>& buf, const std::vector<int>& params )
 {
+    CV_TRACE_FUNCTION();
+
     Mat image = _image.getMat();
 
     int channels = image.channels();
index 6d9b1a0..583f12b 100644 (file)
@@ -47,8 +47,6 @@
 #include "../perf_precomp.hpp"
 #include "opencv2/ts/ocl_perf.hpp"
 
-#ifdef HAVE_OPENCL
-
 namespace cvtest {
 namespace ocl {
 
@@ -318,11 +316,11 @@ OCL_PERF_TEST_P(CannyFixture, Canny, ::testing::Combine(OCL_TEST_SIZES, OCL_PERF
 
     declare.in(img).out(edges);
 
-    OCL_TEST_CYCLE() cv::Canny(img, edges, 50.0, 100.0, apertureSize, L2Grad);
+    PERF_SAMPLE_BEGIN();
+        cv::Canny(img, edges, 50.0, 100.0, apertureSize, L2Grad);
+    PERF_SAMPLE_END();
 
     SANITY_CHECK_NOTHING();
 }
 
 } } // namespace cvtest::ocl
-
-#endif // HAVE_OPENCL
index 2046556..3a25ff2 100644 (file)
@@ -31,7 +31,9 @@ PERF_TEST_P(Img_Aperture_L2_thresholds, canny,
 
     declare.in(img).out(edges);
 
-    TEST_CYCLE() Canny(img, edges, thresh_low, thresh_high, aperture, useL2);
+    PERF_SAMPLE_BEGIN();
+        Canny(img, edges, thresh_low, thresh_high, aperture, useL2);
+    PERF_SAMPLE_END();
 
     SANITY_CHECK(edges);
 }
index 9750c27..b4a4ac4 100644 (file)
@@ -350,6 +350,8 @@ public:
 
     void operator()(const Range &boundaries) const
     {
+        CV_TRACE_FUNCTION();
+
         Mat dx, dy;
         AutoBuffer<short> dxMax(0), dyMax(0);
         std::deque<uchar*> stack, borderPeaksLocal;
@@ -358,6 +360,7 @@ public:
         short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL;
         uchar *_pmap;
 
+        CV_TRACE_REGION("gradient");
         if(needGradient)
         {
             Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
@@ -369,6 +372,7 @@ public:
             dy = src2.rowRange(rowStart, rowEnd);
         }
 
+        CV_TRACE_REGION_NEXT("magnitude");
         if(cn > 1)
         {
             dxMax.allocate(2 * dx.cols);
@@ -740,6 +744,7 @@ public:
         uint pmapDiff = (uint)(((rowEnd == src.rows) ? map.datalimit : (map.data + boundaries.end * mapstep)) - pmapLower);
 
         // now track the edges (hysteresis thresholding)
+        CV_TRACE_REGION_NEXT("hysteresis");
         while (!stack.empty())
         {
             uchar *m = stack.back();
@@ -1035,6 +1040,7 @@ void Canny( InputArray _src, OutputArray _dst,
 
     parallel_for_(Range(0, src.rows), parallelCanny(src, map, stack, low, high, aperture_size, L2gradient), numOfThreads);
 
+    CV_TRACE_REGION("global_hysteresis");
     // now track the edges (hysteresis thresholding)
     ptrdiff_t mapstep = map.cols;
 
@@ -1053,6 +1059,7 @@ void Canny( InputArray _src, OutputArray _dst,
         if (!m[mapstep+1])  CANNY_PUSH((m+mapstep+1), stack);
     }
 
+    CV_TRACE_REGION_NEXT("finalPass");
     parallel_for_(Range(0, src.rows), finalPass(map, dst), src.total()/(double)(1<<16));
 }
 
@@ -1105,6 +1112,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
 
     parallel_for_(Range(0, dx.rows), parallelCanny(dx, dy, map, stack, low, high, L2gradient), numOfThreads);
 
+    CV_TRACE_REGION("global_hysteresis");
     // now track the edges (hysteresis thresholding)
     ptrdiff_t mapstep = map.cols;
 
@@ -1123,6 +1131,7 @@ void Canny( InputArray _dx, InputArray _dy, OutputArray _dst,
         if (!m[mapstep+1])  CANNY_PUSH((m+mapstep+1), stack);
     }
 
+    CV_TRACE_REGION_NEXT("finalPass");
     parallel_for_(Range(0, dx.rows), finalPass(map, dst), dx.total()/(double)(1<<16));
 }
 
index ecf8f46..6e554cf 100644 (file)
@@ -271,6 +271,8 @@ public:
 
     virtual void operator()(const Range& range) const
     {
+        CV_TRACE_FUNCTION();
+
         const uchar* yS = src_data + static_cast<size_t>(range.start) * src_step;
         uchar* yD = dst_data + static_cast<size_t>(range.start) * dst_step;
 
index 5e1b6d2..e48030f 100644 (file)
@@ -1028,6 +1028,7 @@ Ptr<TrainData> TrainData::loadFromCSV(const String& filename,
                                       const String& varTypeSpec,
                                       char delimiter, char missch)
 {
+    CV_TRACE_FUNCTION_SKIP_NESTED();
     Ptr<TrainDataImpl> td = makePtr<TrainDataImpl>();
     if(!td->loadCSV(filename, headerLines, responseStartIdx, responseEndIdx, varTypeSpec, delimiter, missch))
         td.release();
@@ -1038,6 +1039,7 @@ Ptr<TrainData> TrainData::create(InputArray samples, int layout, InputArray resp
                                  InputArray varIdx, InputArray sampleIdx, InputArray sampleWeights,
                                  InputArray varType)
 {
+    CV_TRACE_FUNCTION_SKIP_NESTED();
     Ptr<TrainDataImpl> td = makePtr<TrainDataImpl>();
     td->setData(samples, layout, responses, varIdx, sampleIdx, sampleWeights, varType, noArray());
     return td;
index 819d409..e9e8d4b 100644 (file)
@@ -45,6 +45,7 @@ namespace cv { namespace ml {
 ParamGrid::ParamGrid() { minVal = maxVal = 0.; logStep = 1; }
 ParamGrid::ParamGrid(double _minVal, double _maxVal, double _logStep)
 {
+    CV_TRACE_FUNCTION();
     minVal = std::min(_minVal, _maxVal);
     maxVal = std::max(_minVal, _maxVal);
     logStep = std::max(_logStep, 1.);
@@ -60,17 +61,20 @@ int StatModel::getVarCount() const { return 0; }
 
 bool StatModel::train( const Ptr<TrainData>&, int )
 {
+    CV_TRACE_FUNCTION();
     CV_Error(CV_StsNotImplemented, "");
     return false;
 }
 
 bool StatModel::train( InputArray samples, int layout, InputArray responses )
 {
+    CV_TRACE_FUNCTION();
     return train(TrainData::create(samples, layout, responses));
 }
 
 float StatModel::calcError( const Ptr<TrainData>& data, bool testerr, OutputArray _resp ) const
 {
+    CV_TRACE_FUNCTION_SKIP_NESTED();
     Mat samples = data->getSamples();
     int layout = data->getLayout();
     Mat sidx = testerr ? data->getTestSampleIdx() : data->getTrainSampleIdx();
@@ -119,6 +123,7 @@ float StatModel::calcError( const Ptr<TrainData>& data, bool testerr, OutputArra
 /* Calculates upper triangular matrix S, where A is a symmetrical matrix A=S'*S */
 static void Cholesky( const Mat& A, Mat& S )
 {
+    CV_TRACE_FUNCTION();
     CV_Assert(A.type() == CV_32F);
 
     S = A.clone();
@@ -133,6 +138,7 @@ static void Cholesky( const Mat& A, Mat& S )
    average row vector, <cov> - symmetric covariation matrix */
 void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
 {
+    CV_TRACE_FUNCTION();
     // check mean vector and covariance matrix
     Mat mean = _mean.getMat(), cov = _cov.getMat();
     int dim = (int)mean.total();  // dimensionality
index f930014..00c092d 100644 (file)
@@ -135,6 +135,7 @@ Ptr<LogisticRegression> LogisticRegression::load(const String& filepath, const S
 
 bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
 {
+    CV_TRACE_FUNCTION_SKIP_NESTED();
     // return value
     bool ok = false;
 
@@ -313,6 +314,7 @@ float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, i
 
 Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
 {
+    CV_TRACE_FUNCTION();
     Mat dest;
     exp(-data, dest);
     return 1.0/(1.0+dest);
@@ -320,6 +322,7 @@ Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
 
 double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
 {
+    CV_TRACE_FUNCTION();
     float llambda = 0;                   /*changed llambda from int to float to solve issue #7924*/
     int m;
     int n;
@@ -410,6 +413,7 @@ struct LogisticRegressionImpl_ComputeDradient_Impl : ParallelLoopBody
 
 void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
 {
+    CV_TRACE_FUNCTION();
     const int m = _data.rows;
     Mat pcal_a, pcal_b, pcal_ab;
 
@@ -431,6 +435,7 @@ void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labe
 
 Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
 {
+    CV_TRACE_FUNCTION();
     // implements batch gradient descent
     if(this->params.alpha<=0)
     {
index fa2a239..4482188 100644 (file)
@@ -49,6 +49,7 @@ namespace ml {
 //////////////////////////////////////////////////////////////////////////////////////////
 RTreeParams::RTreeParams()
 {
+    CV_TRACE_FUNCTION();
     calcVarImportance = false;
     nactiveVars = 0;
     termCrit = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 50, 0.1);
@@ -58,6 +59,7 @@ RTreeParams::RTreeParams(bool _calcVarImportance,
                          int _nactiveVars,
                          TermCriteria _termCrit )
 {
+    CV_TRACE_FUNCTION();
     calcVarImportance = _calcVarImportance;
     nactiveVars = _nactiveVars;
     termCrit = _termCrit;
@@ -69,6 +71,7 @@ class DTreesImplForRTrees : public DTreesImpl
 public:
     DTreesImplForRTrees()
     {
+        CV_TRACE_FUNCTION();
         params.setMaxDepth(5);
         params.setMinSampleCount(10);
         params.setRegressionAccuracy(0.f);
@@ -83,6 +86,7 @@ public:
 
     void clear()
     {
+        CV_TRACE_FUNCTION();
         DTreesImpl::clear();
         oobError = 0.;
         rng = RNG((uint64)-1);
@@ -90,6 +94,7 @@ public:
 
     const vector<int>& getActiveVars()
     {
+        CV_TRACE_FUNCTION();
         int i, nvars = (int)allVars.size(), m = (int)activeVars.size();
         for( i = 0; i < nvars; i++ )
         {
@@ -104,6 +109,7 @@ public:
 
     void startTraining( const Ptr<TrainData>& trainData, int flags )
     {
+        CV_TRACE_FUNCTION();
         DTreesImpl::startTraining(trainData, flags);
         int nvars = w->data->getNVars();
         int i, m = rparams.nactiveVars > 0 ? rparams.nactiveVars : cvRound(std::sqrt((double)nvars));
@@ -116,6 +122,7 @@ public:
 
     void endTraining()
     {
+        CV_TRACE_FUNCTION();
         DTreesImpl::endTraining();
         vector<int> a, b;
         std::swap(allVars, a);
@@ -124,6 +131,7 @@ public:
 
     bool train( const Ptr<TrainData>& trainData, int flags )
     {
+        CV_TRACE_FUNCTION();
         startTraining(trainData, flags);
         int treeidx, ntrees = (rparams.termCrit.type & TermCriteria::COUNT) != 0 ?
             rparams.termCrit.maxCount : 10000;
@@ -286,12 +294,14 @@ public:
 
     void writeTrainingParams( FileStorage& fs ) const
     {
+        CV_TRACE_FUNCTION();
         DTreesImpl::writeTrainingParams(fs);
         fs << "nactive_vars" << rparams.nactiveVars;
     }
 
     void write( FileStorage& fs ) const
     {
+        CV_TRACE_FUNCTION();
         if( roots.empty() )
             CV_Error( CV_StsBadArg, "RTrees have not been trained" );
 
@@ -319,6 +329,7 @@ public:
 
     void readParams( const FileNode& fn )
     {
+        CV_TRACE_FUNCTION();
         DTreesImpl::readParams(fn);
 
         FileNode tparams_node = fn["training_params"];
@@ -327,6 +338,7 @@ public:
 
     void read( const FileNode& fn )
     {
+        CV_TRACE_FUNCTION();
         clear();
 
         //int nclasses = (int)fn["nclasses"];
@@ -351,6 +363,7 @@ public:
 
     void getVotes( InputArray input, OutputArray output, int flags ) const
     {
+        CV_TRACE_FUNCTION();
         CV_Assert( !roots.empty() );
         int nclasses = (int)classLabels.size(), ntrees = (int)roots.size();
         Mat samples = input.getMat(), results;
@@ -435,6 +448,7 @@ public:
 
     bool train( const Ptr<TrainData>& trainData, int flags )
     {
+        CV_TRACE_FUNCTION();
         if (impl.getCVFolds() != 0)
             CV_Error(Error::StsBadArg, "Cross validation for RTrees is not implemented");
         return impl.train(trainData, flags);
@@ -442,22 +456,26 @@ public:
 
     float predict( InputArray samples, OutputArray results, int flags ) const
     {
+        CV_TRACE_FUNCTION();
         return impl.predict(samples, results, flags);
     }
 
     void write( FileStorage& fs ) const
     {
+        CV_TRACE_FUNCTION();
         impl.write(fs);
     }
 
     void read( const FileNode& fn )
     {
+        CV_TRACE_FUNCTION();
         impl.read(fn);
     }
 
     void getVotes_( InputArray samples, OutputArray results, int flags ) const
     {
-      impl.getVotes(samples, results, flags);
+        CV_TRACE_FUNCTION();
+        impl.getVotes(samples, results, flags);
     }
 
     Mat getVarImportance() const { return Mat_<float>(impl.varImportance, true); }
@@ -477,17 +495,20 @@ public:
 
 Ptr<RTrees> RTrees::create()
 {
+    CV_TRACE_FUNCTION();
     return makePtr<RTreesImpl>();
 }
 
 //Function needed for Python and Java wrappers
 Ptr<RTrees> RTrees::load(const String& filepath, const String& nodeName)
 {
+    CV_TRACE_FUNCTION();
     return Algorithm::load<RTrees>(filepath, nodeName);
 }
 
 void RTrees::getVotes(InputArray input, OutputArray output, int flags) const
 {
+    CV_TRACE_FUNCTION();
     const RTreesImpl* this_ = dynamic_cast<const RTreesImpl*>(this);
     if(!this_)
         CV_Error(Error::StsNotImplemented, "the class is not RTreesImpl");
index 2764876..eaed926 100644 (file)
@@ -50,6 +50,7 @@ using cv::ml::KNearest;
 static
 void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 )
 {
+    CV_TRACE_FUNCTION();
     float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f};
     float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f};
     float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f};
@@ -76,6 +77,7 @@ void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 )
 static
 void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType )
 {
+    CV_TRACE_FUNCTION();
     vector<int>::const_iterator sit = sizes.begin();
     int total = 0;
     for( ; sit != sizes.end(); ++sit )
@@ -226,6 +228,7 @@ protected:
 
 void CV_KMeansTest::run( int /*start_from*/ )
 {
+    CV_TRACE_FUNCTION();
     const int iters = 100;
     int sizesArr[] = { 5000, 7000, 8000 };
     int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
index 125fd8b..130c155 100644 (file)
@@ -64,6 +64,7 @@ using namespace cv::ml;
 
 static bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error)
 {
+    CV_TRACE_FUNCTION();
     error = 0.0f;
     float accuracy = 0.0f;
     Mat _p_labels_temp;
@@ -91,6 +92,7 @@ protected:
 
 void CV_LRTest::run( int /*start_from*/ )
 {
+    CV_TRACE_FUNCTION();
     // initialize varibles from the popular Iris Dataset
     string dataFileName = ts->get_data_path() + "iris.data";
     Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
@@ -150,6 +152,7 @@ protected:
 
 void CV_LRTest_SaveLoad::run( int /*start_from*/ )
 {
+    CV_TRACE_FUNCTION();
     int code = cvtest::TS::OK;
 
     // initialize varibles from the popular Iris Dataset
index 7193331..5e863f6 100644 (file)
@@ -51,6 +51,7 @@ CV_AMLTest::CV_AMLTest( const char* _modelName ) : CV_MLBaseTest( _modelName )
 
 int CV_AMLTest::run_test_case( int testCaseIdx )
 {
+    CV_TRACE_FUNCTION();
     int code = cvtest::TS::OK;
     code = prepare_test_case( testCaseIdx );
 
@@ -91,6 +92,7 @@ int CV_AMLTest::run_test_case( int testCaseIdx )
 
 int CV_AMLTest::validate_test_results( int testCaseIdx )
 {
+    CV_TRACE_FUNCTION();
     int iters;
     float mean, sigma;
     // read validation params
index 15ae200..b823b84 100644 (file)
@@ -87,6 +87,7 @@ int str_to_ann_train_method( String& str )
 
 void ann_check_data( Ptr<TrainData> _data )
 {
+    CV_TRACE_FUNCTION();
     Mat values = _data->getSamples();
     Mat var_idx = _data->getVarIdx();
     int nvars = (int)var_idx.total();
@@ -99,6 +100,7 @@ void ann_check_data( Ptr<TrainData> _data )
 // unroll the categorical responses to binary vectors
 Mat ann_get_new_responses( Ptr<TrainData> _data, map<int, int>& cls_map )
 {
+    CV_TRACE_FUNCTION();
     Mat train_sidx = _data->getTrainSampleIdx();
     int* train_sidx_ptr = train_sidx.ptr<int>();
     Mat responses = _data->getResponses();
@@ -130,6 +132,7 @@ Mat ann_get_new_responses( Ptr<TrainData> _data, map<int, int>& cls_map )
 
 float ann_calc_error( Ptr<StatModel> ann, Ptr<TrainData> _data, map<int, int>& cls_map, int type, vector<float> *resp_labels )
 {
+    CV_TRACE_FUNCTION();
     float err = 0;
     Mat samples = _data->getSamples();
     Mat responses = _data->getResponses();
@@ -241,6 +244,7 @@ CV_MLBaseTest::~CV_MLBaseTest()
 
 int CV_MLBaseTest::read_params( CvFileStorage* __fs )
 {
+    CV_TRACE_FUNCTION();
     FileStorage _fs(__fs, false);
     if( !_fs.isOpened() )
         test_case_count = -1;
@@ -265,6 +269,7 @@ int CV_MLBaseTest::read_params( CvFileStorage* __fs )
 
 void CV_MLBaseTest::run( int )
 {
+    CV_TRACE_FUNCTION();
     string filename = ts->get_data_path();
     filename += get_validation_filename();
     validationFS.open( filename, FileStorage::READ );
@@ -273,6 +278,7 @@ void CV_MLBaseTest::run( int )
     int code = cvtest::TS::OK;
     for (int i = 0; i < test_case_count; i++)
     {
+        CV_TRACE_REGION("iteration");
         int temp_code = run_test_case( i );
         if (temp_code == cvtest::TS::OK)
             temp_code = validate_test_results( i );
@@ -289,6 +295,7 @@ void CV_MLBaseTest::run( int )
 
 int CV_MLBaseTest::prepare_test_case( int test_case_idx )
 {
+    CV_TRACE_FUNCTION();
     clear();
 
     string dataPath = ts->get_data_path();
@@ -331,6 +338,7 @@ string& CV_MLBaseTest::get_validation_filename()
 
 int CV_MLBaseTest::train( int testCaseIdx )
 {
+    CV_TRACE_FUNCTION();
     bool is_trained = false;
     FileNode modelParamsNode =
         validationFS.getFirstTopLevelNode()["validation"][modelName][dataSetNames[testCaseIdx]]["model_params"];
@@ -489,6 +497,7 @@ int CV_MLBaseTest::train( int testCaseIdx )
 
 float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector<float> *resp )
 {
+    CV_TRACE_FUNCTION();
     int type = CV_TEST_ERROR;
     float err = 0;
     Mat _resp;
@@ -506,11 +515,13 @@ float CV_MLBaseTest::get_test_error( int /*testCaseIdx*/, vector<float> *resp )
 
 void CV_MLBaseTest::save( const char* filename )
 {
+    CV_TRACE_FUNCTION();
     model->save( filename );
 }
 
 void CV_MLBaseTest::load( const char* filename )
 {
+    CV_TRACE_FUNCTION();
     if( modelName == CV_NBAYES )
         model = Algorithm::load<NormalBayesClassifier>( filename );
     else if( modelName == CV_KNEAREST )
index e014d0e..f1d4ba1 100644 (file)
@@ -37,6 +37,7 @@ ocv_list_filterout(opencv_hdrs "modules/core/.*/cuda")
 ocv_list_filterout(opencv_hdrs "modules/cuda.*")
 ocv_list_filterout(opencv_hdrs "modules/cudev")
 ocv_list_filterout(opencv_hdrs "modules/core/.*/hal/")
+ocv_list_filterout(opencv_hdrs "modules/.+/utils/.*")
 ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker.hpp") # Conditional compilation
 
 set(cv2_generated_hdrs
index 382a087..41a76b1 100644 (file)
@@ -1,7 +1,22 @@
 #ifndef OPENCV_TS_HPP
 #define OPENCV_TS_HPP
 
-#include "opencv2/core/cvdef.h"
+#ifndef __OPENCV_TESTS
+#define __OPENCV_TESTS 1
+#endif
+
+#include "opencv2/opencv_modules.hpp"
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/videoio.hpp"
+#include "opencv2/highgui.hpp"
+
+#include "opencv2/core/utility.hpp"
+
+#include "opencv2/core/utils/trace.hpp"
+
 #include <stdarg.h> // for va_list
 
 #include "cvconfig.h"
@@ -46,9 +61,6 @@
 #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 
-#include "opencv2/core.hpp"
-#include "opencv2/core/utility.hpp"
-
 namespace cvtest
 {
 
@@ -615,6 +627,8 @@ void parseCustomOptions(int argc, char **argv);
 #define CV_TEST_MAIN_EX(resourcesubdir, INIT0, ...) \
 int main(int argc, char **argv) \
 { \
+    CV_TRACE_FUNCTION(); \
+    { CV_TRACE_REGION("INIT"); \
     using namespace cvtest; \
     TS* ts = TS::ptr(); \
     ts->init(resourcesubdir); \
@@ -624,6 +638,7 @@ int main(int argc, char **argv) \
     TEST_DUMP_OCL_INFO \
     __CV_TEST_EXEC_ARGS(__VA_ARGS__) \
     parseCustomOptions(argc, argv); \
+    } \
     return RUN_ALL_TESTS(); \
 }
 
index 672b9ff..3c162ad 100644 (file)
 #ifndef OPENCV_CUDA_PERF_UTILITY_HPP
 #define OPENCV_CUDA_PERF_UTILITY_HPP
 
-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/videoio.hpp"
-#include "opencv2/imgproc.hpp"
+#include "opencv2/ts.hpp"
+
 #include "opencv2/ts/ts_perf.hpp"
-#include "cvconfig.h"
 
 namespace perf
 {
index b107761..4100d29 100644 (file)
 #ifndef OPENCV_CUDA_TEST_UTILITY_HPP
 #define OPENCV_CUDA_TEST_UTILITY_HPP
 
+#include "opencv2/ts.hpp"
+
 #include <stdexcept>
-#include "cvconfig.h"
-#include "opencv2/core.hpp"
 #include "opencv2/core/cuda.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/ts.hpp"
 
 namespace cvtest
 {
index 58091f3..4d57aaa 100644 (file)
@@ -42,6 +42,8 @@
 #ifndef OPENCV_TS_OCL_PERF_HPP
 #define OPENCV_TS_OCL_PERF_HPP
 
+#include "opencv2/ts.hpp"
+
 #include "ocl_test.hpp"
 #include "ts_perf.hpp"
 
@@ -67,7 +69,7 @@ using std::tr1::tuple;
     protected: \
         virtual void PerfTestBody(); \
     }; \
-    TEST_F(OCL##_##fixture##_##name, name) { declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \
+    TEST_F(OCL##_##fixture##_##name, name) { CV_TRACE_REGION("PERF_TEST: " #fixture "_" #name); declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \
     void OCL##_##fixture##_##name::PerfTestBody()
 
 #define SIMPLE_PERF_TEST_P(fixture, name, params) \
@@ -79,7 +81,7 @@ using std::tr1::tuple;
     protected: \
         virtual void PerfTestBody(); \
     }; \
-    TEST_P(OCL##_##fixture##_##name, name) { declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \
+    TEST_P(OCL##_##fixture##_##name, name) { CV_TRACE_REGION("PERF_TEST_P: " #fixture "_" #name); declare.strategy(OCL_PERF_STRATEGY); RunPerfTestBody(); } \
     INSTANTIATE_TEST_CASE_P(/*none*/, OCL##_##fixture##_##name, params); \
     void OCL##_##fixture##_##name::PerfTestBody()
 
@@ -95,17 +97,27 @@ using std::tr1::tuple;
 
 #define OCL_PERF_ENUM ::testing::Values
 
-// TODO Replace finish call to dstUMat.wait()
+//! deprecated
 #define OCL_TEST_CYCLE() \
     for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
-
+//! deprecated
 #define OCL_TEST_CYCLE_N(n) \
     for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
-
+//! deprecated
 #define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
     for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
         for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)
 
+#undef PERF_SAMPLE_BEGIN
+#undef PERF_SAMPLE_END
+#define PERF_SAMPLE_BEGIN() \
+    cvtest::ocl::perf::safeFinish(); \
+    for(; next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
+    { \
+        CV_TRACE_REGION("iteration");
+#define PERF_SAMPLE_END() \
+    }
+
 
 namespace perf {
 
index 8c94ede..54b33ec 100644 (file)
@@ -42,8 +42,6 @@
 #ifndef OPENCV_TS_OCL_TEST_HPP
 #define OPENCV_TS_OCL_TEST_HPP
 
-#include "opencv2/opencv_modules.hpp"
-
 #include "opencv2/ts.hpp"
 
 #include "opencv2/imgcodecs.hpp"
index 05ccc63..0bdd346 100644 (file)
@@ -16,8 +16,9 @@ void checkIppStatus();
     cv::ipp::setIppStatus(0); \
     cv::theRNG().state = cvtest::param_seed;
 #define CV_TEST_CLEANUP ::cvtest::checkIppStatus();
-#define CV_TEST_BODY_IMPL \
+#define CV_TEST_BODY_IMPL(name) \
     { \
+       CV__TRACE_APP_FUNCTION_NAME(name); \
        try { \
           CV_TEST_INIT \
           Body(); \
@@ -53,7 +54,7 @@ void checkIppStatus();
             ::testing::Test::TearDownTestCase, \
             new ::testing::internal::TestFactoryImpl<\
                 GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL \
+    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \
     void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
 
 #undef TEST_F
@@ -79,7 +80,7 @@ void checkIppStatus();
             test_fixture::TearDownTestCase, \
             new ::testing::internal::TestFactoryImpl<\
                 GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\
-    void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV_TEST_BODY_IMPL \
+    void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_fixture "_" #test_name ) \
     void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body()
 
 #undef TEST_P
@@ -111,7 +112,7 @@ void checkIppStatus();
   int GTEST_TEST_CLASS_NAME_(test_case_name, \
                              test_name)::gtest_registering_dummy_ = \
       GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
-    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL \
+    void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() CV_TEST_BODY_IMPL( #test_case_name "_" #test_name ) \
     void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
 
 #endif  // OPENCV_TS_EXT_HPP
index bfa1811..5bbf191 100644 (file)
@@ -1,7 +1,8 @@
 #ifndef OPENCV_TS_PERF_HPP
 #define OPENCV_TS_PERF_HPP
 
-#include "opencv2/core.hpp"
+#include "opencv2/ts.hpp"
+
 #include "ts_gtest.h"
 #include "ts_ext.hpp"
 
@@ -537,7 +538,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
       protected:\
        virtual void PerfTestBody();\
      };\
-     TEST_F(test_case_name, test_name){ RunPerfTestBody(); }\
+     TEST_F(test_case_name, test_name){ CV_TRACE_REGION("PERF_TEST: " #test_case_name "_" #test_name); RunPerfTestBody(); }\
     }\
     void PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name)::test_case_name::PerfTestBody()
 
@@ -575,7 +576,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
       protected:\
        virtual void PerfTestBody();\
      };\
-     TEST_F(fixture, testname){ RunPerfTestBody(); }\
+     TEST_F(fixture, testname){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #testname); RunPerfTestBody(); }\
     }\
     void PERF_PROXY_NAMESPACE_NAME_(fixture, testname)::fixture::PerfTestBody()
 
@@ -608,7 +609,7 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
      protected:\
       virtual void PerfTestBody();\
     };\
-    TEST_P(fixture##_##name, name /*perf*/){ RunPerfTestBody(); }\
+    TEST_P(fixture##_##name, name /*perf*/){ CV_TRACE_REGION("PERF_TEST: " #fixture "_" #name); RunPerfTestBody(); }\
     INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
     void fixture##_##name::PerfTestBody()
 
@@ -631,7 +632,10 @@ void dumpOpenCLDevice();
 #define TEST_DUMP_OCL_INFO
 #endif
 
+
 #define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...)    \
+    CV_TRACE_FUNCTION(); \
+    { CV_TRACE_REGION("INIT"); \
     ::perf::Regression::Init(#modulename); \
     ::perf::TestBase::Init(std::vector<std::string>(impls, impls + sizeof impls / sizeof *impls), \
                            argc, argv); \
@@ -641,6 +645,7 @@ void dumpOpenCLDevice();
     ::perf::TestBase::RecordRunParameters(); \
     __CV_TEST_EXEC_ARGS(__VA_ARGS__) \
     TEST_DUMP_OCL_INFO \
+    } \
     return RUN_ALL_TESTS();
 
 // impls must be an array, not a pointer; "plain" should always be one of the implementations
@@ -657,10 +662,20 @@ int main(int argc, char **argv)\
     CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
 }
 
+//! deprecated
 #define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer())
+//! deprecated
 #define TEST_CYCLE() for(; next() && startTimer(); stopTimer())
+//! deprecated
 #define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r)
 
+//! Opens a sample loop driven by next()/startTimer()/stopTimer() (the same loop
+//! header as TEST_CYCLE()) and starts a trace region named "iteration" at the
+//! top of every pass. The brace opened here is closed by PERF_SAMPLE_END().
+#define PERF_SAMPLE_BEGIN() \
+    for(; next() && startTimer(); stopTimer()) \
+    { \
+        CV_TRACE_REGION("iteration");
+//! Closes the loop body opened by PERF_SAMPLE_BEGIN().
+#define PERF_SAMPLE_END() \
+    }
+
 namespace perf
 {
 namespace comparators
index ad6a38d..0f2116e 100755 (executable)
@@ -49,6 +49,9 @@ if __name__ == "__main__":
     parser.add_argument("--serial", metavar="serial number", default="", help="Android: directs command to the USB device or emulator with the given serial number")
     parser.add_argument("--package", metavar="package", default="", help="Android: run jUnit tests for specified package")
 
+    parser.add_argument("--trace", action="store_true", default=False, help="Trace: enable OpenCV tracing")
+    parser.add_argument("--trace_dump", metavar="trace_dump", default=-1, help="Trace: dump highlight calls (specify max entries count, 0 - dump all)")
+
     args, other_args = parser.parse_known_args()
 
     log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
index ca0841d..311f415 100644 (file)
@@ -4,6 +4,8 @@ import datetime
 from run_utils import *
 from run_long import LONG_TESTS_DEBUG_VALGRIND, longTestFilter
 
+timestamp = datetime.datetime.now()
+
 class TestSuite(object):
     def __init__(self, options, cache):
         self.options = options
@@ -20,7 +22,8 @@ class TestSuite(object):
             res.append("CUDA")
         return res
 
-    def getLogName(self, app, timestamp):
+    def getLogBaseName(self, app):
+        global timestamp
         app = self.getAlias(app)
         rev = self.cache.getGitVersion()
         if isinstance(timestamp, datetime.datetime):
@@ -34,7 +37,10 @@ class TestSuite(object):
             lname = "_".join([p for p in pieces if p])
             lname = re.sub(r'[\(\)\[\]\s,]', '_', lname)
             l = re.sub(r'_+', '_', lname)
-        return l + ".xml"
+        return l
+
+    def getLogName(self, app):
+        # XML log name: the base name built by getLogBaseName() plus ".xml"
+        base = self.getLogBaseName(app)
+        return base + '.xml'
 
     def listTests(self, short = False, main = False):
         if len(self.tests) == 0:
@@ -138,10 +144,25 @@ class TestSuite(object):
             if isColorEnabled(args):
                 args.append("--gtest_color=yes")
             cmd = self.wrapInValgrind([exe] + args)
+            env = {}
+            if not self.options.valgrind and self.options.trace:
+                env['OPENCV_TRACE'] = '1'
+                env['OPENCV_TRACE_LOCATION'] = 'OpenCVTrace-{}'.format(self.getLogBaseName(exe))
+                env['OPENCV_TRACE_SYNC_OPENCL'] = '1'
             tempDir = TempEnvDir('OPENCV_TEMP_PATH', "__opencv_temp.")
             tempDir.init()
             log.warning("Run: %s" % " ".join(cmd))
-            ret = execute(cmd, cwd = workingDir)
+            ret = execute(cmd, cwd = workingDir, env=env)
+            try:
+                if not self.options.valgrind and self.options.trace and int(self.options.trace_dump) >= 0:
+                    import trace_profiler
+                    trace = trace_profiler.Trace(env['OPENCV_TRACE_LOCATION']+'.txt')
+                    trace.process()
+                    trace.dump(max_entries=int(self.options.trace_dump))
+            except:
+                import traceback
+                traceback.print_exc()
+                pass
             tempDir.clean()
             hostlogpath = os.path.join(workingDir, logfile)
             if os.path.isfile(hostlogpath):
@@ -157,7 +178,6 @@ class TestSuite(object):
         args = args[:]
         logs = []
         test_list = self.getTestList(tests, black)
-        date = datetime.datetime.now()
         if len(test_list) != 1:
             args = [a for a in args if not a.startswith("--gtest_output=")]
         ret = 0
@@ -170,7 +190,7 @@ class TestSuite(object):
             else:
                 userlog = [a for a in args if a.startswith("--gtest_output=")]
                 if len(userlog) == 0:
-                    logname = self.getLogName(exe, date)
+                    logname = self.getLogName(exe)
                     more_args.append("--gtest_output=xml:" + logname)
                 else:
                     logname = userlog[0][userlog[0].find(":")+1:]
index 2e9a66f..8c07763 100644 (file)
@@ -25,10 +25,12 @@ class Err(Exception):
 def execute(cmd, silent = False, cwd = ".", env = None):
     try:
         log.debug("Run: %s", cmd)
-        if env:
+        if env is not None:
             for k in env:
                 log.debug("    Environ: %s=%s", k, env[k])
-            env = os.environ.update(env)
+            new_env = os.environ.copy()
+            new_env.update(env)
+            env = new_env
         if silent:
             return check_output(cmd, stderr = STDOUT, cwd = cwd, env = env).decode("latin-1")
         else:
diff --git a/modules/ts/misc/trace_profiler.py b/modules/ts/misc/trace_profiler.py
new file mode 100644 (file)
index 0000000..1d10890
--- /dev/null
@@ -0,0 +1,435 @@
+from __future__ import print_function
+
+import os
+import sys
+import csv
+from pprint import pprint
+from collections import deque
+
+# trace.hpp: implementation bits of the region flags
+REGION_FLAG_IMPL_MASK = 0xF << 16
+REGION_FLAG_IMPL_IPP = 0x1 << 16
+REGION_FLAG_IMPL_OPENCL = 0x2 << 16
+
+# Set to True to route dprint()/dpprint() to print()/pprint()
+DEBUG = False
+
+if DEBUG:
+    dprint = print
+    dpprint = pprint
+else:
+    # No-op stand-ins. They accept any positional/keyword arguments so that a
+    # call which works with DEBUG enabled (e.g. several values passed to
+    # print) does not raise TypeError when DEBUG is disabled.
+    def dprint(*args, **kwargs):
+        pass
+    def dpprint(*args, **kwargs):
+        pass
+
+def tryNum(s):
+    """Convert the string `s` to an integer when possible, otherwise return it unchanged.
+
+    Strings starting with '0x' are tried as base 16 first; then a plain int()
+    conversion is tried, and on Python 2 a long() conversion as well.
+    """
+    converters = []
+    if s.startswith('0x'):
+        converters.append(lambda text: int(text, 16))
+    converters.append(int)
+    if sys.version_info[0] < 3:
+        converters.append(long)
+    for convert in converters:
+        try:
+            return convert(s)
+        except ValueError:
+            continue
+    return s
+
+def formatTimestamp(t):
+    # scale by 1e-6 and render with exactly three decimals
+    scaled = t * 1e-6
+    return "{:.3f}".format(scaled)
+
+try:
+    from statistics import median
+except ImportError:
+    # Fallback used when the statistics module cannot be imported
+    def median(lst):
+        ordered = sorted(lst)
+        count = len(ordered)
+        mid = (count - 1) // 2
+        if count % 2 == 0:
+            # even count: mean of the two middle values
+            return 0.5 * (ordered[mid] + ordered[mid + 1])
+        return ordered[mid]
+
+def getCXXFunctionName(spec):
+    """Shorten the location name string `spec` to a compact function name.
+
+    Names starting with 'IPP' or 'OpenCL' keep that prefix (plus a directly
+    following '#' or ':') and the first run of identifier characters
+    (alphanumerics, '_' and ':') found after it.
+    For every other name the trailing parenthesised group is split off and the
+    trailing word of the remainder is returned; names containing 'operator'
+    get the split-off group appended again, and a leading '&' is dropped.
+    """
+    def dropParams(spec):
+        # Returns a pair (text before the last parenthesised group, that group).
+        # Scans backwards, counting ')' up and '(' down, so nested parentheses
+        # inside the group are skipped.
+        pos = len(spec) - 1
+        depth = 0
+        while pos >= 0:
+            if spec[pos] == ')':
+                depth = depth + 1
+            elif spec[pos] == '(':
+                depth = depth - 1
+                if depth == 0:
+                    # Group starts the string or directly follows '#'/':':
+                    # process the text between this '(' and the last character
+                    # recursively instead of cutting here.
+                    if pos == 0 or spec[pos - 1] in ['#', ':']:
+                        res = dropParams(spec[pos+1:-1])
+                        return (spec[:pos] + res[0], res[1])
+                    return (spec[:pos], spec[pos:])
+            pos = pos - 1
+        # no balanced group found
+        return (spec, '')
+
+    def extractName(spec):
+        # Scans backwards; once an alphanumeric character has been seen, the
+        # next space ends the name and everything after that space is returned.
+        pos = len(spec) - 1
+        inName = False
+        while pos >= 0:
+            if spec[pos] == ' ':
+                if inName:
+                    return spec[pos+1:]
+            elif spec[pos].isalnum():
+                inName = True
+            pos = pos - 1
+        return spec
+
+    if spec.startswith('IPP') or spec.startswith('OpenCL'):
+        prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
+        prefix = spec[:prefix_size]
+        # keep a '#' or ':' that immediately follows the prefix
+        if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
+            prefix = prefix + spec[prefix_size]
+            prefix_size = prefix_size + 1
+        # skip ahead to the first identifier character
+        begin = prefix_size
+        while begin < len(spec):
+            if spec[begin].isalnum() or spec[begin] in ['_', ':']:
+                break
+            begin = begin + 1
+        if begin == len(spec):
+            # nothing identifier-like after the prefix: leave the name as is
+            return spec
+        # extend to the end of that identifier run
+        end = begin
+        while end < len(spec):
+            if not (spec[end].isalnum() or spec[end] in ['_', ':']):
+                break
+            end = end + 1
+        return prefix + spec[begin:end]
+
+    spec = spec.replace(') const', ')') # const methods
+    (ret_type_name, params) = dropParams(spec)
+    name = extractName(ret_type_name)
+    if 'operator' in name:
+        # append the split-off group again for operator names
+        return name + params
+    if name.startswith('&'):
+        return name[1:]
+    return name
+
+# Maximum number of location IDs (a task plus its ancestors) that Trace.dump()
+# combines into one call ID.
+stack_size = 10
+
+class Trace:
+    def __init__(self, filename=None):
+        # Parser state, filled in while trace files are read
+        self.pending_files = deque()  # files queued for parse_file()
+        self.threads_stack = {}       # thread ID -> stack of (threadID, taskID) pairs
+        self.locations = {}           # location ID -> TraceLocation
+        self.tasks_list = []          # every TraceTask that was created
+        self.tasks = {}               # (threadID, taskID) -> TraceTask
+        # a truthy filename is loaded right away
+        if filename:
+            self.load(filename)
+
+    class TraceTask:
+        """One traced task: identity, timestamps, parent link and IPP/OpenCL time counters."""
+
+        def __init__(self, threadID, taskID, locationID, beginTimestamp):
+            # identity
+            self.threadID, self.taskID, self.locationID = threadID, taskID, locationID
+            # time span; the end timestamp starts out unset
+            self.beginTimestamp, self.endTimestamp = beginTimestamp, None
+            # parent link and children
+            self.parentThreadID = self.parentTaskID = None
+            self.childTask = []
+            # IPP / OpenCL time counters
+            self.selfTimeIPP = self.totalTimeIPP = 0
+            self.selfTimeOpenCL = self.totalTimeOpenCL = 0
+
+        def __repr__(self):
+            # note the order inside the IPP/OpenCL fields: total first, then self
+            fields = [
+                "TID={}".format(self.threadID),
+                "ID={}".format(self.taskID),
+                "loc={}".format(self.locationID),
+                "parent={}:{}".format(self.parentThreadID, self.parentTaskID),
+                "begin={}".format(self.beginTimestamp),
+                "end={}".format(self.endTimestamp),
+                "IPP={}/{}".format(self.totalTimeIPP, self.selfTimeIPP),
+                "OpenCL={}/{}".format(self.totalTimeOpenCL, self.selfTimeOpenCL),
+            ]
+            return " ".join(fields)
+
+
+    class TraceLocation:
+        """A location record: ID, file base name, line, shortened function name and flags."""
+
+        def __init__(self, locationID, filename, line, name, flags):
+            self.locationID = locationID
+            # keep only the last path component
+            self.filename = os.path.basename(filename)
+            self.line = line
+            # store the shortened form produced by getCXXFunctionName()
+            self.name = getCXXFunctionName(name)
+            self.flags = flags
+
+        def __str__(self):
+            return "{0}#{1}:{2}".format(self.name, self.filename, self.line)
+
+        def __repr__(self):
+            return "ID={0} {1}:{2}:{3}".format(self.locationID, self.filename, self.line, self.name)
+
+    def parse_file(self, filename):
+        """Read one trace file and pass its records to parse_line().
+
+        Blank lines are ignored. Lines starting with '#' are not records; among
+        them a '#thread file: <name>' line queues <name>, joined with the
+        directory of the current file, in self.pending_files.
+        """
+        dprint("Process file: '{}'".format(filename))
+        base_dir = os.path.split(filename)[0]
+        with open(filename) as infile:
+            for line in infile:
+                line = line.strip()
+                if not line:
+                    # nothing left after stripping; indexing line[0] below
+                    # would raise IndexError
+                    continue
+                if line[0] == "#":
+                    if line.startswith("#thread file:"):
+                        name = line.split(':', 1)[1].strip()
+                        self.pending_files.append(os.path.join(base_dir, name))
+                    continue
+                self.parse_line(line)
+
+    def parse_line(self, line):
+        """Parse one comma-separated trace record and update the parser state.
+
+        The first field selects the record type:
+          'l' - location: location ID, file name, line number, name, flags
+          'b' - task begin: thread ID, timestamp, location ID, task ID
+          'e' - task end: same leading fields as 'b'
+        Fields from index 5 on that look like key=value are collected as extra
+        options: 'parentThread'/'parent' are used on begin, 'tIPP'/'tOCL' on
+        end. Any other record type leaves the state untouched.
+        """
+        opts = line.split(',')
+        dpprint(opts)
+        if opts[0] == 'l':
+            # re-split with csv so that quoted fields are handled
+            opts = list(csv.reader([line]))[0]
+            locationID = int(opts[1])
+            filename = str(opts[2])
+            line_no = int(opts[3])
+            name = opts[4]
+            flags = tryNum(opts[5])
+            self.locations[locationID] = self.TraceLocation(locationID, filename, line_no, name, flags)
+            return
+        extra_opts = {}
+        for e in opts[5:]:
+            if '=' not in e:
+                continue
+            # split on the first '=' only, so a value may itself contain '='
+            (k, v) = e.split('=', 1)
+            extra_opts[k] = tryNum(v)
+        if extra_opts:
+            dpprint(extra_opts)
+        threadID = None
+        taskID = None
+        locationID = None
+        ts = None
+        if opts[0] in ['b', 'e']:
+            threadID = int(opts[1])
+            taskID = int(opts[4])
+            locationID = int(opts[3])
+            ts = tryNum(opts[2])
+        thread_stack = None
+        currentTask = (None, None)
+        if threadID is not None:
+            # fetch (or create) the stack of open tasks of this thread
+            if threadID not in self.threads_stack:
+                thread_stack = deque()
+                self.threads_stack[threadID] = thread_stack
+            else:
+                thread_stack = self.threads_stack[threadID]
+            # innermost open task of the thread, if any
+            currentTask = None if not thread_stack else thread_stack[-1]
+        t = (threadID, taskID)
+        if opts[0] == 'b':
+            assert t not in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
+            task = self.TraceTask(threadID, taskID, locationID, ts)
+            self.tasks[t] = task
+            self.tasks_list.append(task)
+            thread_stack.append((threadID, taskID))
+            # default parent: the task that was open on this thread ...
+            if currentTask:
+                task.parentThreadID = currentTask[0]
+                task.parentTaskID = currentTask[1]
+            # ... unless the record names the parent explicitly
+            if 'parentThread' in extra_opts:
+                task.parentThreadID = extra_opts['parentThread']
+            if 'parent' in extra_opts:
+                task.parentTaskID = extra_opts['parent']
+        if opts[0] == 'e':
+            task = self.tasks[t]
+            task.endTimestamp = ts
+            if 'tIPP' in extra_opts:
+                task.selfTimeIPP = extra_opts['tIPP']
+            if 'tOCL' in extra_opts:
+                task.selfTimeOpenCL = extra_opts['tOCL']
+            thread_stack.pop()
+
+    def load(self, filename):
+        """Parse `filename` and every file that gets queued in self.pending_files meanwhile."""
+        self.pending_files.append(filename)
+        if DEBUG:
+            # echo the raw file content when debugging
+            with open(filename, 'r') as f:
+                print(f.read(), end='')
+        # the most recently queued file is parsed first; parsing may queue more
+        while len(self.pending_files) > 0:
+            next_file = self.pending_files.pop()
+            self.parse_file(next_file)
+
+    def getParentTask(self, task):
+        # tasks are keyed by (threadID, taskID); an unknown parent yields None
+        parent_key = (task.parentThreadID, task.parentTaskID)
+        return self.tasks.get(parent_key)
+
+    def _propagateImplTimes(self, ancestor, timeIPP, timeOpenCL, stop_location=None):
+        """Walk from `ancestor` up the parent chain, distributing IPP/OpenCL time.
+
+        An ancestor whose location flags select the IPP (OpenCL) implementation
+        has the amount subtracted from its self time, and nothing more of that
+        kind is passed further up; every other ancestor gets the amount added
+        to its total time. The walk ends when it reaches an ancestor located at
+        `stop_location` (if given) or when no parent is left.
+        """
+        while ancestor:
+            if stop_location is not None and ancestor.locationID == stop_location:
+                break
+            impl = self.locations[ancestor.locationID].flags & REGION_FLAG_IMPL_MASK
+            if impl == REGION_FLAG_IMPL_IPP:
+                ancestor.selfTimeIPP = ancestor.selfTimeIPP - timeIPP
+                timeIPP = 0
+            else:
+                ancestor.totalTimeIPP = ancestor.totalTimeIPP + timeIPP
+            if impl == REGION_FLAG_IMPL_OPENCL:
+                ancestor.selfTimeOpenCL = ancestor.selfTimeOpenCL - timeOpenCL
+                timeOpenCL = 0
+            else:
+                ancestor.totalTimeOpenCL = ancestor.totalTimeOpenCL + timeOpenCL
+            ancestor = self.getParentTask(ancestor)
+
+    def process(self):
+        """Derive durations and IPP/OpenCL totals for all parsed tasks.
+
+        Steps:
+          1. sort tasks by begin timestamp and set duration/selfDuration
+             (None when a task has no usable end timestamp);
+          2. link children to parents, subtract child durations from the
+             parent's self duration and propagate each task's own IPP/OpenCL
+             time to its ancestors, stopping at a 'parallel_for' task;
+          3. for every 'parallel_for' task, scale the children's IPP/OpenCL
+             totals by duration / sum(child durations) and propagate the result
+             starting at the 'parallel_for' task itself.
+        Tasks without a duration are left out of the duration arithmetic
+        instead of raising TypeError.
+        """
+        self.tasks_list.sort(key=lambda x: x.beginTimestamp)
+
+        # location ID of the first location named 'parallel_for', if any
+        parallel_for_location = None
+        for l in self.locations.values():
+            if l.name == 'parallel_for':
+                parallel_for_location = l.locationID
+                break
+
+        for task in self.tasks_list:
+            try:
+                task.duration = task.endTimestamp - task.beginTimestamp
+            except TypeError:
+                # no usable end timestamp, e.g. the task was never closed
+                task.duration = None
+            task.selfDuration = task.duration
+            task.totalTimeIPP = task.selfTimeIPP
+            task.totalTimeOpenCL = task.selfTimeOpenCL
+
+        dpprint(self.tasks)
+        dprint("Calculate total times")
+
+        for task in self.tasks_list:
+            parentTask = self.getParentTask(task)
+            if parentTask:
+                if task.duration is not None and parentTask.selfDuration is not None:
+                    parentTask.selfDuration = parentTask.selfDuration - task.duration
+                parentTask.childTask.append(task)
+                self._propagateImplTimes(parentTask, task.selfTimeIPP, task.selfTimeOpenCL,
+                                         stop_location=parallel_for_location)  # TODO parallel_for
+
+        dpprint(self.tasks)
+        dprint("Calculate total times (parallel_for)")
+
+        for task in self.tasks_list:
+            if task.locationID == parallel_for_location:
+                task.selfDuration = 0
+                childDuration = sum([t.duration for t in task.childTask if t.duration is not None])
+                if not task.duration or not childDuration:
+                    continue
+                timeCoef = task.duration / float(childDuration)
+                childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
+                childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
+                if childTimeIPP == 0 and childTimeOpenCL == 0:
+                    continue
+                self._propagateImplTimes(task, childTimeIPP * timeCoef, childTimeOpenCL * timeCoef)
+
+        dpprint(self.tasks)
+        dprint("Done")
+
+    def dump(self, max_entries):
+        """Print per-call-chain timing statistics, largest summed self time first.
+
+        Tasks are grouped by call ID: the tuple of location IDs of a task and
+        its ancestors, limited to stack_size entries. Each group is printed as
+        two rows (self-time statistics, then total-time statistics) followed by
+        an empty line. Columns: min, max, median, average, sum, IPP sum and its
+        share of the sum, OpenCL sum and its share of the sum.
+
+        max_entries -- int; when positive, at most that many groups are
+                       printed, otherwise all of them.
+        """
+        assert isinstance(max_entries, int)
+
+        class CallInfo():
+            def __init__(self, callID):
+                self.callID = callID
+                self.threads = set()
+                self.totalTimes = []
+                self.selfTimes = []
+                self.selfTimesIPP = []
+                self.selfTimesOpenCL = []
+                self.totalTimesIPP = []
+                self.totalTimesOpenCL = []
+
+        calls = {}
+
+        for currentTask in self.tasks_list:
+            # collect location IDs from the task upwards
+            chain = []
+            node = currentTask
+            while node and len(chain) < stack_size:
+                chain.append(node.locationID)
+                node = self.getParentTask(node)
+            callID = tuple(chain)
+            call = calls.get(callID)
+            if call is None:
+                call = CallInfo(callID)
+                calls[callID] = call
+            call.threads.add(currentTask.threadID)
+            call.totalTimes.append(currentTask.duration)
+            call.selfTimes.append(currentTask.selfDuration)
+            call.selfTimesIPP.append(currentTask.selfTimeIPP)
+            call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
+            call.totalTimesIPP.append(currentTask.totalTimeIPP)
+            call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
+
+        dpprint(self.tasks)
+        dpprint(self.locations)
+        dpprint(calls)
+
+        def sumBy(attr):
+            return {k: sum(getattr(v, attr)) for (k, v) in calls.items()}
+
+        calls_self_sum = sumBy('selfTimes')
+        calls_total_sum = sumBy('totalTimes')
+        calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
+        calls_sorted = sorted(calls.keys(), key=calls_self_sum.__getitem__, reverse=True)
+
+        calls_self_sum_IPP = sumBy('selfTimesIPP')
+        calls_total_sum_IPP = sumBy('totalTimesIPP')
+
+        calls_self_sum_OpenCL = sumBy('selfTimesOpenCL')
+        calls_total_sum_OpenCL = sumBy('totalTimesOpenCL')
+
+        if max_entries > 0:
+            calls_sorted = calls_sorted[:max_entries]
+
+        def formatPercents(p):
+            if p is None:
+                return ''
+            return "{:>3d}".format(int(p*100))
+
+        name_width = 70
+        timestamp_width = 12
+        ts_col = ' {:>' + str(timestamp_width) + '}'
+        fmt = "{:>3} {:<" + str(name_width) + "} {:>8} {:>3}" + ts_col * 5 + (ts_col + ' {:>3}') * 2
+        fmt2 = fmt  # both rows share one layout
+        print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
+        print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
+
+        def statCells(times, medianValue, total, implIPP, implOpenCL):
+            # the nine statistic columns of one row
+            def share(part):
+                if total > 0:
+                    return formatPercents(part / float(total))
+                return formatPercents(None)
+            return [
+                formatTimestamp(min(times)),
+                formatTimestamp(max(times)),
+                formatTimestamp(medianValue),
+                formatTimestamp(sum(times) / float(len(times))),
+                formatTimestamp(sum(times)),
+                formatTimestamp(implIPP),
+                share(implIPP),
+                formatTimestamp(implOpenCL),
+                share(implOpenCL),
+            ]
+
+        for (index, callID) in enumerate(calls_sorted):
+            call = calls[callID]
+            # innermost location is printed in full, ancestors by name only
+            names = [str(self.locations[callID[0]])]
+            names.extend(self.locations[l].name for l in callID[1:])
+            loc_str = '|'.join(names)
+            if len(loc_str) > name_width:
+                loc_str = loc_str[:name_width-3] + '...'
+            selfCells = statCells(call.selfTimes, calls_median[callID], calls_self_sum[callID],
+                                  calls_self_sum_IPP[callID], calls_self_sum_OpenCL[callID])
+            print(fmt.format(index + 1, loc_str, len(call.selfTimes), len(call.threads), *selfCells))
+            totalCells = statCells(call.totalTimes, median(call.totalTimes), calls_total_sum[callID],
+                                   calls_total_sum_IPP[callID], calls_total_sum_OpenCL[callID])
+            print(fmt2.format("", "", "", "", *totalCells))
+            print()
+
+if __name__ == "__main__":
+    # usage: trace_profiler.py [trace file] [max entries]
+    cli_args = sys.argv[1:]
+    tracefile = cli_args[0] if cli_args else 'OpenCVTrace.txt'
+    count = int(cli_args[1]) if len(cli_args) > 1 else 10
+    trace = Trace(tracefile)
+    trace.process()
+    trace.dump(max_entries = count)
+    print("OK")
index fbb13ec..155c377 100644 (file)
@@ -1,7 +1,5 @@
-#include "opencv2/core/utility.hpp"
-#include "opencv2/core/private.hpp"
 #include "opencv2/ts.hpp"
-#include "cvconfig.h"
+#include "opencv2/core/private.hpp"
 
 #ifdef GTEST_LINKED_AS_SHARED_LIBRARY
 #error ts module should not have GTEST_LINKED_AS_SHARED_LIBRARY defined
index b2763d4..db6de8e 100644 (file)
@@ -225,6 +225,7 @@ bool BaseTest::can_do_fast_forward()
 
 void BaseTest::safe_run( int start_from )
 {
+    CV_TRACE_FUNCTION();
     read_params( ts->get_file_storage() );
     ts->update_context( 0, -1, true );
     ts->update_context( this, -1, true );
index e5a5e31..159437c 100644 (file)
@@ -954,6 +954,8 @@ void TestBase::Init(int argc, const char* const argv[])
 void TestBase::Init(const std::vector<std::string> & availableImpls,
                  int argc, const char* const argv[])
 {
+    CV_TRACE_FUNCTION();
+
     available_impls = availableImpls;
 
     const std::string command_line_keys =
@@ -1182,6 +1184,7 @@ enum PERF_STRATEGY TestBase::getCurrentModulePerformanceStrategy()
 
 int64 TestBase::_calibrate()
 {
+    CV_TRACE_FUNCTION();
     class _helper : public ::perf::TestBase
     {
         public:
@@ -1248,6 +1251,7 @@ void TestBase::declareArray(SizeVector& sizes, cv::InputOutputArray a, WarmUpTyp
 
 void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype)
 {
+    CV_TRACE_FUNCTION();
     if (a.empty())
         return;
     else if (a.isUMat())
@@ -1419,6 +1423,7 @@ bool TestBase::next()
                             median_ms > perf_validation_time_threshold_ms &&
                             (grow || metrics.stddev > perf_stability_criteria * fabs(metrics.mean)))
                     {
+                        CV_TRACE_REGION("idle_delay");
                         printf("Performance is unstable, it may be a result of overheat problems\n");
                         printf("Idle delay for %d ms... \n", perf_validation_idle_delay_ms);
 #if defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64
@@ -1682,8 +1687,17 @@ void TestBase::validateMetrics()
 
 void TestBase::reportMetrics(bool toJUnitXML)
 {
+    CV_TRACE_FUNCTION();
+
     performance_metrics& m = calcMetrics();
 
+    CV_TRACE_ARG_VALUE(samples, "samples", (int64)m.samples);
+    CV_TRACE_ARG_VALUE(outliers, "outliers", (int64)m.outliers);
+    CV_TRACE_ARG_VALUE(median, "mean_ms", (double)(m.mean * 1000.0f / metrics.frequency));
+    CV_TRACE_ARG_VALUE(median, "median_ms", (double)(m.median * 1000.0f / metrics.frequency));
+    CV_TRACE_ARG_VALUE(stddev, "stddev_ms", (double)(m.stddev * 1000.0f / metrics.frequency));
+    CV_TRACE_ARG_VALUE(stddev_percents, "stddev_percents", (double)(m.stddev / (double)m.mean * 100.0f));
+
     if (m.terminationReason == performance_metrics::TERM_SKIP_TEST)
     {
         if (toJUnitXML)
index b6f991b..97c3a64 100644 (file)
@@ -590,28 +590,33 @@ VideoCapture::VideoCapture()
 
 VideoCapture::VideoCapture(const String& filename, int apiPreference)
 {
+    CV_TRACE_FUNCTION();
     open(filename, apiPreference);
 }
 
 VideoCapture::VideoCapture(const String& filename)
 {
+    CV_TRACE_FUNCTION();
     open(filename, CAP_ANY);
 }
 
 VideoCapture::VideoCapture(int index)
 {
+    CV_TRACE_FUNCTION();
     open(index);
 }
 
 VideoCapture::~VideoCapture()
 {
+    CV_TRACE_FUNCTION();
+
     icap.release();
     cap.release();
 }
 
 bool VideoCapture::open(const String& filename, int apiPreference)
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_FUNCTION();
 
     if (isOpened()) release();
     icap = IVideoCapture_create(filename);
@@ -624,14 +629,14 @@ bool VideoCapture::open(const String& filename, int apiPreference)
 
 bool VideoCapture::open(const String& filename)
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_FUNCTION();
 
     return open(filename, CAP_ANY);
 }
 
 bool VideoCapture::open(int index)
 {
-    CV_INSTRUMENT_REGION()
+    CV_TRACE_FUNCTION();
 
     if (isOpened()) release();
     icap = IVideoCapture_create(index);
@@ -642,6 +647,8 @@ bool VideoCapture::open(int index)
 }
 bool  VideoCapture::open(int cameraNum, int apiPreference)
 {
+    CV_TRACE_FUNCTION();
+
     cameraNum = cameraNum + apiPreference;
     return open(cameraNum);
 }
@@ -653,6 +660,7 @@ bool VideoCapture::isOpened() const
 
 void VideoCapture::release()
 {
+    CV_TRACE_FUNCTION();
     icap.release();
     cap.release();
 }
diff --git a/samples/cpp/application_trace.cpp b/samples/cpp/application_trace.cpp
new file mode 100644 (file)
index 0000000..ddddd51
--- /dev/null
@@ -0,0 +1,92 @@
+/* OpenCV Application Tracing support demo. */
+#include <iostream>
+
+#include <opencv2/opencv.hpp>
+#include <opencv2/core/utils/trace.hpp>
+
+using namespace cv;
+using namespace std;
+
+// Shows the input frame in the "Live" window, then its Canny result
+// (computed from a grayscale copy) in the "Processed" window.
+static void process_frame(const cv::UMat& frame)
+{
+    CV_TRACE_FUNCTION(); // OpenCV Trace macro for function
+
+    cv::imshow("Live", frame);
+
+    cv::UMat grayscale;
+    cv::cvtColor(frame, grayscale, cv::COLOR_BGR2GRAY);
+
+    cv::UMat edges;
+    cv::Canny(grayscale, edges, 32, 64, 3);
+    cv::imshow("Processed", edges);
+}
+
+// Demo entry point: opens a camera or a video file, then reads and processes
+// frames, wrapping every stage in an OpenCV trace region.
+// Returns 0 on normal termination (or after printing help) and -1 when the
+// capture source could not be opened.
+int main(int argc, char** argv)
+{
+    CV_TRACE_FUNCTION();
+
+    cv::CommandLineParser parser(argc, argv,
+        "{help h ? |     | help message}"
+        "{n        | 100 | number of frames to process }"
+        "{@video   | 0   | video filename or cameraID }"
+    );
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    VideoCapture capture;
+    std::string video = parser.get<string>("@video");
+    // A single digit selects a camera by index, anything else is a file name.
+    // isdigit() requires a value representable as unsigned char (or EOF):
+    // passing a plain, possibly negative, char is undefined behaviour.
+    if (video.size() == 1 && isdigit(static_cast<unsigned char>(video[0])))
+        capture.open(parser.get<int>("@video"));
+    else
+        capture.open(video);
+    int nframes = 0;
+    if (capture.isOpened())
+    {
+        nframes = (int)capture.get(CAP_PROP_FRAME_COUNT);
+        cout << "Video " << video <<
+            ": width=" << capture.get(CAP_PROP_FRAME_WIDTH) <<
+            ", height=" << capture.get(CAP_PROP_FRAME_HEIGHT) <<
+            ", nframes=" << nframes << endl;
+    }
+    else
+    {
+        cout << "Could not initialize video capturing...\n";
+        return -1;
+    }
+
+    // Never ask for more frames than the source reports (when it reports any)
+    int N = parser.get<int>("n");
+    if (nframes > 0 && N > nframes)
+        N = nframes;
+
+    cout << "Start processing..." << endl
+        << "Press ESC key to terminate" << endl;
+
+    UMat frame;
+    // N <= 0 means "no frame limit"
+    for (int i = 0; N > 0 ? (i < N) : true; i++)
+    {
+        CV_TRACE_REGION("FRAME"); // OpenCV Trace macro for named "scope" region
+        {
+            CV_TRACE_REGION("read");
+            capture.read(frame);
+
+            if (frame.empty())
+            {
+                cerr << "Can't capture frame: " << i << std::endl;
+                break;
+            }
+
+            // OpenCV Trace macro for NEXT named region in the same C++ scope
+            // Previous "read" region will be marked complete on this line.
+            // Use this to eliminate unnecessary curly braces.
+            CV_TRACE_REGION_NEXT("process");
+            process_frame(frame);
+
+            CV_TRACE_REGION_NEXT("delay");
+            if (waitKey(1) == 27/*ESC*/)
+                break;
+        }
+    }
+
+    return 0;
+}