Implementations instrumentation framework for OpenCV performance system;
authorPavel Vlasov <pavel.vlasov@intel.com>
Mon, 15 Aug 2016 14:12:45 +0000 (17:12 +0300)
committerPavel Vlasov <pavel.vlasov@intel.com>
Wed, 17 Aug 2016 13:26:03 +0000 (16:26 +0300)
CMakeLists.txt
cmake/templates/cvconfig.h.in
modules/core/include/opencv2/core/private.hpp
modules/core/include/opencv2/core/utility.hpp
modules/core/src/parallel.cpp
modules/core/src/stat.cpp
modules/core/src/system.cpp
modules/ts/include/opencv2/ts/ocl_perf.hpp
modules/ts/include/opencv2/ts/ts_perf.hpp
modules/ts/src/ts_perf.cpp

index 05d8854..c956854 100644 (file)
@@ -286,6 +286,7 @@ OCV_OPTION(ENABLE_NOISY_WARNINGS      "Show all warnings even if they are too no
 OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors"                                 OFF )
 OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF  IF ANDROID )
 OCV_OPTION(ENABLE_IMPL_COLLECTION     "Collect implementation data on function call"             OFF )
+OCV_OPTION(ENABLE_INSTRUMENTATION     "Instrument functions to collect calls trace and performance" OFF )
 OCV_OPTION(GENERATE_ABI_DESCRIPTOR    "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
 
 OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
index 4012f69..56d415b 100644 (file)
 
 /* FP16 */
 #cmakedefine HAVE_FP16
+
+/* Library was compiled with functions instrumentation */
+#cmakedefine ENABLE_INSTRUMENTATION
index c884815..6fcd171 100644 (file)
@@ -344,7 +344,7 @@ static struct __IppInitializer__ __ipp_initializer__;
 #ifdef CV_IPP_RUN_VERBOSE
 #define CV_IPP_RUN_(condition, func, ...)                                   \
     {                                                                       \
-        if (cv::ipp::useIPP() && (condition) && func)                       \
+        if (cv::ipp::useIPP() && (condition) && (func))                     \
         {                                                                   \
             printf("%s: IPP implementation is running\n", CV_Func);         \
             fflush(stdout);                                                 \
@@ -376,7 +376,7 @@ static struct __IppInitializer__ __ipp_initializer__;
     }
 #else
 #define CV_IPP_RUN_(condition, func, ...)                                   \
-    if (cv::ipp::useIPP() && (condition) && func)                           \
+    if (cv::ipp::useIPP() && (condition) && (func))                         \
     {                                                                       \
         CV_IMPL_ADD(CV_IMPL_IPP);                                           \
         return __VA_ARGS__;                                                 \
@@ -393,7 +393,7 @@ static struct __IppInitializer__ __ipp_initializer__;
 #define CV_IPP_RUN_FAST(func, ...)
 #endif
 
-#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)
+#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
 
 
 #ifndef IPPI_CALL
@@ -449,6 +449,135 @@ CV_EXPORTS void setUseTegra(bool flag);
 }
 #endif
 
+#ifdef ENABLE_INSTRUMENTATION
+namespace cv
+{
+namespace instr
+{
+struct InstrTLSStruct
+{
+    InstrTLSStruct()
+    {
+        pCurrentNode = NULL;
+    }
+    InstrNode* pCurrentNode;
+};
+
+class InstrStruct
+{
+public:
+    InstrStruct()
+    {
+        useInstr         = false;
+        enableMapping    = true;
+
+        rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
+        tlsStruct.get()->pCurrentNode = &rootNode;
+    }
+
+    Mutex mutexCreate;
+    Mutex mutexCount;
+
+    bool       useInstr;
+    bool       enableMapping;
+    InstrNode  rootNode;
+    TLSData<InstrTLSStruct> tlsStruct;
+};
+
+class CV_EXPORTS IntrumentationRegion
+{
+public:
+    IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+    ~IntrumentationRegion();
+
+private:
+    bool    m_disabled; // region status
+    uint64  m_regionTicks;
+};
+
+InstrStruct&    getInstrumentStruct();
+InstrTLSStruct& getInstrumentTLSStruct();
+CV_EXPORTS InstrNode* getCurrentNode();
+}
+}
+
+///// General instrumentation
+// Instrument region
+#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...)      ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
+// Instrument functions with non-void return type
+#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
+{\
+    if(::cv::instr::useInstrumentation()){\
+        ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
+        try{\
+            auto status = ((FUN)(__VA_ARGS__));\
+            if(ERROR_COND){\
+                ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+                CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+            }\
+            return status;\
+        }catch(...){\
+            ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+            CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+            throw;\
+        }\
+    }else{\
+        return ((FUN)(__VA_ARGS__));\
+    }\
+}())
+// Instrument functions with void return type
+#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
+{\
+    if(::cv::instr::useInstrumentation()){\
+        ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
+        try{\
+            (FUN)(__VA_ARGS__);\
+        }catch(...){\
+            ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+            CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+            throw;\
+        }\
+    }else{\
+        (FUN)(__VA_ARGS__);\
+    }\
+}())
+// Instrumentation information marker
+#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(#NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
+
+///// General instrumentation
+// General OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION()              CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
+// Parallel OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION_MT()           CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
+
+///// IPP instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_IPP()          CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
+// Function instrumentation macro
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...)     CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_IPP(NAME)        CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)
+
+///// OpenCL instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_OPENCL()              CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+// Function instrumentation macro
+#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...)  CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_OPENCL(NAME)            CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
+#else
+#define CV_INSTRUMENT_REGION()
+#define CV_INSTRUMENT_REGION_MT()
+
+#define CV_INSTRUMENT_REGION_IPP()
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
+#define CV_INSTRUMENT_MARK_IPP(NAME)
+
+#define CV_INSTRUMENT_REGION_OPENCL()
+#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
+#define CV_INSTRUMENT_MARK_OPENCL(NAME)
+#endif
+
 //! @endcond
 
 #endif // __OPENCV_CORE_PRIVATE_HPP__
index 768c4d5..911f779 100644 (file)
@@ -1002,6 +1002,137 @@ template<> inline std::string CommandLineParser::get<std::string>(const String&
 
 //! @endcond
 
+
+// Basic Node class for tree building
+template<class OBJECT>
+class CV_EXPORTS Node
+{
+public:
+    Node()
+    {
+        m_pParent  = 0;
+    }
+    Node(OBJECT& payload) : m_payload(payload)
+    {
+        m_pParent  = 0;
+    }
+    ~Node()
+    {
+        removeChilds();
+        if (m_pParent)
+        {
+            int idx = m_pParent->findChild(this);
+            if (idx >= 0)
+                m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
+        }
+    }
+
+    Node<OBJECT>* findChild(OBJECT& payload) const
+    {
+        for(int i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i]->m_payload == payload)
+                return this->m_childs[i];
+        }
+        return NULL;
+    }
+
+    int findChild(Node<OBJECT> *pNode) const
+    {
+        for (int i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i] == pNode)
+                return i;
+        }
+        return -1;
+    }
+
+    void addChild(Node<OBJECT> *pNode)
+    {
+        if(!pNode)
+            return;
+
+        CV_Assert(pNode->m_pParent == 0);
+        pNode->m_pParent = this;
+        this->m_childs.push_back(pNode);
+    }
+
+    void removeChilds()
+    {
+        for(int i = 0; i < m_childs.size(); i++)
+        {
+            m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
+            delete m_childs[i];
+        }
+        m_childs.clear();
+    }
+
+public:
+    OBJECT                     m_payload;
+    Node<OBJECT>*              m_pParent;
+    std::vector<Node<OBJECT>*> m_childs;
+};
+
+// Instrumentation external interface
+namespace instr
+{
+enum
+{
+    TYPE_GENERAL = 0,   // OpenCV API function, e.g. exported function
+    TYPE_MARKER,        // Information marker
+    TYPE_WRAPPER,       // Wrapper function for implementation
+    TYPE_FUN,           // Simple function call
+};
+
+enum
+{
+    IMPL_PLAIN = 0,
+    IMPL_IPP,
+    IMPL_OPENCL,
+};
+
+enum
+{
+    FLAGS_MAPPING = 0x01,
+};
+
+enum
+{
+    COMMAND_RESET = 0x01,
+};
+
+class CV_EXPORTS NodeData
+{
+public:
+    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+    NodeData(NodeData &ref);
+    ~NodeData();
+    NodeData& operator=(const NodeData&);
+
+    cv::String          m_funName;
+    int                 m_instrType;
+    int                 m_implType;
+    cv::String          m_fileName;
+    int                 m_lineNum;
+
+    bool                m_funError;
+    volatile int        m_counter;
+    bool                m_stopPoint;
+    uint64              m_ticksMean;
+
+};
+bool operator==(const NodeData& lhs, const NodeData& rhs);
+
+typedef Node<NodeData> InstrNode;
+
+CV_EXPORTS bool       useInstrumentation();
+CV_EXPORTS void       setUseInstrumentation(bool flag);
+CV_EXPORTS InstrNode* getTrace();
+CV_EXPORTS void       resetTrace();
+CV_EXPORTS void       setFlags(int modeFlags);
+CV_EXPORTS int        getFlags();
+};
+
 } //namespace cv
 
 #ifndef DISABLE_OPENCV_24_COMPATIBILITY
index b805ab4..d496865 100644 (file)
@@ -149,13 +149,25 @@ namespace
     public:
         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
         {
+
             body = &_body;
             wholeRange = _r;
             double len = wholeRange.end - wholeRange.start;
             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
+
+#ifdef ENABLE_INSTRUMENTATION
+            pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
+#endif
         }
         void operator()(const cv::Range& sr) const
         {
+#ifdef ENABLE_INSTRUMENTATION
+            {
+                cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
+                pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
+            }
+#endif
+
             cv::Range r;
             r.start = (int)(wholeRange.start +
                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
@@ -169,6 +181,9 @@ namespace
         const cv::ParallelLoopBody* body;
         cv::Range wholeRange;
         int nstripes;
+#ifdef ENABLE_INSTRUMENTATION
+        cv::instr::InstrNode *pThreadRoot;
+#endif
     };
 
 #if defined HAVE_TBB
@@ -252,6 +267,8 @@ static SchedPtr pplScheduler;
 
 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
 {
+    CV_INSTRUMENT_REGION()
+
 #ifdef CV_PARALLEL_FRAMEWORK
 
     if(numThreads != 0)
index be0f89e..727d895 100644 (file)
@@ -1522,6 +1522,8 @@ namespace cv {
 
 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
 {
+    CV_INSTRUMENT_REGION_OPENCL()
+
     bool haveMask = _mask.kind() != _InputArray::NONE;
     int nz = haveMask ? -1 : (int)_src.total();
     Scalar mean, stddev;
@@ -1585,7 +1587,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
             k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
 
         size_t globalsize = groups * wgs;
-        if (!k.run(1, &globalsize, &wgs, false))
+
+        if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
             return false;
 
         typedef Scalar (* part_sum)(Mat m);
@@ -1641,6 +1644,8 @@ namespace cv
 {
 static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
 {
+    CV_INSTRUMENT_REGION_IPP()
+
 #if IPP_VERSION_X100 >= 700
     int cn = src.channels();
     size_t total_size = src.total();
@@ -1679,29 +1684,29 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
         if( !mask.empty() )
         {
             typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
-            ippiMaskMeanStdDevFuncC1 ippFuncC1 =
+            ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
             type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
             type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
             type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
             0;
-            if( ippFuncC1 )
+            if( ippiMean_StdDev_C1MR )
             {
-                if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
+                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
                 {
                     return true;
                 }
             }
             typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
-            ippiMaskMeanStdDevFuncC3 ippFuncC3 =
+            ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
             type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
             type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
             type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
             0;
-            if( ippFuncC3 )
+            if( ippiMean_StdDev_C3CMR )
             {
-                if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
-                    ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
-                    ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                 {
                     return true;
                 }
@@ -1710,31 +1715,31 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
         else
         {
             typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
-            ippiMeanStdDevFuncC1 ippFuncC1 =
+            ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
             type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
             type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
 #if (IPP_VERSION_X100 >= 810)
             type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
 #endif
             0;
-            if( ippFuncC1 )
+            if( ippiMean_StdDev_C1R )
             {
-                if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
+                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
                 {
                     return true;
                 }
             }
             typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
-            ippiMeanStdDevFuncC3 ippFuncC3 =
+            ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
             type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
             type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
             type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
             0;
-            if( ippFuncC3 )
+            if( ippiMean_StdDev_C3CR )
             {
-                if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
-                    ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
-                    ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+                if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+                    CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                 {
                     return true;
                 }
@@ -1751,6 +1756,8 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m
 
 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
 {
+    CV_INSTRUMENT_REGION()
+
     CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_meanStdDev(_src, _mean, _sdv, _mask))
 
index edf9e0e..cc60cf7 100644 (file)
@@ -1300,6 +1300,210 @@ void setUseCollection(bool flag)
 }
 #endif
 
+namespace instr
+{
+bool useInstrumentation()
+{
+#ifdef ENABLE_INSTRUMENTATION
+    return getInstrumentStruct().useInstr;
+#else
+    return false;
+#endif
+}
+
+void setUseInstrumentation(bool flag)
+{
+#ifdef ENABLE_INSTRUMENTATION
+    getInstrumentStruct().useInstr = flag;
+#else
+    CV_UNUSED(flag);
+#endif
+}
+
+InstrNode* getTrace()
+{
+#ifdef ENABLE_INSTRUMENTATION
+    return &getInstrumentStruct().rootNode;
+#else
+    return NULL;
+#endif
+}
+
+void resetTrace()
+{
+#ifdef ENABLE_INSTRUMENTATION
+    getInstrumentStruct().rootNode.removeChilds();
+    getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
+#endif
+}
+
+void setFlags(int modeFlags)
+{
+#ifdef ENABLE_INSTRUMENTATION
+    getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
+#else
+    CV_UNUSED(modeFlags);
+#endif
+}
+int getFlags()
+{
+#ifdef ENABLE_INSTRUMENTATION
+    int flags = 0;
+    if(getInstrumentStruct().enableMapping)
+        flags |= FLAGS_MAPPING;
+    return flags;
+#else
+    return 0;
+#endif
+}
+
+NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+{
+    m_instrType = TYPE_GENERAL;
+    m_implType  = IMPL_PLAIN;
+
+    m_funError  = false;
+    m_counter   = 0;
+    m_stopPoint = false;
+    m_ticksMean = 0;
+
+    m_funName     = funName;
+    m_instrType   = instrType;
+    m_implType    = implType;
+    m_fileName    = fileName;
+    m_lineNum     = lineNum;
+}
+NodeData::NodeData(NodeData &ref)
+{
+    *this = ref;
+}
+NodeData& NodeData::operator=(const NodeData &right)
+{
+    this->m_funName     = right.m_funName;
+    this->m_instrType   = right.m_instrType;
+    this->m_implType    = right.m_implType;
+    this->m_funError    = right.m_funError;
+    this->m_counter     = right.m_counter;
+    this->m_stopPoint   = right.m_stopPoint;
+    this->m_ticksMean   = right.m_ticksMean;
+    this->m_fileName    = right.m_fileName;
+    this->m_lineNum     = right.m_lineNum;
+
+    return *this;
+}
+NodeData::~NodeData()
+{
+}
+bool operator==(const NodeData& left, const NodeData& right)
+{
+    if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
+        return true;
+    return false;
+}
+
+#ifdef ENABLE_INSTRUMENTATION
+InstrStruct& getInstrumentStruct()
+{
+    static InstrStruct instr;
+    return instr;
+}
+
+InstrTLSStruct& getInstrumentTLSStruct()
+{
+    return *getInstrumentStruct().tlsStruct.get();
+}
+
+InstrNode* getCurrentNode()
+{
+    return getInstrumentTLSStruct().pCurrentNode;
+}
+
+IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+{
+    m_disabled    = false;
+    m_regionTicks = 0;
+
+    InstrStruct *pStruct = &getInstrumentStruct();
+    if(pStruct->useInstr)
+    {
+        InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
+
+        // Disable in case of failure
+        if(!pTLS->pCurrentNode)
+        {
+            m_disabled = true;
+            return;
+        }
+
+        m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
+        if(m_disabled)
+            return;
+
+        NodeData payload(funName, fileName, lineNum, instrType, implType);
+        Node<NodeData>* pChild = NULL;
+
+        if(pStruct->enableMapping)
+        {
+            // Critical section
+            cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
+            pChild = pTLS->pCurrentNode->findChild(payload);
+            if(!pChild)
+            {
+                pChild = new Node<NodeData>(payload);
+                pTLS->pCurrentNode->addChild(pChild);
+            }
+        }
+        else
+        {
+            pChild = pTLS->pCurrentNode->findChild(payload);
+            if(!pChild)
+            {
+                pTLS->pCurrentNode->m_payload.m_stopPoint = true;
+                return;
+            }
+        }
+        pTLS->pCurrentNode = pChild;
+
+        m_regionTicks = getTickCount();
+    }
+}
+
+IntrumentationRegion::~IntrumentationRegion()
+{
+    InstrStruct *pStruct = &getInstrumentStruct();
+    if(pStruct->useInstr)
+    {
+        if(!m_disabled)
+        {
+            InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
+            if(pTLS->pCurrentNode->m_payload.m_stopPoint)
+            {
+                pTLS->pCurrentNode->m_payload.m_stopPoint = false;
+            }
+            else
+            {
+                if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
+                    pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
+                    cv::ocl::finish();
+
+                uint64 ticks = (getTickCount() - m_regionTicks);
+                {
+                    cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
+                    pTLS->pCurrentNode->m_payload.m_counter++;
+                    if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
+                        pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
+                    else
+                        pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
+                }
+
+                pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
+            }
+        }
+    }
+}
+#endif
+}
+
 namespace ipp
 {
 
index c2e8600..2f2f55c 100644 (file)
@@ -97,13 +97,13 @@ using std::tr1::tuple;
 
 // TODO Replace finish call to dstUMat.wait()
 #define OCL_TEST_CYCLE() \
-    for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
+    for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
 
 #define OCL_TEST_CYCLE_N(n) \
-    for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
+    for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
 
 #define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
-    for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
+    for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
         for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)
 
 
index 0999899..d1a726a 100644 (file)
@@ -354,6 +354,15 @@ typedef struct ImplData
 } ImplData;
 #endif
 
+#ifdef ENABLE_INSTRUMENTATION
+class InstumentData
+{
+public:
+    static ::cv::String treeToString();
+    static void         printTree();
+};
+#endif
+
 class CV_EXPORTS TestBase: public ::testing::Test
 {
 public:
@@ -382,7 +391,7 @@ protected:
     virtual void SetUp();
     virtual void TearDown();
 
-    void startTimer();
+    bool startTimer(); // bool is dummy for conditional loop
     void stopTimer();
     bool next();
 
@@ -406,6 +415,10 @@ protected:
 #ifdef CV_COLLECT_IMPL_DATA
     ImplData implConf;
 #endif
+#ifdef ENABLE_INSTRUMENTATION
+    InstumentData instrConf;
+#endif
+
 private:
     typedef std::vector<std::pair<int, cv::Size> > SizeVector;
     typedef std::vector<int64> TimeVector;
@@ -645,9 +658,9 @@ int main(int argc, char **argv)\
     CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
 }
 
-#define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer())
-#define TEST_CYCLE() for(; startTimer(), next(); stopTimer())
-#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r)
+#define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer())
+#define TEST_CYCLE() for(; next() && startTimer(); stopTimer())
+#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r)
 
 namespace perf
 {
index 1a959f1..50e2354 100644 (file)
@@ -45,6 +45,9 @@ static bool         param_verify_sanity;
 #ifdef CV_COLLECT_IMPL_DATA
 static bool         param_collect_impl;
 #endif
+#ifdef ENABLE_INSTRUMENTATION
+static bool         param_instrument;
+#endif
 extern bool         test_ipp_check;
 
 #ifdef HAVE_CUDA
@@ -727,7 +730,219 @@ public:
     }
 };
 
+#ifdef ENABLE_INSTRUMENTATION
+static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
+{
+    // Print empty line for a big tree nodes
+    if(pNode->m_pParent)
+    {
+        int parendIdx = pNode->m_pParent->findChild(pNode);
+        if(parendIdx > 0 && pNode->m_pParent->m_childs[parendIdx-1]->m_childs.size())
+        {
+            printShift(pNode->m_pParent->m_childs[parendIdx-1]->m_childs[0], pRoot);
+            printf("\n");
+        }
+    }
+
+    // Check if parents have more childs
+    std::vector<cv::instr::InstrNode*> cache;
+    cv::instr::InstrNode *pTmpNode = pNode;
+    while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
+    {
+        cache.push_back(pTmpNode->m_pParent);
+        pTmpNode = pTmpNode->m_pParent;
+    }
+    for(int i = (int)cache.size()-1; i >= 0; i--)
+    {
+        if(cache[i]->m_pParent)
+        {
+            if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
+                printf("    ");
+            else
+                printf("|   ");
+        }
+    }
+}
+
+static double calcLocalWeight(cv::instr::InstrNode *pNode)
+{
+    if(pNode->m_pParent && pNode->m_pParent->m_pParent)
+        return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
+    else
+        return 100;
+}
+
+static double calcGlobalWeight(cv::instr::InstrNode *pNode)
+{
+    cv::instr::InstrNode* globNode = pNode;
+
+    while(globNode->m_pParent && globNode->m_pParent->m_pParent)
+        globNode = globNode->m_pParent;
+
+    return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
+}
+
+static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
+{
+    printf("%s", (pNode->m_payload.m_funName.substr(0, 40) + ((pNode->m_payload.m_funName.size()>40)?"...":"")).c_str());
+
+    // Write instrumentation flags
+    if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL || pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
+    {
+        printf("<");
+        if(pNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)
+            printf("W");
+        else if(pNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
+            printf("F");
+        else if(pNode->m_payload.m_instrType == cv::instr::TYPE_MARKER)
+            printf("MARK");
+
+        if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL && pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
+            printf("_");
 
+        if(pNode->m_payload.m_implType == cv::instr::IMPL_IPP)
+            printf("IPP");
+        else if(pNode->m_payload.m_implType == cv::instr::IMPL_OPENCL)
+            printf("OCL");
+
+        printf(">");
+    }
+
+    if(pNode->m_pParent)
+    {
+        printf(" - C:%d", pNode->m_payload.m_counter);
+        printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
+        if(pNode->m_pParent->m_pParent)
+            printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
+    }
+    printf("\n");
+
+    // Group childes
+    std::vector<cv::String> groups;
+    {
+        bool bFound = false;
+        for(size_t i = 0; i < pNode->m_childs.size(); i++)
+        {
+            bFound = false;
+            for(size_t j = 0; j < groups.size(); j++)
+            {
+                if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
+                {
+                    bFound = true;
+                    break;
+                }
+            }
+            if(!bFound)
+                groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
+        }
+    }
+
+    for(size_t g = 0; g < groups.size(); g++)
+    {
+        for(size_t i = 0; i < pNode->m_childs.size(); i++)
+        {
+            if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
+            {
+                printShift(pNode->m_childs[i], pRoot);
+
+                if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
+                    printf("\\---");
+                else
+                    printf("|---");
+                printNodeRec(pNode->m_childs[i], pRoot);
+            }
+        }
+    }
+}
+
+template <typename T>
+std::string to_string_with_precision(const T value, const int p = 3)
+{
+    std::ostringstream out;
+    out << std::fixed << std::setprecision(p) << value;
+    return out.str();
+}
+
+static cv::String nodeToString(cv::instr::InstrNode *pNode)
+{
+    cv::String string;
+    if (pNode->m_payload.m_funName == "ROOT")
+        string = pNode->m_payload.m_funName;
+    else
+    {
+        string = "#";
+        string += std::to_string(pNode->m_payload.m_instrType);
+        string += pNode->m_payload.m_funName;
+        string += " - L:";
+        string += to_string_with_precision(calcLocalWeight(pNode));
+        string += ", G:";
+        string += to_string_with_precision(calcGlobalWeight(pNode));
+    }
+    string += "(";
+    for(size_t i = 0; i < pNode->m_childs.size(); i++)
+        string += nodeToString(pNode->m_childs[i]);
+    string += ")";
+
+    return string;
+}
+
+static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
+{
+    uint64 ticks = 0;
+
+    if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
+        ticks = pNode->m_payload.m_ticksMean;
+
+    for(size_t i = 0; i < pNode->m_childs.size(); i++)
+        ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
+
+    return ticks;
+}
+
+static uint64 getImplTime(int impl)
+{
+    uint64 ticks = 0;
+    cv::instr::InstrNode *pRoot = cv::instr::getTrace();
+
+    ticks = getNodeTimeRec(pRoot, cv::instr::TYPE_FUN, impl);
+
+    return ticks;
+}
+
+static uint64 getTotalTime()
+{
+    uint64 ticks = 0;
+    cv::instr::InstrNode *pRoot = cv::instr::getTrace();
+
+    for(size_t i = 0; i < pRoot->m_childs.size(); i++)
+        ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
+
+    return ticks;
+}
+
+::cv::String InstumentData::treeToString()
+{
+    cv::String string = nodeToString(cv::instr::getTrace());
+    return string;
+}
+
+void InstumentData::printTree()
+{
+    if(cv::instr::getTrace()->m_childs.size())
+    {
+        printf("[ TRACE    ]\n");
+        printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
+#ifdef HAVE_IPP
+        printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
+#endif
+#ifdef HAVE_OPENCL
+        printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
+#endif
+        printf("\n[/TRACE    ]\n");
+        fflush(stdout);
+    }
+}
+#endif
 
 /*****************************************************************************************\
 *                                   ::perf::TestBase
@@ -775,6 +990,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
 #ifdef CV_COLLECT_IMPL_DATA
         "{   perf_collect_impl           |false    |collect info about executed implementations}"
 #endif
+#ifdef ENABLE_INSTRUMENTATION
+        "{   perf_instrument             |false    |instrument code to collect implementations trace}"
+#endif
         "{   help h                      |false    |print help info}"
 #ifdef HAVE_CUDA
         "{   perf_cuda_device            |0        |run CUDA test suite onto specific CUDA capable device}"
@@ -826,6 +1044,9 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
 #ifdef CV_COLLECT_IMPL_DATA
     param_collect_impl  = args.get<bool>("perf_collect_impl");
 #endif
+#ifdef ENABLE_INSTRUMENTATION
+    param_instrument    = args.get<bool>("perf_instrument");
+#endif
 #ifdef ANDROID
     param_affinity_mask   = args.get<int>("perf_affinity_mask");
     log_power_checkpoints = args.has("perf_log_power_checkpoints");
@@ -856,6 +1077,12 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
     else
         cv::setUseCollection(0);
 #endif
+#ifdef ENABLE_INSTRUMENTATION
+    if(param_instrument)
+        cv::instr::setUseInstrumentation(true);
+    else
+        cv::instr::setUseInstrumentation(false);
+#endif
 
 #ifdef HAVE_CUDA
 
@@ -969,13 +1196,13 @@ int64 TestBase::_calibrate()
             cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2));
             declare.time(30);
             double s = 0;
-            for(declare.iterations(20); startTimer(), next(); stopTimer())
+            for(declare.iterations(20); next() && startTimer(); stopTimer())
                 s+=a.dot(b);
             declare.time(s);
 
             //self calibration
             SetUp();
-            for(declare.iterations(1000); startTimer(), next(); stopTimer()){}
+            for(declare.iterations(1000); next() && startTimer(); stopTimer()){}
         }
     };
 
@@ -1242,8 +1469,6 @@ bool TestBase::next()
     }
 #endif
 
-    if (has_next)
-        startTimer(); // really we should measure activity from this moment, so reset start time
     return has_next;
 }
 
@@ -1281,9 +1506,17 @@ unsigned int TestBase::getTotalOutputSize() const
     return res;
 }
 
-void TestBase::startTimer()
+bool TestBase::startTimer()
 {
+#ifdef ENABLE_INSTRUMENTATION
+    if(currentIter == 0)
+    {
+        cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_MAPPING); // enable mapping for the first run
+        cv::instr::resetTrace();
+    }
+#endif
     lastTime = cv::getTickCount();
+    return true; // dummy true for conditional loop
 }
 
 void TestBase::stopTimer()
@@ -1297,6 +1530,10 @@ void TestBase::stopTimer()
     if (lastTime < 0) lastTime = 0;
     times.push_back(lastTime);
     lastTime = 0;
+
+#ifdef ENABLE_INSTRUMENTATION
+    cv::instr::setFlags(cv::instr::getFlags()&~cv::instr::FLAGS_MAPPING); // disable mapping to decrease overhead for +1 run
+#endif
 }
 
 performance_metrics& TestBase::calcMetrics()
@@ -1469,6 +1706,16 @@ void TestBase::reportMetrics(bool toJUnitXML)
         RecordProperty("gstddev", cv::format("%.6f", m.gstddev).c_str());
         RecordProperty("mean", cv::format("%.0f", m.mean).c_str());
         RecordProperty("stddev", cv::format("%.0f", m.stddev).c_str());
+#ifdef ENABLE_INSTRUMENTATION
+        if(cv::instr::useInstrumentation())
+        {
+            cv::String tree = InstumentData::treeToString();
+            RecordProperty("functions_hierarchy", tree.c_str());
+            RecordProperty("total_ipp_weight",    cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())));
+            RecordProperty("total_opencl_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())));
+            cv::instr::resetTrace();
+        }
+#endif
 #ifdef CV_COLLECT_IMPL_DATA
         if(param_collect_impl)
         {
@@ -1639,6 +1886,12 @@ void TestBase::TearDown()
         fflush(stdout);
     }
 #endif
+
+#ifdef ENABLE_INSTRUMENTATION
+    if(cv::instr::useInstrumentation())
+        InstumentData::printTree();
+#endif
+
     reportMetrics(true);
 }