instrumentation: minor refactoring, wrap OpenCL
authorAlexander Alekhin <alexander.alekhin@intel.com>
Thu, 25 Aug 2016 14:26:46 +0000 (17:26 +0300)
committerAlexander Alekhin <alexander.alekhin@intel.com>
Thu, 8 Sep 2016 13:13:44 +0000 (16:13 +0300)
- calculate ticksTotal instead of ticksMean
- local / global width is based on ticksTotal value
- added instrumentation for OpenCL program compilation
- added instrumentation for OpenCL kernel execution

cmake/OpenCVGenABI.cmake
modules/core/include/opencv2/core/private.hpp
modules/core/include/opencv2/core/utility.hpp
modules/core/src/ocl.cpp
modules/core/src/system.cpp
modules/ts/src/ts_perf.cpp

index 35cc10d..86aa78b 100644 (file)
@@ -43,7 +43,7 @@ string(REPLACE ";" "\n    " OPENCV_ABI_SKIP_HEADERS "${OPENCV_ABI_SKIP_HEADERS}"
 string(REPLACE ";" "\n    " OPENCV_ABI_SKIP_LIBRARIES "${OPENCV_ABI_SKIP_LIBRARIES}")
 
 # Options
-set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} -DOPENCV_ABI_CHECK=1")
 string(REGEX REPLACE "([^ ]) +([^ ])" "\\1\\n    \\2" OPENCV_ABI_GCC_OPTIONS "${OPENCV_ABI_GCC_OPTIONS}")
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/opencv_abi.xml.in" "${path1}")
index c839327..44e1a44 100644 (file)
@@ -476,7 +476,7 @@ public:
 class CV_EXPORTS IntrumentationRegion
 {
 public:
-    IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+    IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN);
     ~IntrumentationRegion();
 
 private:
@@ -492,7 +492,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
 
 ///// General instrumentation
 // Instrument region
-#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...)      ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
+#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL)      ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL);
 // Instrument functions with non-void return type
 #define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
 {\
@@ -535,13 +535,13 @@ CV_EXPORTS InstrNode* getCurrentNode();
 
 ///// General instrumentation
 // General OpenCV region instrumentation macro
-#define CV_INSTRUMENT_REGION()              CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
+#define CV_INSTRUMENT_REGION()              CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
 // Parallel OpenCV region instrumentation macro
 #define CV_INSTRUMENT_REGION_MT()           CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
 
 ///// IPP instrumentation
 // Wrapper region instrumentation macro
-#define CV_INSTRUMENT_REGION_IPP()          CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
+#define CV_INSTRUMENT_REGION_IPP()          CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
 // Function instrumentation macro
 #define CV_INSTRUMENT_FUN_IPP(FUN, ...)     CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
 // Diagnostic markers
@@ -549,12 +549,15 @@ CV_EXPORTS InstrNode* getCurrentNode();
 
 ///// OpenCL instrumentation
 // Wrapper region instrumentation macro
-#define CV_INSTRUMENT_REGION_OPENCL()              CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+#define CV_INSTRUMENT_REGION_OPENCL()              CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+#define CV_INSTRUMENT_REGION_OPENCL_(NAME)         CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
 // Function instrumentation macro
 #define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...)  CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
 // Diagnostic markers
 #define CV_INSTRUMENT_MARK_OPENCL(NAME)            CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
 #else
+#define CV_INSTRUMENT_REGION_META(...)
+
 #define CV_INSTRUMENT_REGION()
 #define CV_INSTRUMENT_REGION_MT()
 
@@ -563,6 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
 #define CV_INSTRUMENT_MARK_IPP(NAME)
 
 #define CV_INSTRUMENT_REGION_OPENCL()
+#define CV_INSTRUMENT_REGION_OPENCL_(...)
 #define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
 #define CV_INSTRUMENT_MARK_OPENCL(NAME)
 #endif
index 3bf6975..e465fae 100644 (file)
@@ -1076,7 +1076,10 @@ public:
 // Instrumentation external interface
 namespace instr
 {
-enum
+
+#if !defined OPENCV_ABI_CHECK
+
+enum TYPE
 {
     TYPE_GENERAL = 0,   // OpenCV API function, e.g. exported function
     TYPE_MARKER,        // Information marker
@@ -1084,48 +1087,60 @@ enum
     TYPE_FUN,           // Simple function call
 };
 
-enum
+enum IMPL
 {
     IMPL_PLAIN = 0,
     IMPL_IPP,
     IMPL_OPENCL,
 };
 
-enum
-{
-    FLAGS_MAPPING = 0x01,
-};
-
 class CV_EXPORTS NodeData
 {
 public:
-    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
     NodeData(NodeData &ref);
     ~NodeData();
     NodeData& operator=(const NodeData&);
 
     cv::String          m_funName;
-    int                 m_instrType;
-    int                 m_implType;
-    cv::String          m_fileName;
+    cv::instr::TYPE     m_instrType;
+    cv::instr::IMPL     m_implType;
+    const char*         m_fileName;
     int                 m_lineNum;
 
-    bool                m_funError;
     volatile int        m_counter;
-    bool                m_stopPoint;
-    uint64              m_ticksMean;
+    volatile uint64     m_ticksTotal;
+
+    // No synchronization
+    double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); }
+    // No synchronization
+    double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); }
 
+    bool                m_funError;
+    bool                m_stopPoint;
 };
 bool operator==(const NodeData& lhs, const NodeData& rhs);
 
 typedef Node<NodeData> InstrNode;
 
+CV_EXPORTS InstrNode* getTrace();
+
+#endif // !defined OPENCV_ABI_CHECK
+
+
 CV_EXPORTS bool       useInstrumentation();
 CV_EXPORTS void       setUseInstrumentation(bool flag);
-CV_EXPORTS InstrNode* getTrace();
 CV_EXPORTS void       resetTrace();
-CV_EXPORTS void       setFlags(int modeFlags);
-CV_EXPORTS int        getFlags();
+
+enum FLAGS
+{
+    FLAGS_NONE = 0,
+    FLAGS_MAPPING = 1 << 0,
+};
+
+CV_EXPORTS void       setFlags(FLAGS modeFlags);
+static inline void    setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
+CV_EXPORTS FLAGS      getFlags();
 }
 
 } //namespace cv
index 6780d85..924b360 100644 (file)
@@ -45,6 +45,7 @@
 #include <string>
 #include <sstream>
 #include <iostream> // std::cerr
+#include <inttypes.h>
 
 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
 #define CV_OPENCL_SHOW_RUN_ERRORS       0
@@ -3169,6 +3170,9 @@ struct Kernel::Impl
     {
         cl_program ph = (cl_program)prog.ptr();
         cl_int retval = 0;
+#ifdef ENABLE_INSTRUMENTATION
+        name = kname;
+#endif
         handle = ph != 0 ?
             clCreateKernel(ph, kname, &retval) : 0;
         CV_OclDbgAssert(retval == CL_SUCCESS);
@@ -3221,6 +3225,9 @@ struct Kernel::Impl
 
     IMPLEMENT_REFCOUNTABLE();
 
+#ifdef ENABLE_INSTRUMENTATION
+    cv::String name;
+#endif
     cl_kernel handle;
     cl_event e;
     enum { MAX_ARRS = 16 };
@@ -3441,6 +3448,8 @@ int Kernel::set(int i, const KernelArg& arg)
 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
                  bool sync, const Queue& q)
 {
+    CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL);
+
     if(!p || !p->handle || p->e != 0)
         return false;
 
@@ -3552,6 +3561,7 @@ struct Program::Impl
     Impl(const ProgramSource& _src,
          const String& _buildflags, String& errmsg)
     {
+        CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str());
         refcount = 1;
         const Context& ctx = Context::getDefault();
         src = _src;
index ffca514..bd44fe3 100644 (file)
@@ -1337,7 +1337,7 @@ void resetTrace()
 #endif
 }
 
-void setFlags(int modeFlags)
+void setFlags(FLAGS modeFlags)
 {
 #ifdef ENABLE_INSTRUMENTATION
     getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
@@ -1345,33 +1345,34 @@ void setFlags(int modeFlags)
     CV_UNUSED(modeFlags);
 #endif
 }
-int getFlags()
+FLAGS getFlags()
 {
 #ifdef ENABLE_INSTRUMENTATION
     int flags = 0;
     if(getInstrumentStruct().enableMapping)
         flags |= FLAGS_MAPPING;
-    return flags;
+    return (FLAGS)flags;
 #else
-    return 0;
+    return (FLAGS)0;
 #endif
 }
 
-NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType)
 {
     m_instrType = TYPE_GENERAL;
     m_implType  = IMPL_PLAIN;
 
-    m_funError  = false;
-    m_counter   = 0;
-    m_stopPoint = false;
-    m_ticksMean = 0;
-
     m_funName     = funName;
     m_instrType   = instrType;
     m_implType    = implType;
     m_fileName    = fileName;
     m_lineNum     = lineNum;
+
+    m_counter = 0;
+    m_ticksTotal = 0;
+
+    m_funError = false;
+    m_stopPoint = false;
 }
 NodeData::NodeData(NodeData &ref)
 {
@@ -1382,13 +1383,12 @@ NodeData& NodeData::operator=(const NodeData &right)
     this->m_funName     = right.m_funName;
     this->m_instrType   = right.m_instrType;
     this->m_implType    = right.m_implType;
-    this->m_funError    = right.m_funError;
-    this->m_counter     = right.m_counter;
-    this->m_stopPoint   = right.m_stopPoint;
-    this->m_ticksMean   = right.m_ticksMean;
     this->m_fileName    = right.m_fileName;
     this->m_lineNum     = right.m_lineNum;
-
+    this->m_counter     = right.m_counter;
+    this->m_ticksTotal  = right.m_ticksTotal;
+    this->m_funError    = right.m_funError;
+    this->m_stopPoint   = right.m_stopPoint;
     return *this;
 }
 NodeData::~NodeData()
@@ -1418,7 +1418,7 @@ InstrNode* getCurrentNode()
     return getInstrumentTLSStruct().pCurrentNode;
 }
 
-IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType)
 {
     m_disabled    = false;
     m_regionTicks = 0;
@@ -1482,18 +1482,18 @@ IntrumentationRegion::~IntrumentationRegion()
             }
             else
             {
-                if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
-                    pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
-                    cv::ocl::finish();
+                if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
+                    (pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN ||
+                        pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER))
+                {
+                    cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation
+                }
 
                 uint64 ticks = (getTickCount() - m_regionTicks);
                 {
                     cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
                     pTLS->pCurrentNode->m_payload.m_counter++;
-                    if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
-                        pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
-                    else
-                        pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
+                    pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks;
                 }
 
                 pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
index 50e2354..8866272 100644 (file)
@@ -767,7 +767,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
 static double calcLocalWeight(cv::instr::InstrNode *pNode)
 {
     if(pNode->m_pParent && pNode->m_pParent->m_pParent)
-        return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
+        return ((double)pNode->m_payload.m_ticksTotal*100/pNode->m_pParent->m_payload.m_ticksTotal);
     else
         return 100;
 }
@@ -779,7 +779,7 @@ static double calcGlobalWeight(cv::instr::InstrNode *pNode)
     while(globNode->m_pParent && globNode->m_pParent->m_pParent)
         globNode = globNode->m_pParent;
 
-    return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
+    return ((double)pNode->m_payload.m_ticksTotal*100/(double)globNode->m_payload.m_ticksTotal);
 }
 
 static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
@@ -811,7 +811,7 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo
     if(pNode->m_pParent)
     {
         printf(" - C:%d", pNode->m_payload.m_counter);
-        printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
+        printf(" T:%.4fms", pNode->m_payload.getMeanMs());
         if(pNode->m_pParent->m_pParent)
             printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
     }
@@ -886,12 +886,15 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode)
     return string;
 }
 
-static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
+static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, cv::instr::TYPE type, cv::instr::IMPL impl)
 {
     uint64 ticks = 0;
 
-    if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
-        ticks = pNode->m_payload.m_ticksMean;
+    if (pNode->m_pParent && (type < 0 || pNode->m_payload.m_instrType == type) && pNode->m_payload.m_implType == impl)
+    {
+        ticks = pNode->m_payload.m_ticksTotal;
+        return ticks;
+    }
 
     for(size_t i = 0; i < pNode->m_childs.size(); i++)
         ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
@@ -899,7 +902,7 @@ static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
     return ticks;
 }
 
-static uint64 getImplTime(int impl)
+static uint64 getImplTime(cv::instr::IMPL impl)
 {
     uint64 ticks = 0;
     cv::instr::InstrNode *pRoot = cv::instr::getTrace();
@@ -915,7 +918,7 @@ static uint64 getTotalTime()
     cv::instr::InstrNode *pRoot = cv::instr::getTrace();
 
     for(size_t i = 0; i < pRoot->m_childs.size(); i++)
-        ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
+        ticks += pRoot->m_childs[i]->m_payload.m_ticksTotal;
 
     return ticks;
 }