From 349d5ba0127ac22455f22f5d49f9cfa0edd84623 Mon Sep 17 00:00:00 2001 From: Pavel Vlasov Date: Mon, 7 Nov 2016 12:15:51 +0300 Subject: [PATCH] --perf_instrument parameter now has int type and 0, 1, 2 modes (1 - simple trees, 2 - expanded trees for functions with same name but different calling address); Maximum depth limit var was added to the instrumentation structure; Trace names output console output fix: improper tree formatting could happen; Output in case of error was added; Custom regions improvements; Improved timing and weight calculation for parallel regions; New TC (threads counter) value to indicate how many different threads accessed particular node; parallel_for, warnings fixes and ReturnAddress code from Alexander Alekhin; --- modules/core/include/opencv2/core/private.hpp | 66 +++++++++++------- modules/core/include/opencv2/core/utility.hpp | 49 ++++++++++---- modules/core/src/dxt.cpp | 6 -- modules/core/src/ocl.cpp | 4 +- modules/core/src/parallel.cpp | 42 +++++++++++- modules/core/src/stat.cpp | 2 +- modules/core/src/system.cpp | 98 ++++++++++++++------------- modules/imgproc/src/canny.cpp | 2 + modules/imgproc/src/color.cpp | 2 - modules/imgproc/src/histogram.cpp | 2 - modules/imgproc/src/imgwarp.cpp | 8 --- modules/imgproc/src/smooth.cpp | 2 - modules/ts/src/ts_perf.cpp | 81 +++++++++++----------- 13 files changed, 207 insertions(+), 157 deletions(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 3b15ed3..4e5ba78 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -457,10 +457,11 @@ class InstrStruct public: InstrStruct() { - useInstr = false; - enableMapping = true; + useInstr = false; + flags = FLAGS_MAPPING; + maxDepth = 0; - rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN); + rootNode.m_payload = NodeData("ROOT", NULL, 0, NULL, false, TYPE_GENERAL, IMPL_PLAIN); tlsStruct.get()->pCurrentNode = &rootNode; } @@ -468,7 +469,8 @@ public: Mutex mutexCount; bool useInstr; - bool enableMapping; + int flags; + int maxDepth; InstrNode rootNode; TLSData tlsStruct; }; @@ -476,7 +478,7 @@ public: class CV_EXPORTS IntrumentationRegion { public: - IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN); + IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType = TYPE_GENERAL, IMPL implType = IMPL_PLAIN); ~IntrumentationRegion(); private: @@ -484,20 +486,28 @@ private: uint64 m_regionTicks; }; -InstrStruct& getInstrumentStruct(); -InstrTLSStruct& getInstrumentTLSStruct(); -CV_EXPORTS InstrNode* getCurrentNode(); +CV_EXPORTS InstrStruct& getInstrumentStruct(); +InstrTLSStruct& getInstrumentTLSStruct(); +CV_EXPORTS InstrNode* getCurrentNode(); } } -///// General instrumentation +#ifdef _WIN32 +#define CV_INSTRUMENT_GET_RETURN_ADDRESS _ReturnAddress() +#else +#define CV_INSTRUMENT_GET_RETURN_ADDRESS __builtin_extract_return_addr(__builtin_return_address(0)) +#endif + // Instrument region -#define CV_INSTRUMENT_REGION_META(NAME, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, TYPE, IMPL); +#define CV_INSTRUMENT_REGION_META(NAME, ALWAYS_EXPAND, TYPE, IMPL) ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, CV_INSTRUMENT_GET_RETURN_ADDRESS, ALWAYS_EXPAND, TYPE, IMPL); +#define CV_INSTRUMENT_REGION_CUSTOM_META(NAME, ALWAYS_EXPAND, TYPE, IMPL)\ + void *__curr_address__ = [&]() {return CV_INSTRUMENT_GET_RETURN_ADDRESS;}();\ + ::cv::instr::IntrumentationRegion __instr_region__(NAME, __FILE__, __LINE__, __curr_address__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN); // Instrument functions with non-void return type #define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\ {\ if(::cv::instr::useInstrumentation()){\ - ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ + ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\ try{\ auto status = ((FUN)(__VA_ARGS__));\ if(ERROR_COND){\ @@ -518,7 +528,7 @@ CV_EXPORTS InstrNode* getCurrentNode(); #define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\ {\ if(::cv::instr::useInstrumentation()){\ - ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ + ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, NULL, false, TYPE, IMPL);\ try{\ (FUN)(__VA_ARGS__);\ }catch(...){\ @@ -531,17 +541,19 @@ CV_EXPORTS InstrNode* getCurrentNode(); }\ }()) // Instrumentation information marker -#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);} +#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(NAME, __FILE__, __LINE__, NULL, false, ::cv::instr::TYPE_MARKER, IMPL);} ///// General instrumentation // General OpenCV region instrumentation macro -#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) -// Parallel OpenCV region instrumentation macro -#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) +#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN) +// Custom OpenCV region instrumentation macro +#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN) +// Instrumentation for parallel_for_ or other regions which forks and gathers threads +#define CV_INSTRUMENT_REGION_MT_FORK() CV_INSTRUMENT_REGION_META(__FUNCTION__, true, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN); ///// IPP instrumentation // Wrapper region instrumentation macro -#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) +#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) // Function instrumentation macro #define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__) // Diagnostic markers @@ -549,26 +561,28 @@ CV_EXPORTS InstrNode* getCurrentNode(); ///// OpenCL instrumentation // Wrapper region instrumentation macro -#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) -#define CV_INSTRUMENT_REGION_OPENCL_(NAME) CV_INSTRUMENT_REGION_META(NAME, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) -// Function instrumentation macro -#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__) +#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) +// OpenCL kernel compilation wrapper +#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) +// OpenCL kernel run wrapper +#define CV_INSTRUMENT_REGION_OPENCL_RUN(NAME) CV_INSTRUMENT_REGION_META(NAME, false, ::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL) // Diagnostic markers #define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME) #else #define CV_INSTRUMENT_REGION_META(...) #define CV_INSTRUMENT_REGION() -#define CV_INSTRUMENT_REGION_MT() +#define CV_INSTRUMENT_REGION_NAME(...) +#define CV_INSTRUMENT_REGION_MT_FORK() #define CV_INSTRUMENT_REGION_IPP() #define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__)) -#define CV_INSTRUMENT_MARK_IPP(NAME) +#define CV_INSTRUMENT_MARK_IPP(...) #define CV_INSTRUMENT_REGION_OPENCL() -#define CV_INSTRUMENT_REGION_OPENCL_(...) -#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__)) -#define CV_INSTRUMENT_MARK_OPENCL(NAME) +#define CV_INSTRUMENT_REGION_OPENCL_COMPILE(...) +#define CV_INSTRUMENT_REGION_OPENCL_RUN(...) +#define CV_INSTRUMENT_MARK_OPENCL(...) #endif //! @endcond diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 5f7c5f2..e7a7c2d 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -1029,7 +1029,7 @@ public: Node* findChild(OBJECT& payload) const { - for(int i = 0; i < this->m_childs.size(); i++) + for(size_t i = 0; i < this->m_childs.size(); i++) { if(this->m_childs[i]->m_payload == payload) return this->m_childs[i]; @@ -1039,10 +1039,10 @@ public: int findChild(Node *pNode) const { - for (int i = 0; i < this->m_childs.size(); i++) + for (size_t i = 0; i < this->m_childs.size(); i++) { if(this->m_childs[i] == pNode) - return i; + return (int)i; } return -1; } @@ -1059,7 +1059,7 @@ public: void removeChilds() { - for(int i = 0; i < m_childs.size(); i++) + for(size_t i = 0; i < m_childs.size(); i++) { m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming delete m_childs[i]; @@ -1067,6 +1067,14 @@ public: m_childs.clear(); } + int getDepth() + { + int count = 0; + Node *pParent = m_pParent; + while(pParent) count++, pParent = pParent->m_pParent; + return count; + } + public: OBJECT m_payload; Node* m_pParent; @@ -1094,10 +1102,19 @@ enum IMPL IMPL_OPENCL, }; +struct NodeDataTls +{ + NodeDataTls() + { + m_ticksTotal = 0; + } + uint64 m_ticksTotal; +}; + class CV_EXPORTS NodeData { public: - NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); NodeData(NodeData &ref); ~NodeData(); NodeData& operator=(const NodeData&); @@ -1107,17 +1124,18 @@ public: cv::instr::IMPL m_implType; const char* m_fileName; int m_lineNum; + void* m_retAddress; + bool m_alwaysExpand; + bool m_funError; - volatile int m_counter; - volatile uint64 m_ticksTotal; + volatile int m_counter; + volatile uint64 m_ticksTotal; + TLSData m_tls; + int m_threads; // No synchronization - double getTotalMs() const { return (double)m_ticksTotal * 1000. / cv::getTickFrequency(); } - // No synchronization - double getMeanMs() const { return (double)m_ticksTotal * 1000. / (m_counter * cv::getTickFrequency()); } - - bool m_funError; - bool m_stopPoint; + double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } + double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } }; bool operator==(const NodeData& lhs, const NodeData& rhs); @@ -1134,8 +1152,9 @@ CV_EXPORTS void resetTrace(); enum FLAGS { - FLAGS_NONE = 0, - FLAGS_MAPPING = 1 << 0, + FLAGS_NONE = 0, + FLAGS_MAPPING = 0x01, + FLAGS_EXPAND_SAME_NAMES = 0x02, }; CV_EXPORTS void setFlags(FLAGS modeFlags); diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index b52ba93..aff17ba 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1564,8 +1564,6 @@ public: virtual void operator()(const Range& range) const { - CV_INSTRUMENT_REGION_IPP(); - IppStatus status; Ipp8u* pBuffer = 0; Ipp8u* pMemInit= 0; @@ -1647,8 +1645,6 @@ public: virtual void operator()(const Range& range) const { - CV_INSTRUMENT_REGION_IPP(); - IppStatus status; Ipp8u* pBuffer = 0; Ipp8u* pMemInit= 0; @@ -3809,8 +3805,6 @@ public: virtual void operator()(const Range& range) const { - CV_INSTRUMENT_REGION_IPP() - if(*ok == false) return; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 8212030..045b57a 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3450,7 +3450,7 @@ int Kernel::set(int i, const KernelArg& arg) bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], bool sync, const Queue& q) { - CV_INSTRUMENT_REGION_META(p->name.c_str(), instr::TYPE_FUN, instr::IMPL_OPENCL); + CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str()); if(!p || !p->handle || p->e != 0) return false; @@ -3563,7 +3563,7 @@ struct Program::Impl Impl(const ProgramSource& _src, const String& _buildflags, String& errmsg) { - CV_INSTRUMENT_REGION_OPENCL_(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str()); + CV_INSTRUMENT_REGION_OPENCL_COMPILE(cv::format("Compile: %" PRIx64 " options: %s", _src.hash(), _buildflags.c_str()).c_str()); refcount = 1; const Context& ctx = Context::getDefault(); src = _src; diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index e4ee8f5..ebf3907 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -144,7 +144,33 @@ namespace cv namespace { #ifdef CV_PARALLEL_FRAMEWORK - class ParallelLoopBodyWrapper +#ifdef ENABLE_INSTRUMENTATION + static void SyncNodes(cv::instr::InstrNode *pNode) + { + std::vector data; + pNode->m_payload.m_tls.gather(data); + + uint64 ticksMax = 0; + int threads = 0; + for(size_t i = 0; i < data.size(); i++) + { + if(data[i] && data[i]->m_ticksTotal) + { + ticksMax = MAX(ticksMax, data[i]->m_ticksTotal); + pNode->m_payload.m_ticksTotal -= data[i]->m_ticksTotal; + data[i]->m_ticksTotal = 0; + threads++; + } + } + pNode->m_payload.m_ticksTotal += ticksMax; + pNode->m_payload.m_threads = MAX(pNode->m_payload.m_threads, threads); + + for(size_t i = 0; i < pNode->m_childs.size(); i++) + SyncNodes(pNode->m_childs[i]); + } +#endif + + class ParallelLoopBodyWrapper : public cv::ParallelLoopBody { public: ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) @@ -159,6 +185,13 @@ namespace pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode; #endif } +#ifdef ENABLE_INSTRUMENTATION + ~ParallelLoopBodyWrapper() + { + for(size_t i = 0; i < pThreadRoot->m_childs.size(); i++) + SyncNodes(pThreadRoot->m_childs[i]); + } +#endif void operator()(const cv::Range& sr) const { #ifdef ENABLE_INSTRUMENTATION @@ -167,6 +200,7 @@ namespace pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread } #endif + CV_INSTRUMENT_REGION() cv::Range r; r.start = (int)(wholeRange.start + @@ -267,7 +301,9 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { - CV_INSTRUMENT_REGION() + CV_INSTRUMENT_REGION_MT_FORK() + if (range.empty()) + return; #ifdef CV_PARALLEL_FRAMEWORK @@ -326,7 +362,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, #elif defined HAVE_PTHREADS_PF - parallel_for_pthreads(range, body, nstripes); + parallel_for_pthreads(pbody.stripeRange(), pbody, pbody.stripeRange().size()); #else diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index ad97e8b..9def2e1 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1597,7 +1597,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv size_t globalsize = groups * wgs; - if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false)) + if(!k.run(1, &globalsize, &wgs, false)) return false; typedef Scalar (* part_sum)(Mat m); diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index bd44fe3..b2c0a3b 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1340,7 +1340,7 @@ void resetTrace() void setFlags(FLAGS modeFlags) { #ifdef ENABLE_INSTRUMENTATION - getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING); + getInstrumentStruct().flags = modeFlags; #else CV_UNUSED(modeFlags); #endif @@ -1348,31 +1348,27 @@ void setFlags(FLAGS modeFlags) FLAGS getFlags() { #ifdef ENABLE_INSTRUMENTATION - int flags = 0; - if(getInstrumentStruct().enableMapping) - flags |= FLAGS_MAPPING; - return (FLAGS)flags; + return (FLAGS)getInstrumentStruct().flags; #else return (FLAGS)0; #endif } -NodeData::NodeData(const char* funName, const char* fileName, int lineNum, cv::instr::TYPE instrType, cv::instr::IMPL implType) +NodeData::NodeData(const char* funName, const char* fileName, int lineNum, void* retAddress, bool alwaysExpand, cv::instr::TYPE instrType, cv::instr::IMPL implType) { - m_instrType = TYPE_GENERAL; - m_implType = IMPL_PLAIN; + m_funName = funName; + m_instrType = instrType; + m_implType = implType; + m_fileName = fileName; + m_lineNum = lineNum; + m_retAddress = retAddress; + m_alwaysExpand = alwaysExpand; - m_funName = funName; - m_instrType = instrType; - m_implType = implType; - m_fileName = fileName; - m_lineNum = lineNum; - - m_counter = 0; + m_threads = 1; + m_counter = 0; m_ticksTotal = 0; - m_funError = false; - m_stopPoint = false; + m_funError = false; } NodeData::NodeData(NodeData &ref) { @@ -1380,15 +1376,20 @@ NodeData::NodeData(NodeData &ref) } NodeData& NodeData::operator=(const NodeData &right) { - this->m_funName = right.m_funName; - this->m_instrType = right.m_instrType; - this->m_implType = right.m_implType; - this->m_fileName = right.m_fileName; - this->m_lineNum = right.m_lineNum; + this->m_funName = right.m_funName; + this->m_instrType = right.m_instrType; + this->m_implType = right.m_implType; + this->m_fileName = right.m_fileName; + this->m_lineNum = right.m_lineNum; + this->m_retAddress = right.m_retAddress; + this->m_alwaysExpand = right.m_alwaysExpand; + + this->m_threads = right.m_threads; this->m_counter = right.m_counter; this->m_ticksTotal = right.m_ticksTotal; + this->m_funError = right.m_funError; - this->m_stopPoint = right.m_stopPoint; + return *this; } NodeData::~NodeData() @@ -1397,7 +1398,10 @@ NodeData::~NodeData() bool operator==(const NodeData& left, const NodeData& right) { if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName) - return true; + { + if(left.m_retAddress == right.m_retAddress || !(cv::instr::getFlags()&cv::instr::FLAGS_EXPAND_SAME_NAMES || left.m_alwaysExpand)) + return true; + } return false; } @@ -1418,7 +1422,7 @@ InstrNode* getCurrentNode() return getInstrumentTLSStruct().pCurrentNode; } -IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, TYPE instrType, IMPL implType) +IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, void *retAddress, bool alwaysExpand, TYPE instrType, IMPL implType) { m_disabled = false; m_regionTicks = 0; @@ -1435,14 +1439,17 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file return; } - m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint; - if(m_disabled) + int depth = pTLS->pCurrentNode->getDepth(); + if(pStruct->maxDepth && pStruct->maxDepth <= depth) + { + m_disabled = true; return; + } - NodeData payload(funName, fileName, lineNum, instrType, implType); + NodeData payload(funName, fileName, lineNum, retAddress, alwaysExpand, instrType, implType); Node* pChild = NULL; - if(pStruct->enableMapping) + if(pStruct->flags&FLAGS_MAPPING) { // Critical section cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation @@ -1458,7 +1465,7 @@ IntrumentationRegion::IntrumentationRegion(const char* funName, const char* file pChild = pTLS->pCurrentNode->findChild(payload); if(!pChild) { - pTLS->pCurrentNode->m_payload.m_stopPoint = true; + m_disabled = true; return; } } @@ -1476,28 +1483,23 @@ IntrumentationRegion::~IntrumentationRegion() if(!m_disabled) { InstrTLSStruct *pTLS = &getInstrumentTLSStruct(); - if(pTLS->pCurrentNode->m_payload.m_stopPoint) + + if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL && + (pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN || + pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)) { - pTLS->pCurrentNode->m_payload.m_stopPoint = false; + cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation } - else - { - if (pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL && - (pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN || - pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)) - { - cv::ocl::finish(); // TODO Support "async" OpenCL instrumentation - } - uint64 ticks = (getTickCount() - m_regionTicks); - { - cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation - pTLS->pCurrentNode->m_payload.m_counter++; - pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks; - } - - pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent; + uint64 ticks = (getTickCount() - m_regionTicks); + { + cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation + pTLS->pCurrentNode->m_payload.m_counter++; + pTLS->pCurrentNode->m_payload.m_ticksTotal += ticks; + pTLS->pCurrentNode->m_payload.m_tls.get()->m_ticksTotal += ticks; } + + pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent; } } } diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index a9fbe8b..4636574 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -142,6 +142,8 @@ template static bool ocl_Canny(InputArray _src, const UMat& dx_, const UMat& dy_, OutputArray _dst, float low_thresh, float high_thresh, int aperture_size, bool L2gradient, int cn, const Size & size) { + CV_INSTRUMENT_REGION_OPENCL() + UMat map; const ocl::Device &dev = ocl::Device::getDefault(); diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 5b36386..9f3e6ac 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -259,8 +259,6 @@ public: virtual void operator()(const Range& range) const { - CV_INSTRUMENT_REGION_IPP(); - const void *yS = src_data + src_step * range.start; void *yD = dst_data + dst_step * range.start; if( !cvt(yS, static_cast(src_step), yD, static_cast(dst_step), width, range.end - range.start) ) diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 9b3b1b1..61fc098 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -1188,8 +1188,6 @@ public: virtual void operator() (const Range & range) const { - CV_INSTRUMENT_REGION_IPP() - Ipp32s levelNum = histSize + 1; Mat phist(hist->size(), hist->type(), Scalar::all(0)); #if IPP_VERSION_X100 >= 900 diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 9cf53a1..0d0fb72 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2795,8 +2795,6 @@ public: virtual void operator() (const Range& range) const { - CV_INSTRUMENT_REGION_IPP() - if (*ok == false) return; @@ -4772,8 +4770,6 @@ public: virtual void operator() (const Range & range) const { - CV_INSTRUMENT_REGION_IPP() - IppiRect srcRoiRect = { 0, 0, src.cols, src.rows }; Mat dstRoi = dst.rowRange(range); IppiSize dstRoiSize = ippiSize(dstRoi.size()); @@ -5609,8 +5605,6 @@ public: virtual void operator() (const Range& range) const { - CV_INSTRUMENT_REGION_IPP() - IppiSize srcsize = { src.cols, src.rows }; IppiRect srcroi = { 0, 0, src.cols, src.rows }; IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start }; @@ -6254,8 +6248,6 @@ public: virtual void operator() (const Range& range) const { - CV_INSTRUMENT_REGION_IPP() - IppiSize srcsize = {src.cols, src.rows}; IppiRect srcroi = {0, 0, src.cols, src.rows}; IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start}; diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index a0df333..018548e 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -3368,8 +3368,6 @@ public: virtual void operator() (const Range& range) const { - CV_INSTRUMENT_REGION_IPP() - int d = radius * 2 + 1; IppiSize kernel = {d, d}; IppiSize roi={dst.cols, range.end - range.start}; diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 8866272..1c40fb1 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -46,7 +46,7 @@ static bool param_verify_sanity; static bool param_collect_impl; #endif #ifdef ENABLE_INSTRUMENTATION -static bool param_instrument; +static int param_instrument; #endif extern bool test_ipp_check; @@ -744,7 +744,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot) } } - // Check if parents have more childs + // Check if parents have more childes std::vector cache; cv::instr::InstrNode *pTmpNode = pNode; while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot) @@ -756,7 +756,7 @@ static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot) { if(cache[i]->m_pParent) { - if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1) + if(cache[i]->m_pParent->findChild(cache[i]) == (int)cache[i]->m_pParent->m_childs.size()-1) printf(" "); else printf("| "); @@ -810,48 +810,39 @@ static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoo if(pNode->m_pParent) { - printf(" - C:%d", pNode->m_payload.m_counter); - printf(" T:%.4fms", pNode->m_payload.getMeanMs()); + printf(" - TC:%d C:%d", pNode->m_payload.m_threads, pNode->m_payload.m_counter); + printf(" T:%.2fms", pNode->m_payload.getTotalMs()); if(pNode->m_pParent->m_pParent) printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode)); } printf("\n"); - // Group childes - std::vector groups; { - bool bFound = false; - for(size_t i = 0; i < pNode->m_childs.size(); i++) + // Group childes by name + for(size_t i = 1; i < pNode->m_childs.size(); i++) { - bFound = false; - for(size_t j = 0; j < groups.size(); j++) + if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[i]->m_payload.m_funName ) + continue; + for(size_t j = i+1; j < pNode->m_childs.size(); j++) { - if(groups[j] == pNode->m_childs[i]->m_payload.m_funName) + if(pNode->m_childs[i-1]->m_payload.m_funName == pNode->m_childs[j]->m_payload.m_funName ) { - bFound = true; - break; + cv::swap(pNode->m_childs[i], pNode->m_childs[j]); + i++; } } - if(!bFound) - groups.push_back(pNode->m_childs[i]->m_payload.m_funName); } } - for(size_t g = 0; g < groups.size(); g++) + for(size_t i = 0; i < pNode->m_childs.size(); i++) { - for(size_t i = 0; i < pNode->m_childs.size(); i++) - { - if(pNode->m_childs[i]->m_payload.m_funName == groups[g]) - { - printShift(pNode->m_childs[i], pRoot); + printShift(pNode->m_childs[i], pRoot); - if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i])) - printf("\\---"); - else - printf("|---"); - printNodeRec(pNode->m_childs[i], pRoot); - } - } + if(i == pNode->m_childs.size()-1) + printf("\\---"); + else + printf("|---"); + printNodeRec(pNode->m_childs[i], pRoot); } } @@ -871,7 +862,7 @@ static cv::String nodeToString(cv::instr::InstrNode *pNode) else { string = "#"; - string += std::to_string(pNode->m_payload.m_instrType); + string += std::to_string((int)pNode->m_payload.m_instrType); string += pNode->m_payload.m_funName; string += " - L:"; string += to_string_with_precision(calcLocalWeight(pNode)); @@ -931,19 +922,16 @@ static uint64 getTotalTime() void InstumentData::printTree() { - if(cv::instr::getTrace()->m_childs.size()) - { - printf("[ TRACE ]\n"); - printNodeRec(cv::instr::getTrace(), cv::instr::getTrace()); + printf("[ TRACE ]\n"); + printNodeRec(cv::instr::getTrace(), cv::instr::getTrace()); #ifdef HAVE_IPP - printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())); + printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())); #endif #ifdef HAVE_OPENCL - printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())); + printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())); #endif - printf("\n[/TRACE ]\n"); - fflush(stdout); - } + printf("\n[/TRACE ]\n"); + fflush(stdout); } #endif @@ -994,7 +982,7 @@ void TestBase::Init(const std::vector & availableImpls, "{ perf_collect_impl |false |collect info about executed implementations}" #endif #ifdef ENABLE_INSTRUMENTATION - "{ perf_instrument |false |instrument code to collect implementations trace}" + "{ perf_instrument |0 |instrument code to collect implementations trace: 1 - perform instrumentation; 2 - separate functions with the same name }" #endif "{ help h |false |print help info}" #ifdef HAVE_CUDA @@ -1048,7 +1036,7 @@ void TestBase::Init(const std::vector & availableImpls, param_collect_impl = args.get("perf_collect_impl"); #endif #ifdef ENABLE_INSTRUMENTATION - param_instrument = args.get("perf_instrument"); + param_instrument = args.get("perf_instrument"); #endif #ifdef ANDROID param_affinity_mask = args.get("perf_affinity_mask"); @@ -1081,8 +1069,12 @@ void TestBase::Init(const std::vector & availableImpls, cv::setUseCollection(0); #endif #ifdef ENABLE_INSTRUMENTATION - if(param_instrument) + if(param_instrument > 0) + { + if(param_instrument == 2) + cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_EXPAND_SAME_NAMES); cv::instr::setUseInstrumentation(true); + } else cv::instr::setUseInstrumentation(false); #endif @@ -1856,6 +1848,11 @@ void TestBase::TearDown() if (HasFailure()) { reportMetrics(false); + +#ifdef ENABLE_INSTRUMENTATION + if(cv::instr::useInstrumentation()) + InstumentData::printTree(); +#endif return; } } -- 2.7.4