OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF IF ANDROID )
OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
+OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
/* FP16 */
#cmakedefine HAVE_FP16
+
+/* Library was compiled with functions instrumentation */
+#cmakedefine ENABLE_INSTRUMENTATION
#ifdef CV_IPP_RUN_VERBOSE
#define CV_IPP_RUN_(condition, func, ...) \
{ \
- if (cv::ipp::useIPP() && (condition) && func) \
+ if (cv::ipp::useIPP() && (condition) && (func)) \
{ \
printf("%s: IPP implementation is running\n", CV_Func); \
fflush(stdout); \
}
#else
#define CV_IPP_RUN_(condition, func, ...) \
- if (cv::ipp::useIPP() && (condition) && func) \
+ if (cv::ipp::useIPP() && (condition) && (func)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return __VA_ARGS__; \
#define CV_IPP_RUN_FAST(func, ...)
#endif
-#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)
+#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__)
#ifndef IPPI_CALL
}
#endif
+#ifdef ENABLE_INSTRUMENTATION
+namespace cv
+{
+namespace instr
+{
+struct InstrTLSStruct
+{
+ InstrTLSStruct()
+ {
+ pCurrentNode = NULL;
+ }
+ InstrNode* pCurrentNode;
+};
+
+class InstrStruct
+{
+public:
+ InstrStruct()
+ {
+ useInstr = false;
+ enableMapping = true;
+
+ rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN);
+ tlsStruct.get()->pCurrentNode = &rootNode;
+ }
+
+ Mutex mutexCreate;
+ Mutex mutexCount;
+
+ bool useInstr;
+ bool enableMapping;
+ InstrNode rootNode;
+ TLSData<InstrTLSStruct> tlsStruct;
+};
+
+class CV_EXPORTS IntrumentationRegion
+{
+public:
+ IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+ ~IntrumentationRegion();
+
+private:
+ bool m_disabled; // region status
+ uint64 m_regionTicks;
+};
+
+InstrStruct& getInstrumentStruct();
+InstrTLSStruct& getInstrumentTLSStruct();
+CV_EXPORTS InstrNode* getCurrentNode();
+}
+}
+
+///// General instrumentation
+// Instrument region
+#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL);
+// Instrument functions with non-void return type
+#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\
+{\
+ if(::cv::instr::useInstrumentation()){\
+ ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
+ try{\
+ auto status = ((FUN)(__VA_ARGS__));\
+ if(ERROR_COND){\
+ ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+ CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+ }\
+ return status;\
+ }catch(...){\
+ ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+ CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+ throw;\
+ }\
+ }else{\
+ return ((FUN)(__VA_ARGS__));\
+ }\
+}())
+// Instrument functions with void return type
+#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\
+{\
+ if(::cv::instr::useInstrumentation()){\
+ ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\
+ try{\
+ (FUN)(__VA_ARGS__);\
+ }catch(...){\
+ ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\
+ CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\
+ throw;\
+ }\
+ }else{\
+ (FUN)(__VA_ARGS__);\
+ }\
+}())
+// Instrumentation information marker
+#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(#NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);}
+
+///// General instrumentation
+// General OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
+// Parallel OpenCV region instrumentation macro
+#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN)
+
+///// IPP instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP)
+// Function instrumentation macro
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_IPP(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME)
+
+///// OpenCL instrumentation
+// Wrapper region instrumentation macro
+#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL)
+// Function instrumentation macro
+#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__)
+// Diagnostic markers
+#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME)
+#else
+#define CV_INSTRUMENT_REGION()
+#define CV_INSTRUMENT_REGION_MT()
+
+#define CV_INSTRUMENT_REGION_IPP()
+#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__))
+#define CV_INSTRUMENT_MARK_IPP(NAME)
+
+#define CV_INSTRUMENT_REGION_OPENCL()
+#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__))
+#define CV_INSTRUMENT_MARK_OPENCL(NAME)
+#endif
+
//! @endcond
#endif // __OPENCV_CORE_PRIVATE_HPP__
//! @endcond
+
+// Basic Node class for tree building
+template<class OBJECT>
+class CV_EXPORTS Node
+{
+public:
+ Node()
+ {
+ m_pParent = 0;
+ }
+ Node(OBJECT& payload) : m_payload(payload)
+ {
+ m_pParent = 0;
+ }
+ ~Node()
+ {
+ removeChilds();
+ if (m_pParent)
+ {
+ int idx = m_pParent->findChild(this);
+ if (idx >= 0)
+ m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
+ }
+ }
+
+ Node<OBJECT>* findChild(OBJECT& payload) const
+ {
+ for(int i = 0; i < this->m_childs.size(); i++)
+ {
+ if(this->m_childs[i]->m_payload == payload)
+ return this->m_childs[i];
+ }
+ return NULL;
+ }
+
+ int findChild(Node<OBJECT> *pNode) const
+ {
+ for (int i = 0; i < this->m_childs.size(); i++)
+ {
+ if(this->m_childs[i] == pNode)
+ return i;
+ }
+ return -1;
+ }
+
+ void addChild(Node<OBJECT> *pNode)
+ {
+ if(!pNode)
+ return;
+
+ CV_Assert(pNode->m_pParent == 0);
+ pNode->m_pParent = this;
+ this->m_childs.push_back(pNode);
+ }
+
+ void removeChilds()
+ {
+ for(int i = 0; i < m_childs.size(); i++)
+ {
+ m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
+ delete m_childs[i];
+ }
+ m_childs.clear();
+ }
+
+public:
+ OBJECT m_payload;
+ Node<OBJECT>* m_pParent;
+ std::vector<Node<OBJECT>*> m_childs;
+};
+
+// Instrumentation external interface
+namespace instr
+{
+enum
+{
+ TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function
+ TYPE_MARKER, // Information marker
+ TYPE_WRAPPER, // Wrapper function for implementation
+ TYPE_FUN, // Simple function call
+};
+
+enum
+{
+ IMPL_PLAIN = 0,
+ IMPL_IPP,
+ IMPL_OPENCL,
+};
+
+enum
+{
+ FLAGS_MAPPING = 0x01,
+};
+
+enum
+{
+ COMMAND_RESET = 0x01,
+};
+
+class CV_EXPORTS NodeData
+{
+public:
+ NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN);
+ NodeData(NodeData &ref);
+ ~NodeData();
+ NodeData& operator=(const NodeData&);
+
+ cv::String m_funName;
+ int m_instrType;
+ int m_implType;
+ cv::String m_fileName;
+ int m_lineNum;
+
+ bool m_funError;
+ volatile int m_counter;
+ bool m_stopPoint;
+ uint64 m_ticksMean;
+
+};
+bool operator==(const NodeData& lhs, const NodeData& rhs);
+
+typedef Node<NodeData> InstrNode;
+
+CV_EXPORTS bool useInstrumentation();
+CV_EXPORTS void setUseInstrumentation(bool flag);
+CV_EXPORTS InstrNode* getTrace();
+CV_EXPORTS void resetTrace();
+CV_EXPORTS void setFlags(int modeFlags);
+CV_EXPORTS int getFlags();
+};
+
} //namespace cv
#ifndef DISABLE_OPENCV_24_COMPATIBILITY
public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
{
+
body = &_body;
wholeRange = _r;
double len = wholeRange.end - wholeRange.start;
nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
+
+#ifdef ENABLE_INSTRUMENTATION
+ pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode;
+#endif
}
void operator()(const cv::Range& sr) const
{
+#ifdef ENABLE_INSTRUMENTATION
+ {
+ cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct();
+ pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread
+ }
+#endif
+
cv::Range r;
r.start = (int)(wholeRange.start +
((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
const cv::ParallelLoopBody* body;
cv::Range wholeRange;
int nstripes;
+#ifdef ENABLE_INSTRUMENTATION
+ cv::instr::InstrNode *pThreadRoot;
+#endif
};
#if defined HAVE_TBB
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
+ CV_INSTRUMENT_REGION()
+
#ifdef CV_PARALLEL_FRAMEWORK
if(numThreads != 0)
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
+ CV_INSTRUMENT_REGION_OPENCL()
+
bool haveMask = _mask.kind() != _InputArray::NONE;
int nz = haveMask ? -1 : (int)_src.total();
Scalar mean, stddev;
k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
size_t globalsize = groups * wgs;
- if (!k.run(1, &globalsize, &wgs, false))
+
+ if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false))
return false;
typedef Scalar (* part_sum)(Mat m);
{
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
+ CV_INSTRUMENT_REGION_IPP()
+
#if IPP_VERSION_X100 >= 700
int cn = src.channels();
size_t total_size = src.total();
if( !mask.empty() )
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
- ippiMaskMeanStdDevFuncC1 ippFuncC1 =
+ ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
0;
- if( ippFuncC1 )
+ if( ippiMean_StdDev_C1MR )
{
- if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
+ if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
- ippiMaskMeanStdDevFuncC3 ippFuncC3 =
+ ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
0;
- if( ippFuncC3 )
+ if( ippiMean_StdDev_C3CMR )
{
- if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
- ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
- ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+ if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+ CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+ CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
else
{
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
- ippiMeanStdDevFuncC1 ippFuncC1 =
+ ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
0;
- if( ippFuncC1 )
+ if( ippiMean_StdDev_C1R )
{
- if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
+ if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
- ippiMeanStdDevFuncC3 ippFuncC3 =
+ ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
0;
- if( ippFuncC3 )
+ if( ippiMean_StdDev_C3CR )
{
- if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
- ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
- ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
+ if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
+ CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
+ CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
+ CV_INSTRUMENT_REGION()
+
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_meanStdDev(_src, _mean, _sdv, _mask))
}
#endif
+namespace instr
+{
+bool useInstrumentation()
+{
+#ifdef ENABLE_INSTRUMENTATION
+ return getInstrumentStruct().useInstr;
+#else
+ return false;
+#endif
+}
+
+void setUseInstrumentation(bool flag)
+{
+#ifdef ENABLE_INSTRUMENTATION
+ getInstrumentStruct().useInstr = flag;
+#else
+ CV_UNUSED(flag);
+#endif
+}
+
+InstrNode* getTrace()
+{
+#ifdef ENABLE_INSTRUMENTATION
+ return &getInstrumentStruct().rootNode;
+#else
+ return NULL;
+#endif
+}
+
+void resetTrace()
+{
+#ifdef ENABLE_INSTRUMENTATION
+ getInstrumentStruct().rootNode.removeChilds();
+ getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode;
+#endif
+}
+
+void setFlags(int modeFlags)
+{
+#ifdef ENABLE_INSTRUMENTATION
+ getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING);
+#else
+ CV_UNUSED(modeFlags);
+#endif
+}
+int getFlags()
+{
+#ifdef ENABLE_INSTRUMENTATION
+ int flags = 0;
+ if(getInstrumentStruct().enableMapping)
+ flags |= FLAGS_MAPPING;
+ return flags;
+#else
+ return 0;
+#endif
+}
+
+NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+{
+ m_instrType = TYPE_GENERAL;
+ m_implType = IMPL_PLAIN;
+
+ m_funError = false;
+ m_counter = 0;
+ m_stopPoint = false;
+ m_ticksMean = 0;
+
+ m_funName = funName;
+ m_instrType = instrType;
+ m_implType = implType;
+ m_fileName = fileName;
+ m_lineNum = lineNum;
+}
+NodeData::NodeData(NodeData &ref)
+{
+ *this = ref;
+}
+NodeData& NodeData::operator=(const NodeData &right)
+{
+ this->m_funName = right.m_funName;
+ this->m_instrType = right.m_instrType;
+ this->m_implType = right.m_implType;
+ this->m_funError = right.m_funError;
+ this->m_counter = right.m_counter;
+ this->m_stopPoint = right.m_stopPoint;
+ this->m_ticksMean = right.m_ticksMean;
+ this->m_fileName = right.m_fileName;
+ this->m_lineNum = right.m_lineNum;
+
+ return *this;
+}
+NodeData::~NodeData()
+{
+}
+bool operator==(const NodeData& left, const NodeData& right)
+{
+ if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName)
+ return true;
+ return false;
+}
+
+#ifdef ENABLE_INSTRUMENTATION
+InstrStruct& getInstrumentStruct()
+{
+ static InstrStruct instr;
+ return instr;
+}
+
+InstrTLSStruct& getInstrumentTLSStruct()
+{
+ return *getInstrumentStruct().tlsStruct.get();
+}
+
+InstrNode* getCurrentNode()
+{
+ return getInstrumentTLSStruct().pCurrentNode;
+}
+
+IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType)
+{
+ m_disabled = false;
+ m_regionTicks = 0;
+
+ InstrStruct *pStruct = &getInstrumentStruct();
+ if(pStruct->useInstr)
+ {
+ InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
+
+ // Disable in case of failure
+ if(!pTLS->pCurrentNode)
+ {
+ m_disabled = true;
+ return;
+ }
+
+ m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint;
+ if(m_disabled)
+ return;
+
+ NodeData payload(funName, fileName, lineNum, instrType, implType);
+ Node<NodeData>* pChild = NULL;
+
+ if(pStruct->enableMapping)
+ {
+ // Critical section
+ cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation
+ pChild = pTLS->pCurrentNode->findChild(payload);
+ if(!pChild)
+ {
+ pChild = new Node<NodeData>(payload);
+ pTLS->pCurrentNode->addChild(pChild);
+ }
+ }
+ else
+ {
+ pChild = pTLS->pCurrentNode->findChild(payload);
+ if(!pChild)
+ {
+ pTLS->pCurrentNode->m_payload.m_stopPoint = true;
+ return;
+ }
+ }
+ pTLS->pCurrentNode = pChild;
+
+ m_regionTicks = getTickCount();
+ }
+}
+
+IntrumentationRegion::~IntrumentationRegion()
+{
+ InstrStruct *pStruct = &getInstrumentStruct();
+ if(pStruct->useInstr)
+ {
+ if(!m_disabled)
+ {
+ InstrTLSStruct *pTLS = &getInstrumentTLSStruct();
+ if(pTLS->pCurrentNode->m_payload.m_stopPoint)
+ {
+ pTLS->pCurrentNode->m_payload.m_stopPoint = false;
+ }
+ else
+ {
+ if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL &&
+ pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
+ cv::ocl::finish();
+
+ uint64 ticks = (getTickCount() - m_regionTicks);
+ {
+ cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation
+ pTLS->pCurrentNode->m_payload.m_counter++;
+ if(pTLS->pCurrentNode->m_payload.m_counter <= 1)
+ pTLS->pCurrentNode->m_payload.m_ticksMean = ticks;
+ else
+ pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter;
+ }
+
+ pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent;
+ }
+ }
+ }
+}
+#endif
+}
+
namespace ipp
{
// TODO Replace finish call to dstUMat.wait()
#define OCL_TEST_CYCLE() \
- for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
+ for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_N(n) \
- for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
+ for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
- for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
+ for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)
} ImplData;
#endif
+#ifdef ENABLE_INSTRUMENTATION
+class InstumentData
+{
+public:
+ static ::cv::String treeToString();
+ static void printTree();
+};
+#endif
+
class CV_EXPORTS TestBase: public ::testing::Test
{
public:
virtual void SetUp();
virtual void TearDown();
- void startTimer();
+ bool startTimer(); // bool is dummy for conditional loop
void stopTimer();
bool next();
#ifdef CV_COLLECT_IMPL_DATA
ImplData implConf;
#endif
+#ifdef ENABLE_INSTRUMENTATION
+ InstumentData instrConf;
+#endif
+
private:
typedef std::vector<std::pair<int, cv::Size> > SizeVector;
typedef std::vector<int64> TimeVector;
CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\
}
-#define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer())
-#define TEST_CYCLE() for(; startTimer(), next(); stopTimer())
-#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r)
+#define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer())
+#define TEST_CYCLE() for(; next() && startTimer(); stopTimer())
+#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r)
namespace perf
{
#ifdef CV_COLLECT_IMPL_DATA
static bool param_collect_impl;
#endif
+#ifdef ENABLE_INSTRUMENTATION
+static bool param_instrument;
+#endif
extern bool test_ipp_check;
#ifdef HAVE_CUDA
}
};
+#ifdef ENABLE_INSTRUMENTATION
+static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot)
+{
+ // Print empty line for a big tree nodes
+ if(pNode->m_pParent)
+ {
+ int parendIdx = pNode->m_pParent->findChild(pNode);
+ if(parendIdx > 0 && pNode->m_pParent->m_childs[parendIdx-1]->m_childs.size())
+ {
+ printShift(pNode->m_pParent->m_childs[parendIdx-1]->m_childs[0], pRoot);
+ printf("\n");
+ }
+ }
+
+ // Check if parents have more childs
+ std::vector<cv::instr::InstrNode*> cache;
+ cv::instr::InstrNode *pTmpNode = pNode;
+ while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot)
+ {
+ cache.push_back(pTmpNode->m_pParent);
+ pTmpNode = pTmpNode->m_pParent;
+ }
+ for(int i = (int)cache.size()-1; i >= 0; i--)
+ {
+ if(cache[i]->m_pParent)
+ {
+ if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1)
+ printf(" ");
+ else
+ printf("| ");
+ }
+ }
+}
+
+static double calcLocalWeight(cv::instr::InstrNode *pNode)
+{
+ if(pNode->m_pParent && pNode->m_pParent->m_pParent)
+ return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean);
+ else
+ return 100;
+}
+
+static double calcGlobalWeight(cv::instr::InstrNode *pNode)
+{
+ cv::instr::InstrNode* globNode = pNode;
+
+ while(globNode->m_pParent && globNode->m_pParent->m_pParent)
+ globNode = globNode->m_pParent;
+
+ return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean);
+}
+
+static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot)
+{
+ printf("%s", (pNode->m_payload.m_funName.substr(0, 40) + ((pNode->m_payload.m_funName.size()>40)?"...":"")).c_str());
+
+ // Write instrumentation flags
+ if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL || pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
+ {
+ printf("<");
+ if(pNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER)
+ printf("W");
+ else if(pNode->m_payload.m_instrType == cv::instr::TYPE_FUN)
+ printf("F");
+ else if(pNode->m_payload.m_instrType == cv::instr::TYPE_MARKER)
+ printf("MARK");
+
+ if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL && pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN)
+ printf("_");
+ if(pNode->m_payload.m_implType == cv::instr::IMPL_IPP)
+ printf("IPP");
+ else if(pNode->m_payload.m_implType == cv::instr::IMPL_OPENCL)
+ printf("OCL");
+
+ printf(">");
+ }
+
+ if(pNode->m_pParent)
+ {
+ printf(" - C:%d", pNode->m_payload.m_counter);
+ printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000);
+ if(pNode->m_pParent->m_pParent)
+ printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode));
+ }
+ printf("\n");
+
+ // Group childes
+ std::vector<cv::String> groups;
+ {
+ bool bFound = false;
+ for(size_t i = 0; i < pNode->m_childs.size(); i++)
+ {
+ bFound = false;
+ for(size_t j = 0; j < groups.size(); j++)
+ {
+ if(groups[j] == pNode->m_childs[i]->m_payload.m_funName)
+ {
+ bFound = true;
+ break;
+ }
+ }
+ if(!bFound)
+ groups.push_back(pNode->m_childs[i]->m_payload.m_funName);
+ }
+ }
+
+ for(size_t g = 0; g < groups.size(); g++)
+ {
+ for(size_t i = 0; i < pNode->m_childs.size(); i++)
+ {
+ if(pNode->m_childs[i]->m_payload.m_funName == groups[g])
+ {
+ printShift(pNode->m_childs[i], pRoot);
+
+ if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i]))
+ printf("\\---");
+ else
+ printf("|---");
+ printNodeRec(pNode->m_childs[i], pRoot);
+ }
+ }
+ }
+}
+
+template <typename T>
+std::string to_string_with_precision(const T value, const int p = 3)
+{
+ std::ostringstream out;
+ out << std::fixed << std::setprecision(p) << value;
+ return out.str();
+}
+
+static cv::String nodeToString(cv::instr::InstrNode *pNode)
+{
+ cv::String string;
+ if (pNode->m_payload.m_funName == "ROOT")
+ string = pNode->m_payload.m_funName;
+ else
+ {
+ string = "#";
+ string += std::to_string(pNode->m_payload.m_instrType);
+ string += pNode->m_payload.m_funName;
+ string += " - L:";
+ string += to_string_with_precision(calcLocalWeight(pNode));
+ string += ", G:";
+ string += to_string_with_precision(calcGlobalWeight(pNode));
+ }
+ string += "(";
+ for(size_t i = 0; i < pNode->m_childs.size(); i++)
+ string += nodeToString(pNode->m_childs[i]);
+ string += ")";
+
+ return string;
+}
+
+static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl)
+{
+ uint64 ticks = 0;
+
+ if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl)
+ ticks = pNode->m_payload.m_ticksMean;
+
+ for(size_t i = 0; i < pNode->m_childs.size(); i++)
+ ticks += getNodeTimeRec(pNode->m_childs[i], type, impl);
+
+ return ticks;
+}
+
+static uint64 getImplTime(int impl)
+{
+ uint64 ticks = 0;
+ cv::instr::InstrNode *pRoot = cv::instr::getTrace();
+
+ ticks = getNodeTimeRec(pRoot, cv::instr::TYPE_FUN, impl);
+
+ return ticks;
+}
+
+static uint64 getTotalTime()
+{
+ uint64 ticks = 0;
+ cv::instr::InstrNode *pRoot = cv::instr::getTrace();
+
+ for(size_t i = 0; i < pRoot->m_childs.size(); i++)
+ ticks += pRoot->m_childs[i]->m_payload.m_ticksMean;
+
+ return ticks;
+}
+
+::cv::String InstumentData::treeToString()
+{
+ cv::String string = nodeToString(cv::instr::getTrace());
+ return string;
+}
+
+void InstumentData::printTree()
+{
+ if(cv::instr::getTrace()->m_childs.size())
+ {
+ printf("[ TRACE ]\n");
+ printNodeRec(cv::instr::getTrace(), cv::instr::getTrace());
+#ifdef HAVE_IPP
+ printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()));
+#endif
+#ifdef HAVE_OPENCL
+ printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()));
+#endif
+ printf("\n[/TRACE ]\n");
+ fflush(stdout);
+ }
+}
+#endif
/*****************************************************************************************\
* ::perf::TestBase
#ifdef CV_COLLECT_IMPL_DATA
"{ perf_collect_impl |false |collect info about executed implementations}"
#endif
+#ifdef ENABLE_INSTRUMENTATION
+ "{ perf_instrument |false |instrument code to collect implementations trace}"
+#endif
"{ help h |false |print help info}"
#ifdef HAVE_CUDA
"{ perf_cuda_device |0 |run CUDA test suite onto specific CUDA capable device}"
#ifdef CV_COLLECT_IMPL_DATA
param_collect_impl = args.get<bool>("perf_collect_impl");
#endif
+#ifdef ENABLE_INSTRUMENTATION
+ param_instrument = args.get<bool>("perf_instrument");
+#endif
#ifdef ANDROID
param_affinity_mask = args.get<int>("perf_affinity_mask");
log_power_checkpoints = args.has("perf_log_power_checkpoints");
else
cv::setUseCollection(0);
#endif
+#ifdef ENABLE_INSTRUMENTATION
+ if(param_instrument)
+ cv::instr::setUseInstrumentation(true);
+ else
+ cv::instr::setUseInstrumentation(false);
+#endif
#ifdef HAVE_CUDA
cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2));
declare.time(30);
double s = 0;
- for(declare.iterations(20); startTimer(), next(); stopTimer())
+ for(declare.iterations(20); next() && startTimer(); stopTimer())
s+=a.dot(b);
declare.time(s);
//self calibration
SetUp();
- for(declare.iterations(1000); startTimer(), next(); stopTimer()){}
+ for(declare.iterations(1000); next() && startTimer(); stopTimer()){}
}
};
}
#endif
- if (has_next)
- startTimer(); // really we should measure activity from this moment, so reset start time
return has_next;
}
return res;
}
-void TestBase::startTimer()
+bool TestBase::startTimer()
{
+#ifdef ENABLE_INSTRUMENTATION
+ if(currentIter == 0)
+ {
+ cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_MAPPING); // enable mapping for the first run
+ cv::instr::resetTrace();
+ }
+#endif
lastTime = cv::getTickCount();
+ return true; // dummy true for conditional loop
}
void TestBase::stopTimer()
if (lastTime < 0) lastTime = 0;
times.push_back(lastTime);
lastTime = 0;
+
+#ifdef ENABLE_INSTRUMENTATION
+ cv::instr::setFlags(cv::instr::getFlags()&~cv::instr::FLAGS_MAPPING); // disable mapping to decrease overhead for +1 run
+#endif
}
performance_metrics& TestBase::calcMetrics()
RecordProperty("gstddev", cv::format("%.6f", m.gstddev).c_str());
RecordProperty("mean", cv::format("%.0f", m.mean).c_str());
RecordProperty("stddev", cv::format("%.0f", m.stddev).c_str());
+#ifdef ENABLE_INSTRUMENTATION
+ if(cv::instr::useInstrumentation())
+ {
+ cv::String tree = InstumentData::treeToString();
+ RecordProperty("functions_hierarchy", tree.c_str());
+ RecordProperty("total_ipp_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())));
+ RecordProperty("total_opencl_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())));
+ cv::instr::resetTrace();
+ }
+#endif
#ifdef CV_COLLECT_IMPL_DATA
if(param_collect_impl)
{
fflush(stdout);
}
#endif
+
+#ifdef ENABLE_INSTRUMENTATION
+ if(cv::instr::useInstrumentation())
+ InstumentData::printTree();
+#endif
+
reportMetrics(true);
}