From 17e2bf5717559ccf4fe809dd9f83afa7582f8ef5 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 13 Oct 2019 11:14:41 +0000 Subject: [PATCH] core(tls): implement releasing of TLS on thread termination - move TLS & instrumentation code out of core/utility.hpp - (*) TLSData lost .gather() method (to dispose thread data on thread termination) - use TLSDataAccumulator for reliable collecting of thread data - prefer using of .detachData() + .cleanupDetachedData() instead of .gather() method (*) API is broken: replace TLSData => TLSDataAccumulator if gather required (objects disposal on threads termination is not available in accumulator mode) --- modules/core/include/opencv2/core/private.hpp | 4 + modules/core/include/opencv2/core/utility.hpp | 168 +-------------- .../include/opencv2/core/utils/instrumentation.hpp | 125 +++++++++++ modules/core/include/opencv2/core/utils/tls.hpp | 237 +++++++++++++++++++++ .../include/opencv2/core/utils/trace.private.hpp | 4 +- modules/core/src/ocl.cpp | 25 +-- modules/core/src/ovx.cpp | 12 +- modules/core/src/precomp.hpp | 2 +- modules/core/src/rand.cpp | 2 +- modules/core/src/system.cpp | 170 ++++++++++----- modules/core/src/umatrix.cpp | 2 + modules/core/test/test_utils.cpp | 2 + modules/core/test/test_utils_tls.impl.hpp | 134 ++++++++++++ modules/imgproc/src/histogram.cpp | 2 + 14 files changed, 650 insertions(+), 239 deletions(-) create mode 100644 modules/core/include/opencv2/core/utils/instrumentation.hpp create mode 100644 modules/core/include/opencv2/core/utils/tls.hpp create mode 100644 modules/core/test/test_utils_tls.impl.hpp diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 5e66801..24f7fc6 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -53,6 +53,10 @@ #include +#ifdef ENABLE_INSTRUMENTATION +#include "opencv2/core/utils/instrumentation.hpp" +#endif + #ifdef HAVE_EIGEN # if defined __GNUC__ && defined __APPLE__ # pragma GCC diagnostic ignored "-Wshadow" diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index cbec10b..e7f169b 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -63,30 +63,6 @@ namespace cv { -#ifdef CV_COLLECT_IMPL_DATA -CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays -CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays -// Get stored implementation flags and functions names arrays -// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function -CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); - -CV_EXPORTS bool useCollection(); // return implementation collection state -CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state - -#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation -#define CV_IMPL_OCL 0x02 // OpenCL implementation -#define CV_IMPL_IPP 0x04 // IPP implementation -#define CV_IMPL_MT 0x10 // multithreaded implementation - -#define CV_IMPL_ADD(impl) \ - if(cv::useCollection()) \ - { \ - cv::addImpl(impl, CV_Func); \ - } -#else -#define CV_IMPL_ADD(impl) -#endif - //! @addtogroup core_utils //! @{ @@ -726,61 +702,6 @@ private: AutoLock& operator = (const AutoLock&); }; -// TLS interface -class CV_EXPORTS TLSDataContainer -{ -protected: - TLSDataContainer(); - virtual ~TLSDataContainer(); - - void gatherData(std::vector &data) const; -#if OPENCV_ABI_COMPATIBILITY > 300 - void* getData() const; - void release(); - -private: -#else - void release(); - -public: - void* getData() const; -#endif - virtual void* createDataInstance() const = 0; - virtual void deleteDataInstance(void* pData) const = 0; - - int key_; - -public: - void cleanup(); //! Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid. -}; - -// Main TLS data class -template -class TLSData : protected TLSDataContainer -{ -public: - inline TLSData() {} - inline ~TLSData() { release(); } // Release key and delete associated data - inline T* get() const { return (T*)getData(); } // Get data associated with key - inline T& getRef() const { T* ptr = (T*)getData(); CV_Assert(ptr); return *ptr; } // Get data associated with key - - // Get data from all threads - inline void gather(std::vector &data) const - { - std::vector &dataVoid = reinterpret_cast&>(data); - gatherData(dataVoid); - } - - inline void cleanup() { TLSDataContainer::cleanup(); } - -private: - virtual void* createDataInstance() const CV_OVERRIDE {return new T;} // Wrapper to allocate data by template - virtual void deleteDataInstance(void* pData) const CV_OVERRIDE {delete (T*)pData;} // Wrapper to release data by template - - // Disable TLS copy operations - TLSData(TLSData &) {} - TLSData& operator =(const TLSData &) {return *this;} -}; /** @brief Designed for command line parsing @@ -1199,88 +1120,6 @@ public: std::vector*> m_childs; }; -// Instrumentation external interface -namespace instr -{ - -#if !defined OPENCV_ABI_CHECK - -enum TYPE -{ - TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function - TYPE_MARKER, // Information marker - TYPE_WRAPPER, // Wrapper function for implementation - TYPE_FUN, // Simple function call -}; - -enum IMPL -{ - IMPL_PLAIN = 0, - IMPL_IPP, - IMPL_OPENCL, -}; - -struct NodeDataTls -{ - NodeDataTls() - { - m_ticksTotal = 0; - } - uint64 m_ticksTotal; -}; - -class CV_EXPORTS NodeData -{ -public: - NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); - NodeData(NodeData &ref); - ~NodeData(); - NodeData& operator=(const NodeData&); - - cv::String m_funName; - cv::instr::TYPE m_instrType; - cv::instr::IMPL m_implType; - const char* m_fileName; - int m_lineNum; - void* m_retAddress; - bool m_alwaysExpand; - bool m_funError; - - volatile int m_counter; - volatile uint64 m_ticksTotal; - TLSData m_tls; - int m_threads; - - // No synchronization - double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } - double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } -}; -bool operator==(const NodeData& lhs, const NodeData& rhs); - -typedef Node InstrNode; - -CV_EXPORTS InstrNode* getTrace(); - -#endif // !defined OPENCV_ABI_CHECK - - -CV_EXPORTS bool useInstrumentation(); -CV_EXPORTS void setUseInstrumentation(bool flag); -CV_EXPORTS void resetTrace(); - -enum FLAGS -{ - FLAGS_NONE = 0, - FLAGS_MAPPING = 0x01, - FLAGS_EXPAND_SAME_NAMES = 0x02, -}; - -CV_EXPORTS void setFlags(FLAGS modeFlags); -static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } -CV_EXPORTS FLAGS getFlags(); - -} // namespace instr - namespace samples { @@ -1355,6 +1194,13 @@ CV_EXPORTS int getThreadID(); } //namespace cv +#ifdef CV_COLLECT_IMPL_DATA +#include "opencv2/core/utils/instrumentation.hpp" +#else +/// Collect implementation data on OpenCV function call. Requires ENABLE_IMPL_COLLECTION build option. +#define CV_IMPL_ADD(impl) +#endif + #ifndef DISABLE_OPENCV_24_COMPATIBILITY #include "opencv2/core/core_c.h" #endif diff --git a/modules/core/include/opencv2/core/utils/instrumentation.hpp b/modules/core/include/opencv2/core/utils/instrumentation.hpp new file mode 100644 index 0000000..3639867 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/instrumentation.hpp @@ -0,0 +1,125 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_INSTR_HPP +#define OPENCV_UTILS_INSTR_HPP + +#include +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +#ifdef CV_COLLECT_IMPL_DATA +CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays +CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays +// Get stored implementation flags and functions names arrays +// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function +CV_EXPORTS int getImpl(std::vector &impl, std::vector &funName); + +CV_EXPORTS bool useCollection(); // return implementation collection state +CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state + +#define CV_IMPL_PLAIN 0x01 // native CPU OpenCV implementation +#define CV_IMPL_OCL 0x02 // OpenCL implementation +#define CV_IMPL_IPP 0x04 // IPP implementation +#define CV_IMPL_MT 0x10 // multithreaded implementation + +#undef CV_IMPL_ADD +#define CV_IMPL_ADD(impl) \ + if(cv::useCollection()) \ + { \ + cv::addImpl(impl, CV_Func); \ + } +#endif + +// Instrumentation external interface +namespace instr +{ + +#if !defined OPENCV_ABI_CHECK + +enum TYPE +{ + TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function + TYPE_MARKER, // Information marker + TYPE_WRAPPER, // Wrapper function for implementation + TYPE_FUN, // Simple function call +}; + +enum IMPL +{ + IMPL_PLAIN = 0, + IMPL_IPP, + IMPL_OPENCL, +}; + +struct NodeDataTls +{ + NodeDataTls() + { + m_ticksTotal = 0; + } + uint64 m_ticksTotal; +}; + +class CV_EXPORTS NodeData +{ +public: + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN); + NodeData(NodeData &ref); + ~NodeData(); + NodeData& operator=(const NodeData&); + + cv::String m_funName; + cv::instr::TYPE m_instrType; + cv::instr::IMPL m_implType; + const char* m_fileName; + int m_lineNum; + void* m_retAddress; + bool m_alwaysExpand; + bool m_funError; + + volatile int m_counter; + volatile uint64 m_ticksTotal; + TLSDataAccumulator m_tls; + int m_threads; + + // No synchronization + double getTotalMs() const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; } + double getMeanMs() const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; } +}; +bool operator==(const NodeData& lhs, const NodeData& rhs); + +typedef Node InstrNode; + +CV_EXPORTS InstrNode* getTrace(); + +#endif // !defined OPENCV_ABI_CHECK + + +CV_EXPORTS bool useInstrumentation(); +CV_EXPORTS void setUseInstrumentation(bool flag); +CV_EXPORTS void resetTrace(); + +enum FLAGS +{ + FLAGS_NONE = 0, + FLAGS_MAPPING = 0x01, + FLAGS_EXPAND_SAME_NAMES = 0x02, +}; + +CV_EXPORTS void setFlags(FLAGS modeFlags); +static inline void setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); } +CV_EXPORTS FLAGS getFlags(); + +} // namespace instr + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/modules/core/include/opencv2/core/utils/tls.hpp b/modules/core/include/opencv2/core/utils/tls.hpp new file mode 100644 index 0000000..b5f1138 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/tls.hpp @@ -0,0 +1,237 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_TLS_HPP +#define OPENCV_UTILS_TLS_HPP + +#include + +namespace cv { + +//! @addtogroup core_utils +//! @{ + +namespace details { class TlsStorage; } + +/** TLS container base implementation + * + * Don't use directly. + * + * @sa TLSData, TLSDataAccumulator templates + */ +class CV_EXPORTS TLSDataContainer +{ +protected: + TLSDataContainer(); + virtual ~TLSDataContainer(); + + /// @deprecated use detachData() instead + void gatherData(std::vector &data) const; + /// get TLS data and detach all data from threads (similar to cleanup() call) + void detachData(std::vector& data); + + void* getData() const; + void release(); + +protected: + virtual void* createDataInstance() const = 0; + virtual void deleteDataInstance(void* pData) const = 0; + +#if OPENCV_ABI_COMPATIBILITY > 300 +private: +#else +public: +#endif + int key_; + + friend class cv::details::TlsStorage; // core/src/system.cpp + +public: + void cleanup(); //!< Release created TLS data container objects. It is similar to release() call, but it keeps TLS container valid. + +private: + // Disable copy/assign (noncopyable pattern) + TLSDataContainer(TLSDataContainer &); + TLSDataContainer& operator =(const TLSDataContainer &); +}; + + +/** @brief Simple TLS data class + * + * @sa TLSDataAccumulator + */ +template +class TLSData : protected TLSDataContainer +{ +public: + inline TLSData() {} + inline ~TLSData() { release(); } + + inline T* get() const { return (T*)getData(); } //!< Get data associated with key + inline T& getRef() const { T* ptr = (T*)getData(); CV_DbgAssert(ptr); return *ptr; } //!< Get data associated with key + + /// Release associated thread data + inline void cleanup() + { + TLSDataContainer::cleanup(); + } + +protected: + /// Wrapper to allocate data by template + virtual void* createDataInstance() const CV_OVERRIDE { return new T; } + /// Wrapper to release data by template + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE { delete (T*)pData; } +}; + + +/// TLS data accumulator with gathering methods +template +class TLSDataAccumulator : public TLSData +{ + mutable cv::Mutex mutex; + mutable std::vector dataFromTerminatedThreads; + std::vector detachedData; + bool cleanupMode; +public: + TLSDataAccumulator() : cleanupMode(false) {} + ~TLSDataAccumulator() + { + release(); + } + + /** @brief Get data from all threads + * @deprecated replaced by detachData() + * + * Lifetime of vector data is valid until next detachData()/cleanup()/release() calls + * + * @param[out] data result buffer (should be empty) + */ + void gather(std::vector &data) const + { + CV_Assert(cleanupMode == false); // state is not valid + CV_Assert(data.empty()); + { + std::vector &dataVoid = reinterpret_cast&>(data); + TLSDataContainer::gatherData(dataVoid); + } + { + AutoLock lock(mutex); + data.reserve(data.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + data.push_back((T*)*i); + } + } + } + + /** @brief Get and detach data from all threads + * + * Call cleanupDetachedData() when returned vector is not needed anymore. + * + * @return Vector with associated data. Content is preserved (including lifetime of attached data pointers) until next detachData()/cleanupDetachedData()/cleanup()/release() calls + */ + std::vector& detachData() + { + CV_Assert(cleanupMode == false); // state is not valid + std::vector dataVoid; + { + TLSDataContainer::detachData(dataVoid); + } + { + AutoLock lock(mutex); + detachedData.reserve(dataVoid.size() + dataFromTerminatedThreads.size()); + for (typename std::vector::const_iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + detachedData.push_back((T*)*i); + } + dataFromTerminatedThreads.clear(); + for (typename std::vector::const_iterator i = dataVoid.begin(); i != dataVoid.end(); ++i) + { + detachedData.push_back((T*)(void*)*i); + } + } + dataVoid.clear(); + return detachedData; + } + + /// Release associated thread data returned by detachData() call + void cleanupDetachedData() + { + AutoLock lock(mutex); + cleanupMode = true; + _cleanupDetachedData(); + cleanupMode = false; + } + + /// Release associated thread data + void cleanup() + { + cleanupMode = true; + TLSDataContainer::cleanup(); + + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + cleanupMode = false; + } + + /// Release associated thread data and free TLS key + void release() + { + cleanupMode = true; + TLSDataContainer::release(); + { + AutoLock lock(mutex); + _cleanupDetachedData(); + _cleanupTerminatedData(); + } + } + +protected: + // synchronized + void _cleanupDetachedData() + { + for (typename std::vector::iterator i = detachedData.begin(); i != detachedData.end(); ++i) + { + deleteDataInstance((T*)*i); + } + detachedData.clear(); + } + + // synchronized + void _cleanupTerminatedData() + { + for (typename std::vector::iterator i = dataFromTerminatedThreads.begin(); i != dataFromTerminatedThreads.end(); ++i) + { + deleteDataInstance((T*)*i); + } + dataFromTerminatedThreads.clear(); + } + +protected: + virtual void* createDataInstance() const CV_OVERRIDE + { + // Note: we can collect all allocated data here, but this would require raced mutex locks + return new T; + } + virtual void deleteDataInstance(void* pData) const CV_OVERRIDE + { + if (cleanupMode) + { + delete (T*)pData; + } + else + { + AutoLock lock(mutex); + dataFromTerminatedThreads.push_back((T*)pData); + } + } +}; + + +//! @} + +} // namespace + +#endif // OPENCV_UTILS_TLS_HPP diff --git a/modules/core/include/opencv2/core/utils/trace.private.hpp b/modules/core/include/opencv2/core/utils/trace.private.hpp index 1798166..afc4115 100644 --- a/modules/core/include/opencv2/core/utils/trace.private.hpp +++ b/modules/core/include/opencv2/core/utils/trace.private.hpp @@ -9,6 +9,8 @@ #include +#include + #include "trace.hpp" //! @cond IGNORED @@ -332,7 +334,7 @@ public: Mutex mutexCreate; Mutex mutexCount; - TLSData tls; + TLSDataAccumulator tls; cv::Ptr trace_storage; private: diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 22408df..8ebb006 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -909,19 +909,19 @@ bool haveOpenCL() bool useOpenCL() { - CoreTLSData* data = getCoreTlsData().get(); - if( data->useOpenCL < 0 ) + CoreTLSData& data = getCoreTlsData(); + if (data.useOpenCL < 0) { try { - data->useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0; + data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0; } catch (...) { - data->useOpenCL = 0; + data.useOpenCL = 0; } } - return data->useOpenCL > 0; + return data.useOpenCL > 0; } #ifdef HAVE_OPENCL @@ -937,14 +937,14 @@ void setUseOpenCL(bool flag) { CV_TRACE_FUNCTION(); - CoreTLSData* data = getCoreTlsData().get(); + CoreTLSData& data = getCoreTlsData(); if (!flag) { - data->useOpenCL = 0; + data.useOpenCL = 0; } else if( haveOpenCL() ) { - data->useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0; + data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0; } } @@ -1655,7 +1655,7 @@ size_t Device::profilingTimerResolution() const const Device& Device::getDefault() { const Context& ctx = Context::getDefault(); - int idx = getCoreTlsData().get()->device; + int idx = getCoreTlsData().device; const Device& device = ctx.device(idx); return device; } @@ -2562,9 +2562,10 @@ void attachContext(const String& platformName, void* platformID, void* context, CV_OCL_CHECK(clRetainContext((cl_context)context)); // clear command queue, if any - getCoreTlsData().get()->oclQueue.finish(); + CoreTLSData& data = getCoreTlsData(); + data.oclQueue.finish(); Queue q; - getCoreTlsData().get()->oclQueue = q; + data.oclQueue = q; return; } // attachContext() @@ -2752,7 +2753,7 @@ void* Queue::ptr() const Queue& Queue::getDefault() { - Queue& q = getCoreTlsData().get()->oclQueue; + Queue& q = getCoreTlsData().oclQueue; if( !q.p && haveOpenCL() ) q.create(Context::getDefault()); return q; diff --git a/modules/core/src/ovx.cpp b/modules/core/src/ovx.cpp index d906ead..9685cba 100644 --- a/modules/core/src/ovx.cpp +++ b/modules/core/src/ovx.cpp @@ -76,13 +76,13 @@ bool haveOpenVX() bool useOpenVX() { #ifdef HAVE_OPENVX - CoreTLSData* data = getCoreTlsData().get(); - if( data->useOpenVX < 0 ) + CoreTLSData& data = getCoreTlsData(); + if (data.useOpenVX < 0) { // enabled (if available) by default - data->useOpenVX = haveOpenVX() ? 1 : 0; + data.useOpenVX = haveOpenVX() ? 1 : 0; } - return data->useOpenVX > 0; + return data.useOpenVX > 0; #else return false; #endif @@ -93,8 +93,8 @@ void setUseOpenVX(bool flag) #ifdef HAVE_OPENVX if( haveOpenVX() ) { - CoreTLSData* data = getCoreTlsData().get(); - data->useOpenVX = flag ? 1 : 0; + CoreTLSData& data = getCoreTlsData(); + data.useOpenVX = flag ? 1 : 0; } #else CV_Assert(!flag && "OpenVX support isn't enabled at compile time"); diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index acaee08..0ffde88 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -354,7 +354,7 @@ struct CoreTLSData #endif }; -TLSData& getCoreTlsData(); +CoreTLSData& getCoreTlsData(); #if defined(BUILD_SHARED_LIBS) #if defined _WIN32 || defined WINCE diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index aa952b2..539f92a 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -770,7 +770,7 @@ void RNG::fill( InputOutputArray _mat, int disttype, cv::RNG& cv::theRNG() { - return getCoreTlsData().get()->rng; + return getCoreTlsData().rng; } void cv::setRNGSeed(int seed) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 1b67775..b39173d 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -50,6 +50,9 @@ #include +#include +#include + namespace cv { static Mutex* __initialization_mutex = NULL; @@ -1375,6 +1378,8 @@ bool Mutex::trylock() { return impl->trylock(); } //////////////////////////////// thread-local storage //////////////////////////////// +namespace details { + #ifdef _WIN32 #ifdef _MSC_VER #pragma warning(disable:4505) // unreferenced local function has been removed @@ -1436,9 +1441,10 @@ void TlsAbstraction::SetData(void *pData) } #endif #else // _WIN32 +static void opencv_tls_destructor(void* pData); TlsAbstraction::TlsAbstraction() { - CV_Assert(pthread_key_create(&tlsKey, NULL) == 0); + CV_Assert(pthread_key_create(&tlsKey, opencv_tls_destructor) == 0); } TlsAbstraction::~TlsAbstraction() { @@ -1479,42 +1485,46 @@ public: } ~TlsStorage() { - for(size_t i = 0; i < threads.size(); i++) - { - if(threads[i]) - { - /* Current architecture doesn't allow proper global objects release, so this check can cause crashes - - // Check if all slots were properly cleared - for(size_t j = 0; j < threads[i]->slots.size(); j++) - { - CV_Assert(threads[i]->slots[j] == 0); - } - */ - delete threads[i]; - } - } - threads.clear(); + // TlsStorage object should not be released + // There is no reliable way to avoid problems caused by static initialization order fiasco + CV_LOG_FATAL(NULL, "TlsStorage::~TlsStorage() call is not expected"); } - void releaseThread() + void releaseThread(void* tlsValue = NULL) { + ThreadData *pTD = tlsValue == NULL ? (ThreadData*)tls.GetData() : (ThreadData*)tlsValue; + if (pTD == NULL) + return; // no OpenCV TLS data for this thread AutoLock guard(mtxGlobalAccess); - ThreadData *pTD = (ThreadData*)tls.GetData(); - for(size_t i = 0; i < threads.size(); i++) + for (size_t i = 0; i < threads.size(); i++) { - if(pTD == threads[i]) + if (pTD == threads[i]) { - threads[i] = 0; - break; + threads[i] = NULL; + if (tlsValue == NULL) + tls.SetData(0); + std::vector& thread_slots = pTD->slots; + for (size_t slotIdx = 0; slotIdx < thread_slots.size(); slotIdx++) + { + void* pData = thread_slots[slotIdx]; + thread_slots[slotIdx] = NULL; + if (!pData) + continue; + TLSDataContainer* container = tlsSlots[slotIdx].container; + if (container) + container->deleteDataInstance(pData); + else + CV_LOG_ERROR(NULL, "TLS: container for slotIdx=" << slotIdx << " is NULL. Can't release thread data"); + } + delete pTD; + return; } } - tls.SetData(0); - delete pTD; + CV_LOG_WARNING(NULL, "TLS: Can't release thread TLS data (unknown pointer or data race): " << (void*)pTD); } // Reserve TLS storage index - size_t reserveSlot() + size_t reserveSlot(TLSDataContainer* container) { AutoLock guard(mtxGlobalAccess); CV_Assert(tlsSlotsSize == tlsSlots.size()); @@ -1522,15 +1532,15 @@ public: // Find unused slots for(size_t slot = 0; slot < tlsSlotsSize; slot++) { - if(!tlsSlots[slot]) + if (tlsSlots[slot].container == NULL) { - tlsSlots[slot] = 1; + tlsSlots[slot].container = container; return slot; } } // Create new slot - tlsSlots.push_back(1); tlsSlotsSize++; + tlsSlots.push_back(TlsSlotInfo(container)); tlsSlotsSize++; return tlsSlotsSize - 1; } @@ -1555,7 +1565,9 @@ public: } if (!keepSlot) - tlsSlots[slotIdx] = 0; + { + tlsSlots[slotIdx].container = NULL; // mark slot as free (see reserveSlot() implementation) + } } // Get data by TLS storage index @@ -1604,8 +1616,26 @@ public: tls.SetData((void*)threadData); { AutoLock guard(mtxGlobalAccess); - threadData->idx = threads.size(); - threads.push_back(threadData); + + bool found = false; + // Find unused slots + for(size_t slot = 0; slot < threads.size(); slot++) + { + if (threads[slot] == NULL) + { + threadData->idx = (int)slot; + threads[slot] = threadData; + found = true; + break; + } + } + + if (!found) + { + // Create new slot + threadData->idx = threads.size(); + threads.push_back(threadData); + } } } @@ -1622,8 +1652,14 @@ private: Mutex mtxGlobalAccess; // Shared objects operation guard size_t tlsSlotsSize; // equal to tlsSlots.size() in synchronized sections - // without synchronization this counter doesn't desrease - it is used for slotIdx sanity checks - std::vector tlsSlots; // TLS keys state + // without synchronization this counter doesn't decrease - it is used for slotIdx sanity checks + + struct TlsSlotInfo + { + TlsSlotInfo(TLSDataContainer* _container) : container(_container) {} + TLSDataContainer* container; // attached container (to dispose data of terminated threads) + }; + std::vector tlsSlots; // TLS keys state std::vector threads; // Array for all allocated data. Thread data pointers are placed here to allow data cleanup }; @@ -1633,9 +1669,19 @@ static TlsStorage &getTlsStorage() CV_SINGLETON_LAZY_INIT_REF(TlsStorage, new TlsStorage()) } +#ifndef _WIN32 // pthread key destructor +static void opencv_tls_destructor(void* pData) +{ + getTlsStorage().releaseThread(pData); +} +#endif + +} // namespace details +using namespace details; + TLSDataContainer::TLSDataContainer() { - key_ = (int)getTlsStorage().reserveSlot(); // Reserve key from TLS storage + key_ = (int)getTlsStorage().reserveSlot(this); // Reserve key from TLS storage } TLSDataContainer::~TLSDataContainer() @@ -1648,11 +1694,17 @@ void TLSDataContainer::gatherData(std::vector &data) const getTlsStorage().gather(key_, data); } +void TLSDataContainer::detachData(std::vector &data) +{ + getTlsStorage().releaseSlot(key_, data, true); +} + void TLSDataContainer::release() { - std::vector data; - data.reserve(32); - getTlsStorage().releaseSlot(key_, data); // Release key and get stored data for proper destruction + if (key_ == -1) + return; // already released + std::vector data; data.reserve(32); + getTlsStorage().releaseSlot(key_, data, false); // Release key and get stored data for proper destruction key_ = -1; for(size_t i = 0; i < data.size(); i++) // Delete all associated data deleteDataInstance(data[i]); @@ -1660,8 +1712,7 @@ void TLSDataContainer::release() void TLSDataContainer::cleanup() { - std::vector data; - data.reserve(32); + std::vector data; data.reserve(32); getTlsStorage().releaseSlot(key_, data, true); // Extract stored data with removal from TLS tables for(size_t i = 0; i < data.size(); i++) // Delete all associated data deleteDataInstance(data[i]); @@ -1680,11 +1731,16 @@ void* TLSDataContainer::getData() const return pData; } -TLSData& getCoreTlsData() +static TLSData& getCoreTlsDataTLS() { CV_SINGLETON_LAZY_INIT_REF(TLSData, new TLSData()) } +CoreTLSData& getCoreTlsData() +{ + return getCoreTlsDataTLS().getRef(); +} + #if defined CVAPI_EXPORTS && defined _WIN32 && !defined WINCE #ifdef WINRT #pragma warning(disable:4447) // Disable warning 'main' signature found without threading model @@ -2338,12 +2394,12 @@ String getIppVersion() bool useIPP() { #ifdef HAVE_IPP - CoreTLSData* data = getCoreTlsData().get(); - if(data->useIPP < 0) + CoreTLSData& data = getCoreTlsData(); + if (data.useIPP < 0) { - data->useIPP = getIPPSingleton().useIPP; + data.useIPP = getIPPSingleton().useIPP; } - return (data->useIPP > 0); + return (data.useIPP > 0); #else return false; #endif @@ -2351,24 +2407,24 @@ bool useIPP() void setUseIPP(bool flag) { - CoreTLSData* data = getCoreTlsData().get(); + CoreTLSData& data = getCoreTlsData(); #ifdef HAVE_IPP - data->useIPP = (getIPPSingleton().useIPP)?flag:false; + data.useIPP = (getIPPSingleton().useIPP)?flag:false; #else CV_UNUSED(flag); - data->useIPP = false; + data.useIPP = false; #endif } bool useIPP_NotExact() { #ifdef HAVE_IPP - CoreTLSData* data = getCoreTlsData().get(); - if(data->useIPP_NE < 0) + CoreTLSData& data = getCoreTlsData(); + if (data.useIPP_NE < 0) { - data->useIPP_NE = getIPPSingleton().useIPP_NE; + data.useIPP_NE = getIPPSingleton().useIPP_NE; } - return (data->useIPP_NE > 0); + return (data.useIPP_NE > 0); #else return false; #endif @@ -2376,12 +2432,12 @@ bool useIPP_NotExact() void setUseIPP_NotExact(bool flag) { - CoreTLSData* data = getCoreTlsData().get(); + CoreTLSData& data = getCoreTlsData(); #ifdef HAVE_IPP - data->useIPP_NE = flag; + data.useIPP_NE = flag; #else CV_UNUSED(flag); - data->useIPP_NE = false; + data.useIPP_NE = false; #endif } @@ -2407,7 +2463,7 @@ namespace tegra { bool useTegra() { - cv::CoreTLSData* data = cv::getCoreTlsData().get(); + cv::CoreTLSData* data = cv::getCoreTlsData(); if (data->useTegra < 0) { @@ -2423,7 +2479,7 @@ bool useTegra() void setUseTegra(bool flag) { - cv::CoreTLSData* data = cv::getCoreTlsData().get(); + cv::CoreTLSData* data = cv::getCoreTlsData(); data->useTegra = flag; } diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index c31d9e1..58f1f90 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -43,6 +43,8 @@ #include "opencl_kernels_core.hpp" #include "umatrix.hpp" +#include + ///////////////////////////////// UMat implementation /////////////////////////////// namespace cv { diff --git a/modules/core/test/test_utils.cpp b/modules/core/test/test_utils.cpp index c566762..2bae778 100644 --- a/modules/core/test/test_utils.cpp +++ b/modules/core/test/test_utils.cpp @@ -4,6 +4,8 @@ #include "test_precomp.hpp" #include "opencv2/core/utils/logger.hpp" +#include "test_utils_tls.impl.hpp" + namespace opencv_test { namespace { static const char * const keys = diff --git a/modules/core/test/test_utils_tls.impl.hpp b/modules/core/test/test_utils_tls.impl.hpp new file mode 100644 index 0000000..36b8805 --- /dev/null +++ b/modules/core/test/test_utils_tls.impl.hpp @@ -0,0 +1,134 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// This is .hpp file included from test_utils.cpp + +#ifdef CV_CXX11 +#include // std::thread +#endif + +#include "opencv2/core/utils/tls.hpp" + +namespace opencv_test { namespace { + +class TLSReporter +{ +public: + static int g_last_id; + static int g_allocated; + + int id; + + TLSReporter() + { + id = CV_XADD(&g_last_id, 1); + CV_XADD(&g_allocated, 1); + } + ~TLSReporter() + { + CV_XADD(&g_allocated, -1); + } +}; + +int TLSReporter::g_last_id = 0; +int TLSReporter::g_allocated = 0; + +#ifdef CV_CXX11 + +template +static void callNThreadsWithTLS(int N, TLSData& tls) +{ + std::vector threads(N); + for (int i = 0; i < N; i++) + { + threads[i] = std::thread([&]() { + TLSReporter* pData = tls.get(); + (void)pData; + }); + } + for (int i = 0; i < N; i++) + { + threads[i].join(); + } + threads.clear(); +} + +TEST(Core_TLS, HandleThreadTermination) +{ + const int init_id = TLSReporter::g_last_id; + const int init_allocated = TLSReporter::g_allocated; + + const int N = 4; + TLSData tls; + + // use TLS + ASSERT_NO_THROW(callNThreadsWithTLS(N, tls)); + + EXPECT_EQ(init_id + N, TLSReporter::g_last_id); + EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated); +} + + +static void testTLSAccumulator(bool detachFirst) +{ + const int init_id = TLSReporter::g_last_id; + const int init_allocated = TLSReporter::g_allocated; + + const int N = 4; + TLSDataAccumulator tls; + + { // empty TLS checks + std::vector& data0 = tls.detachData(); + EXPECT_EQ((size_t)0, data0.size()); + tls.cleanupDetachedData(); + } + + // use TLS + ASSERT_NO_THROW(callNThreadsWithTLS(N, tls)); + + EXPECT_EQ(init_id + N, TLSReporter::g_last_id); + EXPECT_EQ(init_allocated + N, TLSReporter::g_allocated); + + if (detachFirst) + { + std::vector& data1 = tls.detachData(); + EXPECT_EQ((size_t)N, data1.size()); + + // no data through gather after detachData() + std::vector data2; + tls.gather(data2); + EXPECT_EQ((size_t)0, data2.size()); + + tls.cleanupDetachedData(); + + EXPECT_EQ(init_id + N, TLSReporter::g_last_id); + EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated); + EXPECT_EQ((size_t)0, data1.size()); + } + else + { + std::vector data2; + tls.gather(data2); + EXPECT_EQ((size_t)N, data2.size()); + + std::vector& data1 = tls.detachData(); + EXPECT_EQ((size_t)N, data1.size()); + + tls.cleanupDetachedData(); + + EXPECT_EQ((size_t)0, data1.size()); + // data2 is not empty, but it has invalid contents + EXPECT_EQ((size_t)N, data2.size()); + } + + EXPECT_EQ(init_id + N, TLSReporter::g_last_id); + EXPECT_EQ(init_allocated + 0, TLSReporter::g_allocated); +} + +TEST(Core_TLS, AccumulatorHoldData_detachData) { testTLSAccumulator(true); } +TEST(Core_TLS, AccumulatorHoldData_gather) { testTLSAccumulator(false); } + +#endif + +}} // namespace diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index d4ff218..64a5c61 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -45,6 +45,8 @@ #include "opencv2/core/openvx/ovx_defs.hpp" +#include "opencv2/core/utils/tls.hpp" + namespace cv { -- 2.7.4