1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
13 #include <unordered_map>
23 #if ENABLE_PROFILING_ITT
24 #include <ittnotify.h>
27 namespace InferenceEngine {
// Begin hook for an annotated scope. Only declared here. The Annotate
// constructor in this file calls it with the shared per-type state first and
// the per-instance block state second.
29 template< typename Static, typename Block>
30 void annotateBegin(Static&& static_, Block&& block_);
// End hook for an annotated scope; counterpart of annotateBegin with the same
// argument order (shared per-type state, then per-instance block state).
// Only declared here.
32 template< typename Static, typename Block>
33 void annotateEnd(Static&& static_, Block&& block_);
// Annotate: scope-annotation helper parameterized on three types.
//   Static - type of the state shared by all instances of one specialization
//            (wrapped in the static member `static_`).
//   Block  - per-instance state, built from the constructor arguments.
//   Local  - supplies `static_args()`, used by the out-of-class definition of
//            `static_` to initialize the shared state.
// NOTE(review): the struct header lines, the destructor header and the closing
// braces are not visible in this excerpt.
35 template< typename Static, typename Block, typename Local>
// Hand-rolled compile-time index pack: idx<N>::type is idx<0, 1, ..., N-1>.
38 template<std::size_t...> struct idx{};
// Recursive step: prepend N-1 to the pack and count down.
40 template<std::size_t N, std::size_t... S> struct idx<N, S...> : idx<N-1, N-1, S...> {};
// Base case: the counter reached 0, expose the accumulated pack as `type`.
42 template<std::size_t... S> struct idx<0, S...> {
43 using type = idx<S...>;
// Builds the wrapped `static_` value from the tuple elements by expanding
// std::get<I> once per index in the pack.
46 template<typename ArgTuple, std::size_t ...I>
47 Static_(ArgTuple&& arg_tuple, idx<I...>)
48 : static_{std::get<I>(std::forward<ArgTuple>(arg_tuple))...} {}
// Entry point: delegates to the constructor above with one index per tuple
// element.
// NOTE(review): std::tuple_size is applied to ArgTuple exactly as deduced, so
// this presumably expects an rvalue tuple (non-reference ArgTuple) - confirm.
50 template<typename ArgTuple>
51 explicit Static_(ArgTuple&& arg_tuple)
52 : Static_{std::forward<ArgTuple>(arg_tuple), typename idx< std::tuple_size<ArgTuple>::value >::type{}} {}
// Shared state for this specialization; defined out of class.
57 static Static_ static_;
// Copying is disabled; moving uses the compiler-generated operations.
61 Annotate(const Annotate&) = delete;
62 Annotate& operator=(const Annotate&) = delete;
63 Annotate(Annotate&&) = default;
64 Annotate& operator=(Annotate&&) = default;
// Forwards every argument into `block_`, then signals the start of the
// annotated region through annotateBegin.
66 template<typename ...Ts>
67 inline explicit Annotate(Ts&& ...xs)
68 : block_{std::forward<Ts>(xs)...} {
69 annotateBegin(static_.static_, block_);
// Signals the end of the annotated region.
// NOTE(review): presumably the destructor body - the enclosing lines are not
// visible here.
73 annotateEnd(static_.static_, block_);
// Out-of-class definition of the shared state of each Annotate specialization.
// It is initialized from the value returned by Local::static_args().
77 template< typename Static, typename Block, typename Local>
78 typename Annotate< Static, Block, Local >::Static_ Annotate< Static, Block, Local >::static_(Local::static_args());
// Token-pasting helpers.
// IE_ANNOTATE_CONCAT(x, y) joins its two arguments into a single token. It goes
// through IE_ANNOTATE_CONCAT_EVAL so that macro arguments are fully expanded
// before `##` is applied; `##` used directly would paste the unexpanded
// spellings.
#define IE_ANNOTATE_CONCAT(x, y) IE_ANNOTATE_CONCAT_EVAL(x, y)
#define IE_ANNOTATE_CONCAT_EVAL(x, y) x ## y
// Strips one level of parentheses from a macro argument.
// IE_ANNOTATE_UNPACK((a, b, c)) expands to `a, b, c`: the parenthesized
// argument becomes the argument list of IE_ANNOTATE_UNPACK_EVAL, which
// re-emits it unchanged. The argument must be wrapped in parentheses.
#define IE_ANNOTATE_UNPACK(tuple) IE_ANNOTATE_UNPACK_EVAL tuple
#define IE_ANNOTATE_UNPACK_EVAL(...) __VA_ARGS__
// Identifier generation for the scope macros.
// IE_ANNOTATE_MAKE_NAME(lib_name, postfix) builds an identifier that starts
// with `__intel_util_annotate_`.
// NOTE(review): the remaining continuation lines of this macro are not visible
// in this excerpt, so how lib_name and postfix are combined cannot be
// confirmed from here.
// The three wrappers below request the names used for the generated local
// type (`_ctx`), the generated variable (`_variable`) and a function (`_func`).
86 #define IE_ANNOTATE_MAKE_NAME(lib_name, postfix) \
89 IE_ANNOTATE_CONCAT(__intel_util_annotate_, \
94 #define IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _ctx)
95 #define IE_ANNOTATE_VARIABLE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _variable)
96 #define IE_ANNOTATE_FUNC_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _func)
// IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type,
//                             make_static_args_tuple)
//   Emits a struct named IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) that derives
//   from ::InferenceEngine::Annotate<..., that same struct>, inherits the
//   Annotate constructors, and defines static_args(), which returns
//   std::make_tuple of the unpacked make_static_args_tuple. Annotate uses
//   static_args() to initialize its shared state.
//   NOTE(review): the lines carrying the first two template arguments and the
//   end of the struct are not visible in this excerpt; presumably they pass
//   static_type and block_type - confirm.
//
// IE_ANNOTATE_MAKE_SCOPE(lib_name, static_type, block_type,
//                        make_static_args_tuple, make_block_args_tuple)
//   Emits the scope type above immediately followed by a variable named
//   IE_ANNOTATE_VARIABLE_NAME(lib_name), brace-initialized with the unpacked
//   make_block_args_tuple. Both tuple arguments must be parenthesized lists,
//   because they are passed through IE_ANNOTATE_UNPACK.
98 #define IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \
99 struct IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) : \
100 ::InferenceEngine::Annotate< \
103 IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) > { \
104 using ::InferenceEngine::Annotate< \
107 IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) >::Annotate; \
108 static auto static_args() \
109 ->decltype(std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple))) { \
110 return std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple));} \
113 #define IE_ANNOTATE_MAKE_SCOPE(lib_name, static_type, block_type, make_static_args_tuple, make_block_args_tuple) \
114 IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \
115 IE_ANNOTATE_VARIABLE_NAME(lib_name){IE_ANNOTATE_UNPACK(make_block_args_tuple)};
117 #if ENABLE_PROFILING_ITT
// Pair of ITT handles describing one named task. Both pointers are fixed at
// construction (const members).
118 struct IttTaskHandles {
119 __itt_domain* const domain;
120 __itt_string_handle* const handle;
// Requests the ITT domain named "InferenceEngine" and a string handle for
// `task_name`.
122 explicit IttTaskHandles(const char* task_name)
123 : domain{ __itt_domain_create("InferenceEngine") }
124 , handle{ __itt_string_handle_create(task_name) } {}
// ITT begin hook: starts a task in h.domain named by h.handle, passing
// __itt_null for the two id arguments. The IttBlock argument is unused.
129 inline static void annotateBegin(IttTaskHandles& h, IttBlock&) {
130 __itt_task_begin(h.domain, __itt_null, __itt_null, h.handle);
// ITT end hook: ends the current task in h.domain. The IttBlock argument is
// unused.
133 inline static void annotateEnd(IttTaskHandles& h, IttBlock&) {
134 __itt_task_end(h.domain);
// IE_ITT_SCOPE(task_name): two alternative definitions.
// The first builds an annotation scope through IE_ANNOTATE_MAKE_SCOPE with the
// library tag InferenceEngineItt, IttTaskHandles as the shared state type and
// IttBlock as the per-instance type. The second expands to nothing.
// NOTE(review): the macro's trailing arguments and the preprocessor branch
// separating the two definitions are not visible in this excerpt; presumably
// the empty form is used when ENABLE_PROFILING_ITT is off - confirm.
137 #define IE_ITT_SCOPE(task_name) \
138 IE_ANNOTATE_MAKE_SCOPE(InferenceEngineItt, \
139 ::InferenceEngine::IttTaskHandles, \
140 ::InferenceEngine::IttBlock, \
143 #define IE_ITT_SCOPE(task_name)
// Collects timing samples grouped by name and prints summary statistics for
// every name to std::cout when the object is destroyed.
146 class TimeResultsMap {
// name -> all samples recorded under that name, in insertion order
148 std::unordered_map<std::string, std::deque<double> > m_map;
// Appends one sample under `name`, creating the entry on first use. The map is
// modified while holding a lock on `mutex`.
// NOTE(review): the declaration of `mutex` is not visible in this excerpt.
152 inline void add(const std::string& name, double val) {
153 std::unique_lock<std::mutex> lock(mutex);
154 m_map[name].push_back(val);
// Prints one report line per name: sample count, mean, geometric mean, min and
// max. Values are divided by 1000000 and labelled "ms"; the timer end hook in
// this file records nanosecond counts.
157 inline ~TimeResultsMap() {
158 for (auto && iter : m_map) {
159 const size_t num = iter.second.size();
// min starts at the largest double and max at the lowest, so the first sample
// replaces both.
160 double valSum = 0, valMin = (std::numeric_limits<double>::max)(), valMax = std::numeric_limits<double>::lowest(), logSum = 0;
162 for (auto val : iter.second) {
// Sum of logarithms feeds the geometric mean printed below.
// NOTE(review): the accumulation into valSum, the declaration and update of
// logCount and any guard around std::log are on lines not visible here.
165 logSum += std::log(val);
168 valMin = std::fmin(val, valMin);
169 valMax = std::fmax(val, valMax);
172 std::cout << std::setw(20) << iter.first << " collected by " << std::setw(8) << num << " samples, ";
173 std::cout << "mean " << std::setw(12) << (valSum / num)/1000000 << " ms, ";
// geomean = exp(mean of logs); reported as 0 when logCount is zero.
174 std::cout << "geomean " << std::setw(12) << (logCount ? std::exp(logSum / logCount) : 0)/1000000 << " ms, ";
175 std::cout << "min " << std::setw(12) << valMin/1000000 << " ms, ";
176 std::cout << "max " << std::setw(12) << valMax/1000000 << " ms" << std::endl;
// Clock used for the raw timer samples; the timer hooks refer to it as
// TimeSampler::Clock.
// NOTE(review): the enclosing struct header is not visible in this excerpt.
182 using Clock = std::chrono::high_resolution_clock;
// Timer begin hook: stores the current time in the sampler's `t` member. The
// TimeResultsMap argument is unused.
189 inline static void annotateBegin(TimeResultsMap&, TimeSampler& t) {
190 t.t = TimeSampler::Clock::now();
// Timer end hook: computes the time elapsed since the stored start time `t.t`
// as a whole number of nanoseconds and records it in `m` under `t.name`.
193 inline static void annotateEnd(TimeResultsMap& m, TimeSampler& t) {
194 m.add(t.name, std::chrono::duration_cast<std::chrono::nanoseconds>(TimeSampler::Clock::now() - t.t).count());
197 #if ENABLE_PROFILING_RAW
// IE_TIMER_SCOPE(timerName): two alternative definitions.
// The first builds an annotation scope through IE_ANNOTATE_MAKE_SCOPE with the
// library tag InferenceEngineTimer, TimeResultsMap as the shared state type
// and TimeSampler as the per-instance type. The second expands to nothing.
// NOTE(review): the macro's trailing arguments and the preprocessor branch
// separating the two definitions are not visible in this excerpt; presumably
// the empty form is used when ENABLE_PROFILING_RAW is off - confirm.
198 #define IE_TIMER_SCOPE(timerName) \
199 IE_ANNOTATE_MAKE_SCOPE( \
200 InferenceEngineTimer, \
201 ::InferenceEngine::TimeResultsMap, \
202 ::InferenceEngine::TimeSampler, \
206 #define IE_TIMER_SCOPE(timerName)
// Stringification helpers.
// IE_STR(x) turns x into a string literal after macro-expanding it; the
// indirection through IE_STR_ is what allows the expansion. IE_STR_(x)
// stringifies the argument exactly as spelled.
#define IE_STR(x) IE_STR_(x)
#define IE_STR_(x) #x
// Opens both an ITT scope and a raw-timer scope for the enclosing block, using
// the stringified NAME as the task/timer name. The expansion is two statements
// separated by `;`.
212 #define IE_PROFILING_AUTO_SCOPE(NAME) IE_ITT_SCOPE(IE_STR(NAME)); IE_TIMER_SCOPE(IE_STR(NAME))
// Describes a named profiling task that can be created ahead of time and
// passed to the task-scope macros. Default-constructible and copyable.
// NOTE(review): some members, the start of the constructor's initializer list
// and the matching #endif lines are not visible in this excerpt.
214 struct ProfilingTask {
// ITT handles exist only when ITT profiling is compiled in.
217 #if ENABLE_PROFILING_ITT
218 __itt_domain* domain;
219 __itt_string_handle* handle;
222 ProfilingTask() = default;
223 ProfilingTask(const ProfilingTask&) = default;
// Builds a task for `task_name`. With ITT enabled it also requests the
// "InferenceEngine" domain and a string handle for the task name.
225 inline explicit ProfilingTask(const std::string& task_name)
227 #if ENABLE_PROFILING_ITT
228 , domain(__itt_domain_create("InferenceEngine"))
229 , handle(__itt_string_handle_create(task_name.c_str()))
// Per-instance state for task-based ITT scopes; the task hooks in this file
// access its member `t` (reading `t.domain` and `t.handle`).
// NOTE(review): the struct body is not visible in this excerpt.
236 struct IttProfilingTask {
// Task begin hook: with ITT enabled, starts a task using the domain and handle
// stored in the wrapped task `t.t`. The IttStatic argument is unused.
240 inline static void annotateBegin(IttStatic&, IttProfilingTask& t) {
241 #if ENABLE_PROFILING_ITT
242 __itt_task_begin(t.t.domain, __itt_null, __itt_null, t.t.handle);
// Task end hook: with ITT enabled, ends the current task in the domain stored
// in the wrapped task `t.t`. The IttStatic argument is unused.
246 inline static void annotateEnd(IttStatic&, IttProfilingTask& t) {
247 #if ENABLE_PROFILING_ITT
248 __itt_task_end(t.t.domain);
252 #if ENABLE_PROFILING_ITT
// IE_ITT_TASK_SCOPE(profilingTask): two alternative definitions.
// The first builds an annotation scope through IE_ANNOTATE_MAKE_SCOPE with the
// library tag InferenceEngineIttScopeTask, IttStatic as the shared state type
// and IttProfilingTask as the per-instance type. The second expands to nothing.
// NOTE(review): the macro's trailing arguments and the preprocessor branch
// separating the two definitions are not visible in this excerpt.
253 #define IE_ITT_TASK_SCOPE(profilingTask) \
254 IE_ANNOTATE_MAKE_SCOPE( \
255 InferenceEngineIttScopeTask, \
256 ::InferenceEngine::IttStatic, \
257 ::InferenceEngine::IttProfilingTask, \
261 #define IE_ITT_TASK_SCOPE(profiling_task)
// Opens an ITT task scope for PROFILING_TASK and a raw-timer scope named by
// PROFILING_TASK.name. The argument is expanded twice, so it should be a
// side-effect-free expression. The expansion is two statements separated by `;`.
264 #define IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) IE_ITT_TASK_SCOPE(PROFILING_TASK); IE_TIMER_SCOPE(PROFILING_TASK.name)
// Sets the name of the calling thread for ITT via __itt_thread_set_name when
// ITT profiling is compiled in.
// NOTE(review): the non-ITT branch and the end of the function are not visible
// in this excerpt; the trailing comment suggests it only references `name` to
// avoid an unused-parameter warning.
266 inline static void anotateSetThreadName(const char* name) {
267 #if ENABLE_PROFILING_ITT
268 __itt_thread_set_name(name);
270 // to suppress "unused" warning
273 } // namespace InferenceEngine