Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / src / inference_engine / ie_profiling.hpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #pragma once
6
7 #ifndef NOMINMAX
8 #define NOMINMAX
9 #endif
10
#include <cfloat>
#include <chrono>
#include <cmath>
#include <deque>
#include <iomanip>
#include <iostream>
#include <limits>
#include <mutex>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
22
23 #if ENABLE_PROFILING_ITT
24 #include <ittnotify.h>
25 #endif
26
27 namespace InferenceEngine {
28
/**
 * @brief Generic declaration of the hook called by Annotate's constructor.
 *
 * Only declared here; this header supplies non-template overloads for the
 * concrete (static data, block data) pairs it uses.
 *
 * @param static_ data shared by all scopes of one annotation type
 * @param block_  data owned by the scope being entered
 */
template <typename StaticData, typename BlockData>
void annotateBegin(StaticData&& static_, BlockData&& block_);

/**
 * @brief Generic declaration of the hook called by Annotate's destructor.
 *
 * @param static_ data shared by all scopes of one annotation type
 * @param block_  data owned by the scope being left
 */
template <typename StaticData, typename BlockData>
void annotateEnd(StaticData&& static_, BlockData&& block_);
34
/**
 * @brief Scope object that calls annotateBegin() on construction and
 *        annotateEnd() on destruction.
 *
 * @tparam Static type of the data shared by every instance of this specialization
 * @tparam Block  type of the data owned by each individual instance
 * @tparam Local  type providing `static_args()`, whose returned tuple is
 *                unpacked into the constructor of the shared Static object
 */
template <typename Static, typename Block, typename Local>
struct Annotate {
    /**
     * @brief Holder that builds a Static object from the elements of a tuple.
     */
    struct Static_ {
        // Hand-written index sequence: idx<N>::type is idx<0, 1, ..., N-1>.
        template <std::size_t...>
        struct idx {};

        template <std::size_t N, std::size_t... S>
        struct idx<N, S...> : idx<N - 1, N - 1, S...> {};

        template <std::size_t... S>
        struct idx<0, S...> {
            using type = idx<S...>;
        };

        // Expands the tuple elements selected by I... into Static's initializer.
        template <typename ArgTuple, std::size_t... I>
        Static_(ArgTuple&& arg_tuple, idx<I...>)
        : static_{std::get<I>(std::forward<ArgTuple>(arg_tuple))...} {}

        // ArgTuple is deduced from a forwarding reference, so it is a reference
        // type when an lvalue tuple is passed. std::tuple_size is only defined
        // for (cv-qualified) tuple types, hence the decay before querying it.
        template <typename ArgTuple>
        explicit Static_(ArgTuple&& arg_tuple)
        : Static_{std::forward<ArgTuple>(arg_tuple),
                  typename idx<std::tuple_size<typename std::decay<ArgTuple>::type>::value>::type{}} {}

        Static static_;
    };

    // One shared instance per Annotate<Static, Block, Local> specialization.
    static Static_ static_;

    // Per-scope data, built from the constructor arguments.
    Block block_;

    Annotate(const Annotate&)            = delete;
    Annotate& operator=(const Annotate&) = delete;
    // NOTE(review): the destructor calls annotateEnd() unconditionally, so a
    // moved-from object still reports an end event - confirm nobody moves these.
    Annotate(Annotate&&)                 = default;
    Annotate& operator=(Annotate&&)      = default;

    /**
     * @brief Builds block_ from the given arguments and reports the scope start.
     */
    template <typename... Ts>
    inline explicit Annotate(Ts&&... xs)
    : block_{std::forward<Ts>(xs)...} {
        annotateBegin(static_.static_, block_);
    }

    /**
     * @brief Reports the scope end.
     */
    inline ~Annotate() {
        annotateEnd(static_.static_, block_);
    }
};

// The shared data is constructed from the tuple returned by Local::static_args().
template <typename Static, typename Block, typename Local>
typename Annotate<Static, Block, Local>::Static_
Annotate<Static, Block, Local>::static_(Local::static_args());
79
// Token pasting through one extra macro level, so that arguments which are
// themselves macros (such as __LINE__) are expanded before being glued.
#define IE_ANNOTATE_CONCAT_EVAL(x, y) x##y
#define IE_ANNOTATE_CONCAT(x, y) IE_ANNOTATE_CONCAT_EVAL(x, y)

// Strips the outer parentheses from a parenthesized argument list:
// IE_ANNOTATE_UNPACK((a, b)) expands to a, b.
#define IE_ANNOTATE_UNPACK_EVAL(...) __VA_ARGS__
#define IE_ANNOTATE_UNPACK(tuple) IE_ANNOTATE_UNPACK_EVAL tuple

// Produces the identifier __intel_util_annotate_<lib_name><postfix><line number>.
#define IE_ANNOTATE_MAKE_NAME(lib_name, postfix) \
    IE_ANNOTATE_CONCAT( \
        IE_ANNOTATE_CONCAT(IE_ANNOTATE_CONCAT(__intel_util_annotate_, lib_name), postfix), \
        __LINE__)

// Identifiers generated for the scope type, the scope variable and a function.
#define IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _ctx)
#define IE_ANNOTATE_VARIABLE_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _variable)
#define IE_ANNOTATE_FUNC_NAME(lib_name) IE_ANNOTATE_MAKE_NAME(lib_name, _func)
97
// Declares a struct derived from ::InferenceEngine::Annotate that passes itself
// as the Local parameter, inherits the base constructors, and provides
// static_args() returning a tuple of the values listed in the parenthesized
// make_static_args_tuple argument. The struct name embeds the current line number.
#define IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \
    struct IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) \
        : ::InferenceEngine::Annotate<static_type, block_type, IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) > { \
        using ::InferenceEngine::Annotate<static_type, block_type, IE_ANNOTATE_LOCAL_TYPE_NAME(lib_name) >::Annotate; \
        static auto static_args() \
            -> decltype(std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple))) { \
            return std::make_tuple(IE_ANNOTATE_UNPACK(make_static_args_tuple)); \
        } \
    }

// Declares the scope struct and, in the same declaration, a variable of that
// type brace-initialized from the parenthesized make_block_args_tuple argument.
// The expansion already ends with a semicolon.
#define IE_ANNOTATE_MAKE_SCOPE(lib_name, static_type, block_type, make_static_args_tuple, make_block_args_tuple) \
    IE_ANNOTATE_MAKE_SCOPE_TYPE(lib_name, static_type, block_type, make_static_args_tuple) \
    IE_ANNOTATE_VARIABLE_NAME(lib_name){IE_ANNOTATE_UNPACK(make_block_args_tuple)};
116
#if ENABLE_PROFILING_ITT
/**
 * @brief ITT handles for one task: the "InferenceEngine" domain and the
 *        string handle created from the task name.
 */
struct IttTaskHandles {
    __itt_domain*        const domain;
    __itt_string_handle* const handle;

    explicit IttTaskHandles(const char* task_name)
    : domain(__itt_domain_create("InferenceEngine"))
    , handle(__itt_string_handle_create(task_name)) {}
};

/**
 * @brief Empty per-scope data for ITT scopes.
 */
struct IttBlock {};

/**
 * @brief Starts the ITT task described by the given handles.
 */
inline static void annotateBegin(IttTaskHandles& handles, IttBlock&) {
    __itt_task_begin(handles.domain, __itt_null, __itt_null, handles.handle);
}

/**
 * @brief Ends the current ITT task in the handles' domain.
 */
inline static void annotateEnd(IttTaskHandles& handles, IttBlock&) {
    __itt_task_end(handles.domain);
}

// Opens an ITT task named task_name for the rest of the enclosing scope.
#define IE_ITT_SCOPE(task_name) \
    IE_ANNOTATE_MAKE_SCOPE(InferenceEngineItt, \
                           ::InferenceEngine::IttTaskHandles, \
                           ::InferenceEngine::IttBlock, \
                           (task_name), ())
#else
// ITT profiling disabled: the macro expands to nothing.
#define IE_ITT_SCOPE(task_name)
#endif
145
/**
 * @brief Collects named samples and prints per-name statistics to std::cout
 *        when destroyed.
 *
 * The report divides every value by 1000000 and labels the result "ms".
 */
class TimeResultsMap {
protected:
    std::unordered_map<std::string, std::deque<double> > m_map;
    std::mutex mutex;

public:
    /**
     * @brief Appends a sample to the series stored under the given name.
     *
     * The insertion is performed while holding the internal mutex.
     *
     * @param name key of the series
     * @param val  sample value
     */
    inline void add(const std::string& name, double val) {
        std::lock_guard<std::mutex> guard(mutex);
        m_map[name].push_back(val);
    }

    /**
     * @brief Prints sample count, mean, geometric mean, min and max for each series.
     *
     * The geometric mean only takes strictly positive samples into account
     * and is reported as 0 when there are none.
     */
    inline ~TimeResultsMap() {
        for (auto entry = m_map.begin(); entry != m_map.end(); ++entry) {
            const std::deque<double>& samples = entry->second;
            const size_t count = samples.size();

            double total = 0;
            double smallest = (std::numeric_limits<double>::max)();
            double largest = std::numeric_limits<double>::lowest();
            double logTotal = 0;
            int positives = 0;

            for (auto sample = samples.begin(); sample != samples.end(); ++sample) {
                const double value = *sample;
                if (value > 0) {
                    ++positives;
                    logTotal += std::log(value);
                }
                total += value;
                smallest = std::fmin(value, smallest);
                largest = std::fmax(value, largest);
            }

            const double mean = total / count;
            const double geomean = positives ? std::exp(logTotal / positives) : 0;

            std::cout << std::setw(20) << entry->first
                      << " collected by " << std::setw(8) << count << " samples, "
                      << "mean " << std::setw(12) << mean / 1000000 << " ms, "
                      << "geomean " << std::setw(12) << geomean / 1000000 << " ms, "
                      << "min " << std::setw(12) << smallest / 1000000 << " ms, "
                      << "max " << std::setw(12) << largest / 1000000 << " ms" << std::endl;
        }
    }
};
180
/**
 * @brief Per-scope timer data: the series name and the scope start time.
 */
struct TimeSampler {
    // Elapsed-time measurement needs a monotonic clock. high_resolution_clock
    // may be an alias of system_clock, which can jump when the wall clock is
    // adjusted; steady_clock is guaranteed to be monotonic.
    using Clock = std::chrono::steady_clock;

    // Key under which the measured duration is recorded.
    std::string name;

    // Time point captured when the scope is entered.
    Clock::time_point t;
};
188
189 inline static void annotateBegin(TimeResultsMap&, TimeSampler& t) {
190     t.t = TimeSampler::Clock::now();
191 }
192
193 inline static void annotateEnd(TimeResultsMap& m, TimeSampler& t) {
194     m.add(t.name, std::chrono::duration_cast<std::chrono::nanoseconds>(TimeSampler::Clock::now() - t.t).count());
195 }
196
#if ENABLE_PROFILING_RAW
// Times the rest of the enclosing scope and records it under timerName.
#define IE_TIMER_SCOPE(timerName) \
    IE_ANNOTATE_MAKE_SCOPE(InferenceEngineTimer, \
                           ::InferenceEngine::TimeResultsMap, \
                           ::InferenceEngine::TimeSampler, \
                           (), (timerName))
#else
// Raw profiling disabled: the macro expands to nothing.
#define IE_TIMER_SCOPE(timerName)
#endif
208
// Stringification through one extra macro level, so that a macro argument is
// expanded before it is turned into a string literal.
#define IE_STR_(x) #x
#define IE_STR(x) IE_STR_(x)

// Opens both an ITT scope and a timer scope named after the stringified NAME.
#define IE_PROFILING_AUTO_SCOPE(NAME) \
    IE_ITT_SCOPE(IE_STR(NAME)); \
    IE_TIMER_SCOPE(IE_STR(NAME))
213
/**
 * @brief Named profiling task; with ITT enabled it also carries the ITT
 *        domain and string handle created for that name.
 */
struct ProfilingTask {
    std::string name;

#if ENABLE_PROFILING_ITT
    // Default to null so that a default-constructed task never holds
    // indeterminate pointers (which would also be copied by the copy constructor).
    __itt_domain*        domain = nullptr;
    __itt_string_handle* handle = nullptr;
#endif

    ProfilingTask() = default;
    ProfilingTask(const ProfilingTask&) = default;

    /**
     * @brief Creates a task with the given name and, with ITT enabled, the
     *        "InferenceEngine" domain and a string handle for the name.
     *
     * @param task_name name of the task
     */
    inline explicit ProfilingTask(const std::string& task_name)
    : name(task_name)
#if ENABLE_PROFILING_ITT
    , domain(__itt_domain_create("InferenceEngine"))
    , handle(__itt_string_handle_create(task_name.c_str()))
#endif
    {}
};
233
234 struct IttStatic{};
235
236 struct IttProfilingTask {
237     ProfilingTask& t;
238 };
239
240 inline static void annotateBegin(IttStatic&, IttProfilingTask& t) {
241 #if ENABLE_PROFILING_ITT
242     __itt_task_begin(t.t.domain, __itt_null, __itt_null, t.t.handle);
243 #endif
244 }
245
246 inline static void annotateEnd(IttStatic&, IttProfilingTask& t) {
247 #if ENABLE_PROFILING_ITT
248     __itt_task_end(t.t.domain);
249 #endif
250 }
251
#if ENABLE_PROFILING_ITT
// Opens an ITT task for the given ProfilingTask for the rest of the enclosing scope.
#define IE_ITT_TASK_SCOPE(profilingTask) \
    IE_ANNOTATE_MAKE_SCOPE(InferenceEngineIttScopeTask, \
                           ::InferenceEngine::IttStatic, \
                           ::InferenceEngine::IttProfilingTask, \
                           (), (profilingTask))
#else
// ITT profiling disabled: the macro expands to nothing.
#define IE_ITT_TASK_SCOPE(profilingTask)
#endif

// Opens both an ITT task scope for PROFILING_TASK and a timer scope keyed by its name.
#define IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) \
    IE_ITT_TASK_SCOPE(PROFILING_TASK); \
    IE_TIMER_SCOPE(PROFILING_TASK.name)
265
/**
 * @brief Passes the given name to __itt_thread_set_name when ITT profiling is
 *        enabled; otherwise does nothing.
 *
 * @param name thread name
 */
inline static void anotateSetThreadName(const char* name) {
#if ENABLE_PROFILING_ITT
    __itt_thread_set_name(name);
#else
    // to suppress "unused" warning
    static_cast<void>(name);
#endif
}
273 }  // namespace InferenceEngine