1 // SPDX-License-Identifier: Apache-2.0
3 * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
6 * @date 09 December 2020
7 * @brief Profiler related codes to be used to benchmark things
8 * @see https://github.com/nnstreamer/nntrainer
9 * @author Jihoon Lee <jhoon.it.lee@samsung.com>
10 * @bug No known bugs except for NYI items
22 #include <nntrainer_error.h>
23 #include <nntrainer_log.h>
29 void GenericProfileListener::onNotifyTimeEvent(
30 PROFILE_EVENT event, const int time_item, const std::string &str,
31 const std::chrono::microseconds &duration) {
32 auto time_iter = time_taken.find(time_item);
34 if (time_iter == time_taken.end()) {
35 reset(time_item, str);
36 time_iter = time_taken.find(time_item);
37 if (time_iter == time_taken.end()) {
38 throw std::runtime_error("Couldn't find time_iter.");
42 auto &cnt_ = std::get<GenericProfileListener::CNT>(time_iter->second);
48 auto &cur_ = std::get<GenericProfileListener::CUR>(time_iter->second);
49 auto &min_ = std::get<GenericProfileListener::MIN>(time_iter->second);
50 auto &max_ = std::get<GenericProfileListener::MAX>(time_iter->second);
51 auto &sum_ = std::get<GenericProfileListener::SUM>(time_iter->second);
54 min_ = std::min(min_, duration);
55 max_ = std::max(max_, duration);
59 void GenericProfileListener::onNotifyMemoryEvent(
60 PROFILE_EVENT event, const size_t alloc_current, const size_t alloc_total,
61 const std::string &str, const std::chrono::microseconds &duration,
62 const std::string &cache_policy, bool cache_swap) {
64 if (event != EVENT_MEM_ANNOTATE) {
65 mem_max = std::max(mem_max, alloc_total);
66 mem_sum += alloc_total;
67 mem_average = mem_sum / ++mem_count;
70 mem_taken.emplace_back(event, alloc_current, alloc_total, str, duration,
71 cache_policy, cache_swap);
74 void GenericProfileListener::notify(
75 PROFILE_EVENT event, const std::shared_ptr<ProfileEventData> data) {
77 case EVENT_TIME_START:
78 /* ignore start time. we only consider duration of item */
81 onNotifyTimeEvent(event, data->time_item, data->event_str, data->duration);
84 case EVENT_MEM_DEALLOC:
85 case EVENT_MEM_ANNOTATE:
86 onNotifyMemoryEvent(event, data->alloc_current, data->alloc_total,
87 data->event_str, data->duration, data->cache_policy,
91 throw std::runtime_error("Invalid PROFILE_EVENT");
96 void GenericProfileListener::reset(const int time_item,
97 const std::string &name) {
98 time_taken[time_item] = std::make_tuple(
99 std::chrono::microseconds{0}, std::chrono::microseconds::max(),
100 std::chrono::microseconds::min(), std::chrono::microseconds{0}, int{0});
101 names[time_item] = name;
104 const std::chrono::microseconds
105 GenericProfileListener::result(const int time_item) {
106 auto iter = time_taken.find(time_item);
108 if (iter == time_taken.end() ||
109 std::get<GenericProfileListener::CNT>(iter->second) == 0) {
110 std::stringstream ss;
111 ss << "time_item has never recorded: " << names[time_item];
112 throw std::invalid_argument("time_item has never recorded");
115 return std::get<GenericProfileListener::CUR>(iter->second);
118 void GenericProfileListener::report(std::ostream &out) const {
119 std::vector<unsigned int> column_size = {20, 23, 23, 23, 23, 23, 23, 23};
121 for (auto &[item, time] : time_taken) {
122 auto title = names.find(item);
123 if (title == names.end())
124 throw std::runtime_error("Couldn't find name. it's already removed.");
126 std::max(column_size[0], static_cast<unsigned int>(title->second.size()));
128 auto total_col_size =
129 std::accumulate(column_size.begin(), column_size.end(), 0);
132 out << "warm up: " << warmups << '\n';
134 auto end = std::chrono::steady_clock::now();
136 std::chrono::duration_cast<std::chrono::microseconds>(end - start_time);
138 out << "profiled for " << duration.count() << '\n';
142 out << std::setw(column_size[0]) << "key"
143 << std::setw(column_size[1]) << "avg"
144 << std::setw(column_size[2]) << "min"
145 << std::setw(column_size[3]) << "max"
146 << std::setw(column_size[4]) << "sum"
147 << std::setw(column_size[5]) << "pct" << '\n';
151 out << std::string(total_col_size, '=') << '\n';
153 std::map<int, std::function<void(std::ostream & out)>> ordered_report;
155 /// calculate metrics while skipping warmups
156 for (auto &time : time_taken) {
157 auto func = [&](std::ostream &out_) {
158 auto &cnt_ = std::get<GenericProfileListener::CNT>(time.second);
159 auto &min_ = std::get<GenericProfileListener::MIN>(time.second);
160 auto &max_ = std::get<GenericProfileListener::MAX>(time.second);
161 auto &sum_ = std::get<GenericProfileListener::SUM>(time.second);
163 auto title = names.find(time.first);
165 if (title == names.end())
166 throw std::runtime_error("Couldn't find name. it's already removed.");
169 if (warmups >= cnt_) {
170 out_ << std::left << std::setw(total_col_size) << title->second
171 << "less data then warmup\n";
173 << std::right; // Restore outputstream adjustflag to standard stream
177 out_.setf(std::ios::fixed);
178 std::streamsize default_precision = out_.precision(2);
180 out_ << std::setw(column_size[0]) << title->second
181 << std::setw(column_size[1]) << sum_.count() / (cnt_ - warmups)
182 << std::setw(column_size[2]) << min_.count()
183 << std::setw(column_size[3]) << max_.count()
184 << std::setw(column_size[4]) << sum_.count()
185 << std::setw(column_size[5]) << sum_.count() / (double)duration.count() * 100 << '\n';
187 out_.precision(default_precision);
188 out_.unsetf(std::ios::fixed);
190 ordered_report[-time.first] = func;
193 for (auto &entry : ordered_report)
196 ordered_report.clear();
199 column_size = {30, 20, 20, 60, 10, 20, 10};
201 out << std::string(total_col_size, '=') << '\n';
205 out << std::setw(column_size[0]) << "event"
206 << std::setw(column_size[1]) << "size"
207 << std::setw(column_size[2]) << "total"
208 << std::setw(column_size[3]) << "info"
209 << std::setw(column_size[4]) << "dur"
210 << std::setw(column_size[5]) << "policy"
211 << std::setw(column_size[6]) << "swap"
214 out << std::string(total_col_size, '=') << std::endl;
217 for (auto &mem : mem_taken) {
218 auto func = [&](std::ostream &out_) {
219 auto &event = std::get<PROFILE_EVENT>(mem);
220 auto &cur = std::get<1>(mem);
221 auto &total = std::get<2>(mem);
222 auto &info = std::get<3>(mem);
223 auto &dur = std::get<std::chrono::microseconds>(mem);
224 auto &policy = std::get<5>(mem);
225 auto &swap = std::get<bool>(mem);
227 out_.setf(std::ios::fixed);
228 out_.setf(std::ios::right);
229 std::streamsize default_precision = out_.precision(2);
231 if (event == EVENT_MEM_ANNOTATE) {
232 out_ << info << std::endl;
234 out_ << std::setw(column_size[0]) << ((event == EVENT_MEM_ALLOC) ? "ALLOC" :
235 (event == EVENT_MEM_DEALLOC) ? "DEALLOC" : "")
236 << std::setw(column_size[1]) << std::to_string(cur)
237 << std::setw(column_size[2]) << std::to_string(total)
238 << std::setw(column_size[3]) << info
239 << std::setw(column_size[4]) << ((event == EVENT_MEM_DEALLOC) ? std::to_string(dur.count()) : "")
240 << std::setw(column_size[5]) << policy
241 << std::setw(column_size[6]) << (swap ? ((event == EVENT_MEM_ALLOC) ? "IN" :
242 (event == EVENT_MEM_DEALLOC) ? "OUT" : "") : "")
246 out_.precision(default_precision);
247 out_.unsetf(std::ios::fixed);
249 ordered_report[order++] = func;
252 for (auto &entry : ordered_report)
255 out << "Max Memory Size = " << mem_max << std::endl;
256 out << "Average Memory Size = " << mem_average << std::endl;
259 Profiler &Profiler::Global() {
260 static Profiler instance;
264 void Profiler::start(const int item) {
266 /// @todo: consider race condition
267 if (time_item_times.find(item) != time_item_times.end())
268 throw std::invalid_argument("profiler has already started");
271 auto name = time_item_names.find(item);
272 if (name == time_item_names.end())
273 throw std::invalid_argument("the item is not registered");
275 time_item_times[item] = std::chrono::steady_clock::now();
277 auto data = std::make_shared<ProfileEventData>(item, 0, 0, name->second,
278 std::chrono::microseconds(0));
279 notifyListeners(EVENT_TIME_START, data);
282 void Profiler::end(const int item) {
283 /// @todo: consider race condition
284 auto end = std::chrono::steady_clock::now();
287 if (time_item_times.find(item) == time_item_times.end())
288 throw std::invalid_argument("profiler hasn't started with the item");
291 auto name = time_item_names.find(item);
292 if (name == time_item_names.end())
293 throw std::invalid_argument("the item is not registered");
295 auto start = time_item_times[item];
297 std::chrono::duration_cast<std::chrono::microseconds>(end - start);
299 std::make_shared<ProfileEventData>(item, 0, 0, name->second, duration);
301 notifyListeners(EVENT_TIME_END, data);
303 time_item_times.erase(item);
306 void Profiler::notifyListeners(PROFILE_EVENT event,
307 const std::shared_ptr<ProfileEventData> data) {
308 std::lock_guard<std::mutex> lock(listeners_mutex);
310 for (auto &l : time_item_listeners[data->time_item])
311 l->notify(event, data);
313 for (auto &l : listeners)
314 l->notify(event, data);
317 void Profiler::subscribe(std::shared_ptr<ProfileListener> listener,
318 const std::set<int> &time_items) {
319 if (listener == nullptr) {
320 throw std::invalid_argument("listener is null!");
323 std::lock_guard<std::mutex> lock(listeners_mutex);
324 if (time_items.empty()) {
325 listeners.insert(listener);
327 for (auto item : time_items)
328 time_item_listeners[item].insert(listener);
332 void Profiler::unsubscribe(std::shared_ptr<ProfileListener> listener) {
333 std::lock_guard<std::mutex> lock(listeners_mutex);
334 listeners.erase(listener);
336 for (auto &[item, listeners] : time_item_listeners) {
337 auto found = listeners.find(listener);
338 if (found != listeners.end())
339 listeners.erase(found);
343 int Profiler::registerTimeItem(const std::string &name) {
344 std::lock_guard<std::mutex> lock(registr_mutex);
346 int item = time_item_names.size() + 1;
348 time_item_names[item] = name;
349 time_item_listeners[item] = std::set<std::shared_ptr<ProfileListener>>();
351 ml_logd("[Profiler] Event registered, time item name: %s key: %d",
357 void Profiler::alloc(const void *ptr, size_t size, const std::string &str,
358 const std::string &policy, bool swap) {
359 std::lock_guard<std::mutex> lock(allocates_mutex);
362 auto found = allocates.find(ptr);
363 if (found != allocates.end())
364 throw std::invalid_argument("memory profiler is already allocated");
367 allocates[ptr] = std::tuple<size_t, timepoint, std::string>(
368 size, std::chrono::steady_clock::now(), str);
372 auto data = std::make_shared<ProfileEventData>(
373 0, size, total_size.load(), str, std::chrono::microseconds(0), policy,
375 notifyListeners(EVENT_MEM_ALLOC, data);
378 void Profiler::dealloc(const void *ptr, const std::string &policy, bool swap) {
379 std::lock_guard<std::mutex> lock(allocates_mutex);
381 auto end = std::chrono::steady_clock::now();
382 auto found = allocates.find(ptr);
384 if (found == allocates.end())
385 throw std::invalid_argument("memory profiler didn't allocated");
387 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
388 end - std::get<timepoint>(found->second));
390 auto size = std::get<size_t>(found->second);
393 auto str = std::get<std::string>(found->second);
394 auto data = std::make_shared<ProfileEventData>(0, size, total_size.load(),
395 str, duration, policy, swap);
397 notifyListeners(EVENT_MEM_DEALLOC, data);
399 allocates.erase(found);
402 void Profiler::annotate(const std::string &str) {
403 std::lock_guard<std::mutex> lock(allocates_mutex);
405 auto data = std::make_shared<ProfileEventData>(0, 0, 0, str,
406 std::chrono::microseconds(0));
408 notifyListeners(EVENT_MEM_ANNOTATE, data);
411 } // namespace profile
413 } // namespace nntrainer