From b33ef665cd7dc4b12220048f705930a0a12047f8 Mon Sep 17 00:00:00 2001 From: Jiho Chu Date: Mon, 30 May 2022 13:56:38 +0900 Subject: [PATCH] [Utils] Add Memory profile feature to Profiler This patch implement memory profiling feature. Profile is refactored to handle memory statics information. Unnecessary dependency from ProfileListener to Profile class is removed, and inner information is redesigned to handle both time and memory profiling. For the memory profile, below infomation is managed: event: ALLOC | DEALLOC current size: total allocated memory size info: user friendly tag for the analysis duration: time interval between alloc and dealloc Signed-off-by: Jiho Chu --- Applications/MNIST/jni/main.cpp | 7 +- nntrainer/graph/network_graph.cpp | 10 +- nntrainer/layers/layer_node.cpp | 20 ++- nntrainer/models/neuralnet.cpp | 6 +- nntrainer/tensor/memory_pool.cpp | 15 +- nntrainer/utils/profiler.cpp | 341 +++++++++++++++++++++++--------------- nntrainer/utils/profiler.h | 267 +++++++++++++++++------------ 7 files changed, 402 insertions(+), 264 deletions(-) diff --git a/Applications/MNIST/jni/main.cpp b/Applications/MNIST/jni/main.cpp index 89c8c77..c3a7c11 100644 --- a/Applications/MNIST/jni/main.cpp +++ b/Applications/MNIST/jni/main.cpp @@ -222,8 +222,9 @@ int main(int argc, char *argv[]) { } #ifdef PROFILE - nntrainer::profile::GenericProfileListener listener( - &nntrainer::profile::Profiler::Global()); + auto listener = + std::make_shared(); + nntrainer::profile::Profiler::Global().subscribe(listener); #endif const std::vector args(argv + 1, argv + argc); @@ -301,7 +302,7 @@ int main(int argc, char *argv[]) { } #ifdef PROFILE - std::cout << listener; + std::cout << *listener; #endif #if defined(APP_VALIDATE) diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 2b22070..132917f 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -332,9 +332,9 @@ void NetworkGraph::applyGradients( sharedConstTensors NetworkGraph::forwarding(bool training) const { for (auto iter = cbegin(); iter != cend(); iter++) { auto const &ln = *iter; - START_PROFILE(profile_keys.at(ln->getType())); + PROFILE_TIME_START(profile_keys.at(ln->getType())); ln->forwarding(training); - END_PROFILE(profile_keys.at(ln->getType())); + PROFILE_TIME_END(profile_keys.at(ln->getType())); } sharedConstTensors out; @@ -371,9 +371,9 @@ void NetworkGraph::backwarding( for (auto iter = iter_begin; iter != iter_end; iter++) { auto &ln = *iter; - START_PROFILE(profile_keys.at(ln->getType())); + PROFILE_TIME_START(profile_keys.at(ln->getType())); backwarding_op(ln, iteration); - END_PROFILE(profile_keys.at(ln->getType())); + PROFILE_TIME_END(profile_keys.at(ln->getType())); } /** perform clipping of the gradients by global norm if any */ @@ -828,7 +828,7 @@ int NetworkGraph::initialize(const std::vector &model_input_names, if (profile_keys.find(lnode->getType()) == profile_keys.end()) { int event_key = 0; - REGISTER_EVENT(lnode->getType(), event_key); + PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType()); profile_keys[lnode->getType()] = event_key; } diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp index 83af983..fe71da3 100644 --- a/nntrainer/layers/layer_node.cpp +++ b/nntrainer/layers/layer_node.cpp @@ -577,9 +577,11 @@ InitLayerContext LayerNode::finalize(const std::vector &input_dims) { }; #endif - REGISTER_EVENT(profile_name(FORWARD_SUFFIX), forward_event_key); - REGISTER_EVENT(profile_name(CALC_DERIV_SUFFIX), calc_deriv_event_key); - REGISTER_EVENT(profile_name(CALC_GRAD_SUFFIX), calc_grad_event_key); + PROFILE_TIME_REGISTER_EVENT(forward_event_key, profile_name(FORWARD_SUFFIX)); + PROFILE_TIME_REGISTER_EVENT(calc_deriv_event_key, + profile_name(CALC_DERIV_SUFFIX)); + PROFILE_TIME_REGISTER_EVENT(calc_grad_event_key, + profile_name(CALC_GRAD_SUFFIX)); return init_context; } @@ -589,9 +591,9 @@ InitLayerContext LayerNode::finalize(const std::vector &input_dims) { */ void LayerNode::forwarding(bool training) { loss->set(run_context->getRegularizationLoss()); - START_PROFILE(forward_event_key); + PROFILE_TIME_START(forward_event_key); layer->forwarding(*run_context, training); - END_PROFILE(forward_event_key); + PROFILE_TIME_END(forward_event_key); #ifdef DEBUG if (!run_context->validate(getNumInputConnections() == 0, !requireLabel())) @@ -608,9 +610,9 @@ void LayerNode::forwarding(bool training) { * @brief calc the derivative to be passed to the previous layer */ void LayerNode::calcDerivative() { - START_PROFILE(calc_deriv_event_key); + PROFILE_TIME_START(calc_deriv_event_key); layer->calcDerivative(*run_context); - END_PROFILE(calc_deriv_event_key); + PROFILE_TIME_END(calc_deriv_event_key); #ifdef DEBUG if (!run_context->validate(getNumInputConnections() == 0, !requireLabel())) @@ -623,10 +625,10 @@ void LayerNode::calcDerivative() { * @brief Calculate the derivative of a layer */ void LayerNode::calcGradient() { - START_PROFILE(calc_grad_event_key); + PROFILE_TIME_START(calc_grad_event_key); if (needs_calc_gradient) layer->calcGradient(*run_context); - END_PROFILE(calc_grad_event_key); + PROFILE_TIME_END(calc_grad_event_key); #ifdef DEBUG if (!run_context->validate(getNumInputConnections() == 0, !requireLabel())) diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index 2272380..e0b06d8 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -558,9 +558,11 @@ sharedConstTensors NeuralNetwork::inference(sharedConstTensors X, allocate(ExecutionMode::INFERENCE); - START_PROFILE(profile::NN_FORWARD); + int nn_foward; + PROFILE_TIME_REGISTER_EVENT(nn_foward, "nn_forward"); + PROFILE_TIME_START(nn_foward); out = forwarding(X, label, false); - END_PROFILE(profile::NN_FORWARD); + PROFILE_TIME_END(nn_foward); if (free_mem) /** diff --git a/nntrainer/tensor/memory_pool.cpp b/nntrainer/tensor/memory_pool.cpp index e9aed59..d845094 100644 --- a/nntrainer/tensor/memory_pool.cpp +++ b/nntrainer/tensor/memory_pool.cpp @@ -10,12 +10,13 @@ * @brief This is Memory Pool Class */ +#include #include #include -#include #include #include +#include namespace nntrainer { @@ -90,6 +91,14 @@ void MemoryPool::allocate() { if (mem_pool == nullptr) throw std::runtime_error( "Failed to allocate memory: " + std::to_string(pool_size) + "bytes"); + +#ifdef PROFILE + static long long seq = 0; + + std::string msg("MemoryPool #"); + msg.append(std::to_string(seq++)); + PROFILE_MEM_ALLOC(mem_pool, pool_size, msg); +#endif } /** @@ -108,8 +117,10 @@ void *MemoryPool::getMemory(unsigned int idx) { * */ void MemoryPool::deallocate() { - if (mem_pool != nullptr) + if (mem_pool != nullptr) { free(mem_pool); + PROFILE_MEM_DEALLOC(mem_pool); + } mem_pool = nullptr; } diff --git a/nntrainer/utils/profiler.cpp b/nntrainer/utils/profiler.cpp index 7287c2f..1a24b73 100644 --- a/nntrainer/utils/profiler.cpp +++ b/nntrainer/utils/profiler.cpp @@ -11,9 +11,11 @@ * */ #include +#include #include #include #include +#include #include #include @@ -24,69 +26,76 @@ namespace nntrainer { namespace profile { -ProfileListener::ProfileListener(Profiler *profiler_, std::vector events) : - profiler(profiler_) { - if (profiler != nullptr) { - profiler->subscribe(this, events); - } -} - -ProfileListener::~ProfileListener() noexcept { - if (profiler != nullptr) { - try { - profiler->unsubscribe(this); - } catch (...) { - ml_logw("unsubscribing profiler failed may cause undefined error"); - } - } -} - -void GenericProfileListener::onNotify(const int event, - const std::chrono::microseconds &value) { - - time_iter = time_taken.find(event); +void GenericProfileListener::onNotifyTimeEvent( + PROFILE_EVENT event, const int time_item, const std::string &str, + const std::chrono::microseconds &duration) { + auto time_iter = time_taken.find(time_item); if (time_iter == time_taken.end()) { - reset(event); // this sets time_iter to current, note that this is a side - // effect of reset() + reset(time_item, str); + time_iter = time_taken.find(time_item); } + auto &cnt_ = std::get(time_iter->second); cnt_++; - if (warmups >= cnt_) { + if (warmups >= cnt_) return; - } auto &cur_ = std::get(time_iter->second); auto &min_ = std::get(time_iter->second); auto &max_ = std::get(time_iter->second); auto &sum_ = std::get(time_iter->second); - cur_ = value; - min_ = std::min(min_, value); - max_ = std::max(max_, value); - sum_ += value; + cur_ = duration; + min_ = std::min(min_, duration); + max_ = std::max(max_, duration); + sum_ += duration; } -void GenericProfileListener::reset(const int event) { - time_iter = - time_taken - .insert({event, std::make_tuple(std::chrono::microseconds{0}, - std::chrono::microseconds::max(), - std::chrono::microseconds::min(), - std::chrono::microseconds{0}, int{0})}) - .first; +void GenericProfileListener::onNotifyMemoryEvent( + PROFILE_EVENT event, const size_t total_alloc_size, const std::string &str, + const std::chrono::microseconds &duration) { + mem_taken.emplace_back(event, total_alloc_size, str, duration); +} + +void GenericProfileListener::notify( + PROFILE_EVENT event, const std::shared_ptr data) { + switch (event) { + case EVENT_TIME_START: + /* ignore start time. we only consider duration of item */ + break; + case EVENT_TIME_END: + onNotifyTimeEvent(event, data->time_item, data->event_str, data->duration); + break; + case EVENT_MEM_ALLOC: + case EVENT_MEM_DEALLOC: + onNotifyMemoryEvent(event, data->total_alloc_size, data->event_str, + data->duration); + break; + default: + throw std::runtime_error("Invalid PROFILE_EVENT"); + break; + } +} + +void GenericProfileListener::reset(const int time_item, + const std::string &name) { + time_taken[time_item] = std::make_tuple( + std::chrono::microseconds{0}, std::chrono::microseconds::max(), + std::chrono::microseconds::min(), std::chrono::microseconds{0}, int{0}); + names[time_item] = name; } const std::chrono::microseconds -GenericProfileListener::result(const int event) { - auto iter = time_taken.find(event); +GenericProfileListener::result(const int time_item) { + auto iter = time_taken.find(time_item); if (iter == time_taken.end() || std::get(iter->second) == 0) { std::stringstream ss; - ss << "event has never recorded" << profiler->eventToStr(event); - throw std::invalid_argument("event has never recorded"); + ss << "time_item has never recorded: " << names[time_item]; + throw std::invalid_argument("time_item has never recorded"); } return std::get(iter->second); @@ -95,17 +104,20 @@ GenericProfileListener::result(const int event) { void GenericProfileListener::report(std::ostream &out) const { std::vector column_size = {20, 23, 23, 23, 23, 23}; - for (auto &entry : time_taken) { - auto title = profiler->eventToStr(entry.first); + for (auto &[item, time] : time_taken) { + auto title = names.find(item); +#ifdef DEBUG + if (title == names.end()) + throw std::runtime_error("Couldn't find name. it's already removed."); +#endif column_size[0] = - std::max(column_size[0], static_cast(title.size())); + std::max(column_size[0], static_cast(title->second.size())); } auto total_col_size = std::accumulate(column_size.begin(), column_size.end(), 0); - if (warmups != 0) { + if (warmups != 0) out << "warm up: " << warmups << '\n'; - } auto end = std::chrono::steady_clock::now(); auto duration = @@ -129,17 +141,21 @@ void GenericProfileListener::report(std::ostream &out) const { std::map> ordered_report; /// calculate metrics while skipping warmups - for (auto &entry : time_taken) { + for (auto &time : time_taken) { auto func = [&](std::ostream &out_) { - auto &cnt_ = std::get(entry.second); - auto &min_ = std::get(entry.second); - auto &max_ = std::get(entry.second); - auto &sum_ = std::get(entry.second); + auto &cnt_ = std::get(time.second); + auto &min_ = std::get(time.second); + auto &max_ = std::get(time.second); + auto &sum_ = std::get(time.second); - auto title = profiler->eventToStr(entry.first); + auto title = names.find(time.first); +#ifdef DEBUG + if (title == names.end()) + throw std::runtime_error("Couldn't find name. it's already removed."); +#endif if (warmups >= cnt_) { - out_ << std::left << std::setw(total_col_size) << title + out_ << std::left << std::setw(total_col_size) << title->second << "less data then warmup\n"; out_ << std::right; // Restore outputstream adjustflag to standard stream @@ -149,7 +165,7 @@ void GenericProfileListener::report(std::ostream &out) const { out_.setf(std::ios::fixed); out_.precision(2); // clang-format off - out_ << std::setw(column_size[0]) << title + out_ << std::setw(column_size[0]) << title->second << std::setw(column_size[1]) << sum_.count() / (cnt_ - warmups) << std::setw(column_size[2]) << min_.count() << std::setw(column_size[3]) << max_.count() @@ -158,12 +174,48 @@ void GenericProfileListener::report(std::ostream &out) const { // clang-format on out_.unsetf(std::ios::fixed); }; - ordered_report[-entry.first] = func; + ordered_report[-time.first] = func; } - for (auto &entry : ordered_report) { + for (auto &entry : ordered_report) entry.second(out); + + ordered_report.clear(); + + out << std::string(total_col_size, '=') << '\n'; + out << "\n\n"; + /// creating header + // clang-format off + out << std::setw(column_size[0]) << "event" + << std::setw(column_size[1]) << "current size" + << std::setw(column_size[2]) << "info" + << std::setw(column_size[3]) << "dur" << '\n'; + // clang-format on + out << std::string(total_col_size, '=') << '\n'; + + int order = 0; + for (auto &mem : mem_taken) { + auto func = [&](std::ostream &out_) { + auto &event = std::get(mem); + auto &size = std::get(mem); + auto &info = std::get(mem); + auto &dur = std::get(mem); + + out_.setf(std::ios::fixed); + out_.precision(2); + // clang-format off + out_ << std::setw(column_size[0]) << ((event == EVENT_MEM_ALLOC) ? "ALLOC" : "DEALLOC") + << std::setw(column_size[1]) << size + << std::setw(column_size[2]) << info + << std::setw(column_size[3]) << dur.count() << '\n'; + // clang-format on + out_.unsetf(std::ios::fixed); + }; + ordered_report[order++] = func; } + + for (auto &entry : ordered_report) + entry.second(out); } Profiler &Profiler::Global() { @@ -171,126 +223,141 @@ Profiler &Profiler::Global() { return instance; } -void Profiler::start(const int &event) { +void Profiler::start(const int item) { #ifdef DEBUG /// @todo: consider race condition - auto iter = start_time.find(event); - if (iter != start_time.end()) { + if (time_item_times.find(item) != time_item_times.end()) throw std::invalid_argument("profiler has already started"); - } #endif - start_time[event] = std::chrono::steady_clock::now(); + auto name = time_item_names.find(item); + if (name == time_item_names.end()) + throw std::invalid_argument("the item is not registered"); + + time_item_times[item] = std::chrono::steady_clock::now(); + + auto data = std::make_shared(item, 0, name->second, + std::chrono::microseconds(0)); + notifyListeners(EVENT_TIME_START, data); } -void Profiler::end(const int &event) { +void Profiler::end(const int item) { /// @todo: consider race condition auto end = std::chrono::steady_clock::now(); - auto iter = start_time.find(event); #ifdef DEBUG - if (iter == start_time.end()) { - throw std::invalid_argument("profiler hasn't started with the event"); - } + if (time_item_times.find(item) == time_item_times.end()) + throw std::invalid_argument("profiler hasn't started with the item"); #endif + auto name = time_item_names.find(item); + if (name == time_item_names.end()) + throw std::invalid_argument("the item is not registered"); + + auto start = time_item_times[item]; auto duration = - std::chrono::duration_cast(end - iter->second); - notify(event, duration); + std::chrono::duration_cast(end - start); + auto data = + std::make_shared(item, 0, name->second, duration); -#ifdef DEBUG - start_time.erase(iter); -#endif + notifyListeners(EVENT_TIME_END, data); + + time_item_times.erase(item); } -void Profiler::notify(const int &event, - const std::chrono::microseconds &value) { - std::lock_guard lk(subscription_mutex); - for (auto &listener : all_event_listeners) { - listener->onNotify(event, value); - } +void Profiler::notifyListeners(PROFILE_EVENT event, + const std::shared_ptr data) { + std::lock_guard lock(listeners_mutex); - auto &items = event_listeners[event]; + for (auto &l : time_item_listeners[data->time_item]) + l->notify(event, data); - for (auto &listner : items) { - listner->onNotify(event, value); - } + for (auto &l : listeners) + l->notify(event, data); } -void Profiler::subscribe(ProfileListener *listener, - const std::vector &events) { +void Profiler::subscribe(std::shared_ptr listener, + const std::set &time_items) { if (listener == nullptr) { throw std::invalid_argument("listener is null!"); } - { - std::lock_guard lk(subscription_mutex); - if (all_registered_listeners.count(listener) == 1) { - throw std::invalid_argument( - "listener is already registered, please unsubscribe before subscribe "); - } - - all_registered_listeners.insert(listener); + std::lock_guard lock(listeners_mutex); + if (time_items.empty()) { + listeners.insert(listener); + } else { + for (auto item : time_items) + time_item_listeners[item].insert(listener); + } +} - if (events.empty()) { - all_event_listeners.insert(listener); - return; - } +void Profiler::unsubscribe(std::shared_ptr listener) { + std::lock_guard lock(listeners_mutex); + listeners.erase(listener); - for (auto event : events) { - auto iter = event_listeners.find(event); - if (iter == event_listeners.end()) { - event_listeners[event] = std::unordered_set{}; - } - event_listeners[event].insert(listener); - } + for (auto &[item, listeners] : time_item_listeners) { + auto found = listeners.find(listener); + if (found != listeners.end()) + listeners.erase(found); } } -void Profiler::unsubscribe(ProfileListener *listener) { - std::lock_guard lk(subscription_mutex); - all_registered_listeners.erase(listener); - all_event_listeners.erase(listener); +int Profiler::registerTimeItem(const std::string &name) { + std::lock_guard lock(registr_mutex); - for (auto &it : event_listeners) { - it.second.erase(listener); - } + int item = time_item_names.size() + 1; + + time_item_names[item] = name; + time_item_listeners[item] = std::set>(); + + ml_logd("[Profiler] Event registered, time item name: %s key: %d", + name.c_str(), item); + + return item; } -std::string Profiler::eventToStr(const int event) { +void Profiler::alloc(const void *ptr, size_t size, const std::string &str) { + std::lock_guard lock(allocates_mutex); - /// reserved event - if (event >= 0) { - switch (event) { - case EVENT::NN_FORWARD: - return "nn_forward"; - } - } - /// custom events - else if (event < 0) { - std::lock_guard lk(event_registration_mutex); - int actual_idx = -event - 1; - - if (actual_idx > static_cast(custom_events.size()) - 1) { - std::stringstream ss; - ss << "undef(" << event << ')'; - return ss.str(); - } - return custom_events[actual_idx]; - } +#ifdef DEBUG + auto found = allocates.find(ptr); + if (found != allocates.end()) + throw std::invalid_argument("memory profiler is already allocated"); +#endif + + allocates[ptr] = std::tuple( + size, std::chrono::steady_clock::now(), str); + + total_size += size; - throw std::logic_error("control should not reach here"); + auto data = std::make_shared(0, total_size.load(), str, + std::chrono::microseconds(0)); + notifyListeners(EVENT_MEM_ALLOC, data); } -int Profiler::registerEvent(const std::string &name) { - std::lock_guard lk(event_registration_mutex); - custom_events.push_back(name); - int key = -custom_events.size(); +void Profiler::dealloc(const void *ptr) { + std::lock_guard lock(allocates_mutex); + + auto end = std::chrono::steady_clock::now(); + auto found = allocates.find(ptr); + +#ifdef DEBUG + if (found == allocates.end()) + throw std::invalid_argument("memory profiler didn't allocated"); +#endif + + auto duration = std::chrono::duration_cast( + end - std::get(found->second)); + + total_size -= std::get(found->second); + + auto str = std::get(found->second); + auto data = + std::make_shared(0, total_size.load(), str, duration); - ml_logd("[Profiler] Event registered, event name: %s event key: %d", - name.c_str(), key); + notifyListeners(EVENT_MEM_DEALLOC, data); - return key; + allocates.erase(found); } } // namespace profile diff --git a/nntrainer/utils/profiler.h b/nntrainer/utils/profiler.h index a61ab7d..21757dd 100644 --- a/nntrainer/utils/profiler.h +++ b/nntrainer/utils/profiler.h @@ -12,50 +12,91 @@ */ #ifndef __PROFILER_H__ #define __PROFILER_H__ -#ifndef PROFILE - -#define START_PROFILE(event_key) -#define END_PROFILE(event_key) -#define REGISTER_EVENT(event_key, ret) - -#else - -#define START_PROFILE(event_key) \ - do { \ - auto &prof = nntrainer::profile::Profiler::Global(); \ - prof.start(event_key); \ - } while (0); - -#define END_PROFILE(event_key) \ - do { \ - auto &prof = nntrainer::profile::Profiler::Global(); \ - prof.end(event_key); \ - } while (0); - -#define REGISTER_EVENT(event_str, ret) \ - do { \ - auto &prof = nntrainer::profile::Profiler::Global(); \ - ret = prof.registerEvent(event_str); \ - } while (0); - -#endif /** PROFILE */ #include #include #include +#include #include +#include #include #include #include #include + +using timepoint = std::chrono::time_point; + +#ifndef PROFILE + +#define PROFILE_TIME_START(event_key) +#define PROFILE_TIME_END(event_key) +#define PROFILE_TIME_REGISTER_EVENT(event_key, event_str) +#define PROFILE_MEM_ALLOC(ptr, size, str) +#define PROFILE_MEM_DEALLOC(ptr) + +#else /** PROFILE */ + +#define PROFILE_TIME_START(event_key) \ + nntrainer::profile::Profiler::Global().start(event_key) + +#define PROFILE_TIME_END(event_key) \ + nntrainer::profile::Profiler::Global().end(event_key) + +#define PROFILE_TIME_REGISTER_EVENT(event_key, event_str) \ + do { \ + event_key = \ + nntrainer::profile::Profiler::Global().registerTimeItem(event_str); \ + } while (0) + +#define PROFILE_MEM_ALLOC(ptr, size, str) \ + nntrainer::profile::Profiler::Global().alloc(ptr, size, str) + +#define PROFILE_MEM_DEALLOC(ptr) \ + nntrainer::profile::Profiler::Global().dealloc(ptr) + +#endif /** PROFILE */ + namespace nntrainer { + namespace profile { -typedef enum { - NN_FORWARD = 1 /**< Neuralnet single inference without loss calculation */, -} EVENT; +enum PROFILE_EVENT { + EVENT_TIME_START = 0, + EVENT_TIME_END = 1, + EVENT_MEM_ALLOC = 2, + EVENT_MEM_DEALLOC = 3, +}; + +/** + * @brief Data for each profile event + * + */ +struct ProfileEventData { +public: + /** + * @brief Construct a new ProfileEventData struct + * + */ + ProfileEventData(int item, size_t size, std::string str, + std::chrono::microseconds dur) : + time_item(item), + total_alloc_size(size), + event_str(str), + duration(dur) {} + + /* for time profile */ + int time_item; + + /* for memory profile */ + size_t total_alloc_size; + + /* common data */ + std::string event_str; + std::chrono::microseconds duration; +}; class Profiler; + /** * @brief Generic profile listener class to attach to a profiler, * this can be inherited to create a custom profile listener @@ -65,41 +106,38 @@ public: /** * @brief Construct a new Profile Listener object * - * @param profiler_ profiler that this listener is bound to. Unsubscribe will - * be called when destruction - * @param events events for this profiler to listen to */ - ProfileListener(Profiler *profiler_, std::vector events); + explicit ProfileListener() = default; /** * @brief Destroy the Base Profile Listener object * */ - virtual ~ProfileListener() noexcept; + virtual ~ProfileListener() noexcept = default; /** * @brief A callback function to be called from a profiler * - * @param event event key to store the result - * @param value time value from the profiler + * @param event event type + * @param data event data */ - virtual void onNotify(const int event, - const std::chrono::microseconds &value) = 0; + virtual void notify(PROFILE_EVENT event, + const std::shared_ptr data) = 0; /** * @brief resets the listener to the inital state for a particular key * - * @param event event which profiler should notice + * @param time_item time item which will be reset */ - virtual void reset(const int event) = 0; + virtual void reset(const int time_item, const std::string &str) = 0; /** * @brief get the latest result of a event * - * @param event event to query the result + * @param time_item time item to query the result * @return const std::chrono::microseconds */ - virtual const std::chrono::microseconds result(const int event) = 0; + virtual const std::chrono::microseconds result(const int time_item) = 0; /** * @brief report the result @@ -107,9 +145,6 @@ public: * @param out outstream object to make a report */ virtual void report(std::ostream &out) const = 0; - -protected: - Profiler *profiler; }; /** @@ -121,18 +156,12 @@ public: /** * @brief Construct a new GenericProfile Listener object * - * @param profiler profiler that this listener is bound to. pass null if empty - * @param warmups_ ignore first @a warmups_ records when making report + * @param warmups_ ignore first @a warmups_ records when making time report */ - GenericProfileListener(Profiler *profiler, std::vector events = {}, - int warmups_ = 0) : - ProfileListener(profiler, events), + explicit GenericProfileListener(int warmups_ = 0) : + ProfileListener(), start_time(std::chrono::steady_clock::now()), - warmups(warmups_) { - for (auto &event : events) { - reset(event); - } - } + warmups(warmups_) {} /** * @brief Destroy the Generic Profile Listener object @@ -141,16 +170,18 @@ public: virtual ~GenericProfileListener() = default; /** - * @copydoc ProfileListener::onNotify(const int event, const - * std::chrono::microseconds &value) + * @brief A callback function to be called from a profiler + * + * @param event event type + * @param data event data */ - virtual void onNotify(const int event, - const std::chrono::microseconds &value) override; + virtual void notify(PROFILE_EVENT event, + const std::shared_ptr data) override; /** - * @copydoc ProfileListener::reset(const int event) + * @copydoc ProfileListener::reset(const int time_item) */ - virtual void reset(const int event) override; + virtual void reset(const int time_item, const std::string &str) override; /** * @copydoc ProfileListener::result(const int event) @@ -163,6 +194,22 @@ public: virtual void report(std::ostream &out) const override; private: + /** + * @brief Called when time event occurs + * + */ + void onNotifyTimeEvent(PROFILE_EVENT event, const int time_item, + const std::string &str, + const std::chrono::microseconds &duration); + + /** + * @brief Called when memory event occurs + * + */ + void onNotifyMemoryEvent(PROFILE_EVENT event, const size_t total_alloc_size, + const std::string &str, + const std::chrono::microseconds &duration); + std::chrono::time_point start_time; unsigned int warmups; @@ -179,11 +226,11 @@ private: unsigned int /** CNT */>> time_taken; - /** - * @brief iterator for the time_taken - * - */ - decltype(time_taken)::iterator time_iter; + std::list< + std::tuple> + mem_taken; + + std::unordered_map names; }; /** @@ -207,7 +254,7 @@ public: * @brief Construct a new Profiler object * */ - Profiler() {} + Profiler() : total_size(0) {} /** * @brief Deleted constructor @@ -218,27 +265,41 @@ public: /** * - * @brief Get Global app context. + * @brief Get Global Profiler * - * @return AppContext& + * @return Profiler& */ static Profiler &Global(); /** - * @brief start profile + * @brief start time profile * - * @param key to record the profile result. Either designated key from enum - * or arbitrary key can be used + * @param time_item time item to be recorded */ - void start(const int &key); + void start(const int time_item); /** - * @brief end profile and notify to the listeners + * @brief end time profile and notify to the listeners * - * @param key to record the profile result. Either designated key from enum - * or arbitrary key can be used + * @param time_item time item to be finished */ - void end(const int &key); + void end(const int time_item); + + /** + * @brief trace memory allocation + * + * @param ptr allocated memory pointer + * @param size amount of allocated memory + * @param str information string + */ + void alloc(const void *ptr, size_t size, const std::string &str); + + /** + * @brief trace memory de-allocation + * + * @param ptr de-allocated memory pointer + */ + void dealloc(const void *ptr); /** * @brief subscribe a listener to the profiler @@ -249,15 +310,15 @@ public: * to all events * @throw std::invalid_argument if listener is already registered */ - void subscribe(ProfileListener *listener, - const std::vector &events = {}); + void subscribe(std::shared_ptr listener, + const std::set &time_item = {}); /** * @brief unsubscribe a listener from the profiler * * @param listener listener to unsubscribe */ - void unsubscribe(ProfileListener *listener); + void unsubscribe(std::shared_ptr listener); /** * @brief registerEvent to record. @@ -265,14 +326,7 @@ public: * * @return int return NEGATIVE integer to distinguish from reserved events */ - int registerEvent(const std::string &name); - - /** - * @brief get string representation of an event - * - * @return std::string name - */ - std::string eventToStr(const int event); + int registerTimeItem(const std::string &name); private: /** @@ -281,31 +335,32 @@ private: * @param event event to notify * @param value measured value from the profiler */ - void notify(const int &event, const std::chrono::microseconds &value); + void notifyListeners(PROFILE_EVENT event, + const std::shared_ptr data); - std::unordered_set - all_registered_listeners; /**< prevent registering listener twice */ + std::unordered_set> + listeners; /**< event listeners */ - std::unordered_set - all_event_listeners; /**< listeners listen to every events */ + std::unordered_map + time_item_names; /**< registered item names (time_item, string) */ + std::unordered_map + time_item_times; /**< registered time items (time_item, time) */ + std::unordered_map>> + time_item_listeners; + /**< registered listeners for each itemtems (time_item, listeners) */ - std::unordered_map> - event_listeners; /**< listeners for an events */ + std::unordered_map> + allocates; /**< allocated memory information (ptr, (size, time, info) */ - std::unordered_map> - start_time; /**< start_time of the clock */ + std::atomic total_size; /**< total allocated memory size */ - std::vector - custom_events; /**< integer indexed custom events. Actual key is negative to - avoid clashing */ - - std::mutex event_registration_mutex; /**< protect custom event registration */ - - std::mutex subscription_mutex; /**< protect sub/unsub routine to - gaurantee invarient */ + std::mutex listeners_mutex; /**< protect listeners */ + std::mutex allocates_mutex; /**< protect allocates */ + std::mutex registr_mutex; /**< protect custom event registration */ }; } // namespace profile + } // namespace nntrainer #endif /** __PROFILER_H__ */ -- 2.7.4