From ef328f09927932f2bf102499000fef3974958056 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ivan=20Vagin/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Tue, 20 Aug 2019 15:43:55 +0900 Subject: [PATCH] [neurun] Introduced ITimer, CPUTimer and CLTimer (#6690) * [neurun] Introduced ITimer, CPUTimer and CLTimer * Introduced ITimer, CPUTimer and CLTimer * Added timer getters to backends config * Made ProfileObserver to measure time with ITimer Signed-off-by: Ivan Vagin * Review fixes Signed-off-by: Ivan Vagin --- runtimes/neurun/backend/acl_cl/CLTimer.h | 108 +++++++++++++++++++++ runtimes/neurun/backend/acl_cl/Config.h | 3 + runtimes/neurun/backend/acl_neon/Config.h | 7 ++ runtimes/neurun/backend/cpu/Config.h | 7 ++ runtimes/neurun/core/include/backend/IConfig.h | 5 + .../neurun/core/include/exec/ExecutionObservers.h | 4 +- runtimes/neurun/core/include/util/ITimer.h | 57 +++++++++++ .../neurun/core/src/exec/ExecutionObservers.cc | 19 ++-- 8 files changed, 200 insertions(+), 10 deletions(-) create mode 100644 runtimes/neurun/backend/acl_cl/CLTimer.h create mode 100644 runtimes/neurun/core/include/util/ITimer.h diff --git a/runtimes/neurun/backend/acl_cl/CLTimer.h b/runtimes/neurun/backend/acl_cl/CLTimer.h new file mode 100644 index 0000000..3939ee7 --- /dev/null +++ b/runtimes/neurun/backend/acl_cl/CLTimer.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ +#define __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ + +#include +#include +#include +#include +#include +#include + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ + +/** + * @brief Class to measure CL kernels execution time + */ +class CLTimer : public util::ITimer +{ +public: + /** + * @brief This function replaces CL function, which enqueues a command to execute a kernel + * with a wrapper which remembers enqueued kernels + */ + void handleBegin() override + { + _measured_events.clear(); + + _origin_enqueue_function = arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr; + + auto _timer_enqueue_function = [this](cl_command_queue command_queue, cl_kernel kernel, + cl_uint work_dim, const size_t *gwo, const size_t *gws, + const size_t *lws, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *usr_event) { + cl_event event; + cl_int enqueue_res = + this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws, + num_events_in_wait_list, event_wait_list, &event); + this->_measured_events.emplace_back(event); + + // According to spec, if NULL was provided in usr_event - event shouldn't be returned + if (usr_event != nullptr) + { + clRetainEvent(event); + *usr_event = event; + } + return enqueue_res; + }; + arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _timer_enqueue_function; + + // Set CL_QUEUE_PROFILING_ENABLE flag for the CL command-queue, if it isn't already set + auto &cl_scheduler = arm_compute::CLScheduler::get(); + auto props = cl_scheduler.queue().getInfo(); + if ((props & CL_QUEUE_PROFILING_ENABLE) == 0) + { + cl_scheduler.set_queue( + cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE)); + } + }; + + /** + * @brief Get timer result by addition executed CL kernels durations + */ + void handleEnd() override + { + _timer_res = 0; + for (auto const &event : _measured_events) + { + cl_ulong start; + cl_ulong end; + event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end); + _timer_res += (end - start) / 1000.f; // nanoseconds -> microseconds + } + + // Restore origin CL enqueue function + arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _origin_enqueue_function; + }; + +private: + std::function _origin_enqueue_function; + std::list<::cl::Event> _measured_events; +}; + +} // namespace acl_cl +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_ACL_CL_CLTIMER_H__ diff --git a/runtimes/neurun/backend/acl_cl/Config.h b/runtimes/neurun/backend/acl_cl/Config.h index db42a5a..1857651 100644 --- a/runtimes/neurun/backend/acl_cl/Config.h +++ b/runtimes/neurun/backend/acl_cl/Config.h @@ -17,6 +17,8 @@ #ifndef __NEURUN_BACKEND_ACL_CL_CONFIG_H__ #define __NEURUN_BACKEND_ACL_CL_CONFIG_H__ +#include "CLTimer.h" +#include #include namespace neurun @@ -32,6 +34,7 @@ public: std::string id() override { return "acl_cl"; } void initialize() override; bool SupportSubTensorAlloc() override { return true; } + std::unique_ptr timer() override { return nnfw::cpp14::make_unique(); } }; } // namespace acl_cl diff --git a/runtimes/neurun/backend/acl_neon/Config.h b/runtimes/neurun/backend/acl_neon/Config.h index f593d8b..0656fa4 100644 --- a/runtimes/neurun/backend/acl_neon/Config.h +++ b/runtimes/neurun/backend/acl_neon/Config.h @@ -18,6 +18,8 @@ #define __NEURUN_BACKEND_ACL_NEON_CONFIG_H__ #include +#include +#include namespace neurun { @@ -32,6 +34,11 @@ public: std::string id() override { return "acl_neon"; } void initialize() override; bool SupportSubTensorAlloc() override { return true; } + + std::unique_ptr timer() override + { + return nnfw::cpp14::make_unique(); + } }; } // namespace acl_neon diff --git a/runtimes/neurun/backend/cpu/Config.h b/runtimes/neurun/backend/cpu/Config.h index dfa8c0f..ac55d98 100644 --- a/runtimes/neurun/backend/cpu/Config.h +++ b/runtimes/neurun/backend/cpu/Config.h @@ -18,6 +18,8 @@ #define __NEURUN_BACKEND_CPU_CONFIG_H__ #include +#include +#include namespace neurun { @@ -36,6 +38,11 @@ public: // NOTE CPU allocator cannot support subtensor allocation yet return false; } + + std::unique_ptr timer() override + { + return nnfw::cpp14::make_unique(); + } }; } // namespace cpu diff --git a/runtimes/neurun/core/include/backend/IConfig.h b/runtimes/neurun/core/include/backend/IConfig.h index 681bf01..0e95720 100644 --- a/runtimes/neurun/core/include/backend/IConfig.h +++ b/runtimes/neurun/core/include/backend/IConfig.h @@ -17,6 +17,8 @@ #ifndef __NEURUN_BACKEND_ICONFIG_H__ #define __NEURUN_BACKEND_ICONFIG_H__ +#include "util/ITimer.h" +#include #include namespace neurun @@ -32,6 +34,9 @@ struct IConfig virtual void initialize() = 0; // Support subtensor allocation virtual bool SupportSubTensorAlloc() = 0; + + // Timer is used for backend profiling. In case of default (nullptr) timer profiler won't work. + virtual std::unique_ptr timer() { return nullptr; } }; } // namespace backend diff --git a/runtimes/neurun/core/include/exec/ExecutionObservers.h b/runtimes/neurun/core/include/exec/ExecutionObservers.h index 79006ed..61c8bf1 100644 --- a/runtimes/neurun/core/include/exec/ExecutionObservers.h +++ b/runtimes/neurun/core/include/exec/ExecutionObservers.h @@ -20,8 +20,8 @@ #include "exec/IFunction.h" #include "model/Operation.h" #include "backend/ExecTime.h" +#include "util/ITimer.h" #include "IExecutor.h" -#include namespace neurun { @@ -55,7 +55,7 @@ private: void uploadExecTime() { _et->uploadOperationsExecTime(); } private: - std::chrono::steady_clock::time_point _prev_time; + std::unique_ptr _timer; std::shared_ptr _et; }; diff --git a/runtimes/neurun/core/include/util/ITimer.h b/runtimes/neurun/core/include/util/ITimer.h new file mode 100644 index 0000000..28b4ca1 --- /dev/null +++ b/runtimes/neurun/core/include/util/ITimer.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_UTIL_ITIMER_H__ +#define __NEURUN_UTIL_ITIMER_H__ + +#include + +namespace neurun +{ +namespace util +{ + +class ITimer +{ +public: + virtual void handleBegin() = 0; + virtual void handleEnd() = 0; + int getTime() { return _timer_res; }; + +protected: + int _timer_res{0}; +}; + +class CPUTimer : public ITimer +{ +public: + void handleBegin() override { _start_time = std::chrono::steady_clock::now(); }; + + void handleEnd() override + { + const auto end_time = std::chrono::steady_clock::now(); + _timer_res = + std::chrono::duration_cast(end_time - _start_time).count(); + }; + +private: + std::chrono::steady_clock::time_point _start_time; // in microseconds +}; + +} // namespace util +} // namespace neurun + +#endif // __NEURUN_UTIL_ITIMER_H__ diff --git a/runtimes/neurun/core/src/exec/ExecutionObservers.cc b/runtimes/neurun/core/src/exec/ExecutionObservers.cc index 8d8ef42..f984c5a 100644 --- a/runtimes/neurun/core/src/exec/ExecutionObservers.cc +++ b/runtimes/neurun/core/src/exec/ExecutionObservers.cc @@ -26,19 +26,22 @@ namespace exec { void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const neurun::model::Operation *, - const neurun::backend::Backend *) + const neurun::backend::Backend *backend) { - _prev_time = std::chrono::steady_clock::now(); + _timer = backend->config()->timer(); + if (_timer == nullptr) + throw std::runtime_error("To profile backend timer() method must be implemented"); + _timer->handleBegin(); } void ProfileObserver::handleEnd(IExecutor *exec, const model::Operation *node, const backend::Backend *backend) { - const auto exec_end = std::chrono::steady_clock::now(); - const auto time = - std::chrono::duration_cast(exec_end - _prev_time).count(); + _timer->handleEnd(); + const auto timer_res = _timer->getTime(); + auto node_name = node->getName(); - VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << time << std::endl; + VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl; // fill ExecTime: bool is_quantized = exec->model().operands.at(node->getInputs().at(0)).typeInfo().type() == @@ -58,11 +61,11 @@ void ProfileObserver::handleEnd(IExecutor *exec, const model::Operation *node, auto *permute_node = dynamic_cast(node); _et->updatePermuteTime(permute_node->param().input_backend_ctx->backend, permute_node->param().output_backend_ctx->backend, is_quantized, size, - time); + timer_res); } else { - _et->updateOperationExecTime(backend, node_name, is_quantized, size, time); + _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res); } }; -- 2.7.4