--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
+#define __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
+
+#include <util/ITimer.h>
+#include <arm_compute/core/CL/OpenCL.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <chrono>
+#include <list>
+#include <sstream>
+
+namespace neurun
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+/**
+ * @brief Class to measure CL kernels execution time
+ */
+class CLTimer : public util::ITimer
+{
+public:
+ /**
+ * @brief This function replaces CL function, which enqueues a command to execute a kernel
+ * with a wrapper which remembers enqueued kernels
+ */
+ void handleBegin() override
+ {
+ _measured_events.clear();
+
+ _origin_enqueue_function = arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+
+ auto _timer_enqueue_function = [this](cl_command_queue command_queue, cl_kernel kernel,
+ cl_uint work_dim, const size_t *gwo, const size_t *gws,
+ const size_t *lws, cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list, cl_event *usr_event) {
+ cl_event event;
+ cl_int enqueue_res =
+ this->_origin_enqueue_function(command_queue, kernel, work_dim, gwo, gws, lws,
+ num_events_in_wait_list, event_wait_list, &event);
+ this->_measured_events.emplace_back(event);
+
+ // According to spec, if NULL was provided in usr_event - event shouldn't be returned
+ if (usr_event != nullptr)
+ {
+ clRetainEvent(event);
+ *usr_event = event;
+ }
+ return enqueue_res;
+ };
+ arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _timer_enqueue_function;
+
+ // Set CL_QUEUE_PROFILING_ENABLE flag for the CL command-queue, if it isn't already set
+ auto &cl_scheduler = arm_compute::CLScheduler::get();
+ auto props = cl_scheduler.queue().getInfo<CL_QUEUE_PROPERTIES>();
+ if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
+ {
+ cl_scheduler.set_queue(
+ cl::CommandQueue(cl_scheduler.context(), props | CL_QUEUE_PROFILING_ENABLE));
+ }
+ };
+
+ /**
+ * @brief Get timer result by addition executed CL kernels durations
+ */
+ void handleEnd() override
+ {
+ _timer_res = 0;
+ for (auto const &event : _measured_events)
+ {
+ cl_ulong start;
+ cl_ulong end;
+ event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
+ event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
+ _timer_res += (end - start) / 1000.f; // nanoseconds -> microseconds
+ }
+
+ // Restore origin CL enqueue function
+ arm_compute::CLSymbols::get().clEnqueueNDRangeKernel_ptr = _origin_enqueue_function;
+ };
+
+private:
+ std::function<decltype(clEnqueueNDRangeKernel)> _origin_enqueue_function;
+ std::list<::cl::Event> _measured_events;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_ACL_CL_CLTIMER_H__
#ifndef __NEURUN_BACKEND_ACL_CL_CONFIG_H__
#define __NEURUN_BACKEND_ACL_CL_CONFIG_H__
+#include "CLTimer.h"
+#include <cpp14/memory.h>
#include <backend/IConfig.h>
namespace neurun
std::string id() override { return "acl_cl"; }
void initialize() override;
bool SupportSubTensorAlloc() override { return true; }
+ // Profiling timer for the acl_cl backend: measures GPU kernel time via CL events.
+ std::unique_ptr<util::ITimer> timer() override { return nnfw::cpp14::make_unique<CLTimer>(); }
};
} // namespace acl_cl
#define __NEURUN_BACKEND_ACL_NEON_CONFIG_H__
#include <backend/IConfig.h>
+#include <cpp14/memory.h>
+#include <util/ITimer.h>
namespace neurun
{
std::string id() override { return "acl_neon"; }
void initialize() override;
bool SupportSubTensorAlloc() override { return true; }
+
+ // acl_neon has no kernel-level timer here, so wall-clock CPU timing is used.
+ std::unique_ptr<util::ITimer> timer() override
+ {
+ return nnfw::cpp14::make_unique<util::CPUTimer>();
+ }
};
} // namespace acl_neon
#define __NEURUN_BACKEND_CPU_CONFIG_H__
#include <backend/IConfig.h>
+#include <cpp14/memory.h>
+#include <util/ITimer.h>
namespace neurun
{
// NOTE CPU allocator cannot support subtensor allocation yet
return false;
}
+
+
+ // Profiling timer for the cpu backend: wall-clock CPU timing.
+ std::unique_ptr<util::ITimer> timer() override
+ {
+ return nnfw::cpp14::make_unique<util::CPUTimer>();
+ }
};
} // namespace cpu
#ifndef __NEURUN_BACKEND_ICONFIG_H__
#define __NEURUN_BACKEND_ICONFIG_H__
+#include "util/ITimer.h"
+#include <memory>
#include <string>
namespace neurun
virtual void initialize() = 0;
// Support subtensor allocation
virtual bool SupportSubTensorAlloc() = 0;
+
+ // Timer is used for backend profiling. In case of default (nullptr) timer profiler won't work.
+ // Backends with dedicated measurement facilities (e.g. CL events) override this.
+ virtual std::unique_ptr<util::ITimer> timer() { return nullptr; }
};
} // namespace backend
#include "exec/IFunction.h"
#include "model/Operation.h"
#include "backend/ExecTime.h"
+#include "util/ITimer.h"
#include "IExecutor.h"
-#include <chrono>
namespace neurun
{
void uploadExecTime() { _et->uploadOperationsExecTime(); }
private:
- std::chrono::steady_clock::time_point _prev_time;
+ std::unique_ptr<util::ITimer> _timer;
std::shared_ptr<backend::ExecTime> _et;
};
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_UTIL_ITIMER_H__
+#define __NEURUN_UTIL_ITIMER_H__
+
+#include <chrono>
+
+namespace neurun
+{
+namespace util
+{
+
/**
 * @brief Interface for profiling timers
 *
 * Usage: handleBegin(), run the measured work, handleEnd(); then getTime()
 * returns the measured interval in microseconds.
 */
class ITimer
{
public:
  virtual void handleBegin() = 0;
  virtual void handleEnd() = 0;

  /**
   * @brief Get the last measured interval, in microseconds
   */
  int getTime() const { return _timer_res; }

  // Timers are owned and destroyed through base-class pointers
  // (std::unique_ptr<util::ITimer> from IConfig::timer()), so a virtual
  // destructor is required to avoid undefined behaviour on deletion.
  virtual ~ITimer() = default;

protected:
  int _timer_res{0}; // last measurement result, microseconds
};
+
+class CPUTimer : public ITimer
+{
+public:
+ void handleBegin() override { _start_time = std::chrono::steady_clock::now(); };
+
+ void handleEnd() override
+ {
+ const auto end_time = std::chrono::steady_clock::now();
+ _timer_res =
+ std::chrono::duration_cast<std::chrono::microseconds>(end_time - _start_time).count();
+ };
+
+private:
+ std::chrono::steady_clock::time_point _start_time; // in microseconds
+};
+
+} // namespace util
+} // namespace neurun
+
+#endif // __NEURUN_UTIL_ITIMER_H__
{
void ProfileObserver::handleBegin(neurun::exec::IExecutor *, const neurun::model::Operation *,
- const neurun::backend::Backend *)
+ const neurun::backend::Backend *backend)
{
- _prev_time = std::chrono::steady_clock::now();
+ // Ask the backend for its own timer (e.g. a CL-event timer for acl_cl);
+ // a null timer means the backend does not support profiling.
+ _timer = backend->config()->timer();
+ if (_timer == nullptr)
+ throw std::runtime_error("To profile backend timer() method must be implemented");
+ _timer->handleBegin();
}
void ProfileObserver::handleEnd(IExecutor *exec, const model::Operation *node,
const backend::Backend *backend)
{
- const auto exec_end = std::chrono::steady_clock::now();
- const auto time =
- std::chrono::duration_cast<std::chrono::microseconds>(exec_end - _prev_time).count();
+ // Stop the timer started in handleBegin(); getTime() returns microseconds.
+ _timer->handleEnd();
+ const auto timer_res = _timer->getTime();
+
auto node_name = node->getName();
- VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << time << std::endl;
+ VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
// fill ExecTime:
bool is_quantized = exec->model().operands.at(node->getInputs().at(0)).typeInfo().type() ==
auto *permute_node = dynamic_cast<const model::operation::PermuteNode *>(node);
_et->updatePermuteTime(permute_node->param().input_backend_ctx->backend,
permute_node->param().output_backend_ctx->backend, is_quantized, size,
- time);
+ timer_res);
}
else
{
- _et->updateOperationExecTime(backend, node_name, is_quantized, size, time);
+ _et->updateOperationExecTime(backend, node_name, is_quantized, size, timer_res);
}
};