#include "backend/IConfig.h"
#include "backend/IShapeFixer.h"
#include "util/logging.h"
+#include "util/Utils.h"
#include "exec/FunctionSequence.h"
#include <cassert>
#include <cmath>
{
const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};
- /* Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of
+ /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record of
* Add with and without broadcasting*/
- /*Quant is also supported: throws an exception in run(): in case of scheduling without warm-up
+ /*Quant is also unsupported: throws an exception in run(): in case of scheduling without warm-up
it isn't caught by tryBackend()*/
if (quant ||
!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
<< "Started task scheduling: uses all backends to get more metrics for data transfer"
<< std::endl;
size_t backend_ind = 0;
- for (const auto &rank : _ranks)
+ for (const auto &rank : _rank_to_op)
{
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
if (is_profiling)
{
bool profile_data_transfer = true;
- const auto &node = _graph->operations().at(_ranks.begin()->second);
+ const auto &node = _graph->operations().at(_rank_to_op.begin()->second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (const auto *backend : all_backends)
}
// for each task select the backend with the smallest earliest finishing time (eft)
- for (const auto &rank : _ranks)
+ for (const auto &rank : _rank_to_op)
{
scheduleNode(rank.second);
}
{
VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl;
- model::OperationIndexMap<int64_t> calculated_rank;
+ _graph->operations().iterate(
+ [&](const model::OperationIndex &index, const model::Operation &) { DFSMaxRank(index); });
- const int NOT_SET = -1;
+ // Check that ranks are calculated for all operations (nodes)
_graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
- calculated_rank[index] = NOT_SET;
+ UNUSED_RELEASE(index);
+ assert(_op_to_rank->find(index) != _op_to_rank->end());
});
-
- for (const auto &it : calculated_rank)
- {
- DFSMaxRank(it.first, calculated_rank);
- }
-
- // Rank for all of the operations(nodes) must be calculated.
- assert(std::all_of(calculated_rank.begin(), calculated_rank.end(),
- [](const std::pair<const model::OperationIndex, int64_t> &v) {
- return v.second != NOT_SET;
- }));
VERBOSE(Scheduler::makeRank) << "task prioritizing finished" << std::endl;
}
-int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank)
+int64_t Scheduler::DFSMaxRank(const model::OperationIndex &index)
{
- if (calculated_rank.at(index) >= 0)
- return calculated_rank.at(index);
+ auto op_to_rank_it = _op_to_rank->find(index);
+ if (op_to_rank_it != _op_to_rank->end())
+ return op_to_rank_it->second;
const auto all_backends = backend::BackendManager::instance().getAll();
const auto &node = _graph->operations().at(index);
- int64_t rank = 0, std = 0;
+ int64_t rank = 0;
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
- auto all_backends_size = static_cast<int64_t>(all_backends.size());
+ auto supported_backends_quantity = static_cast<int64_t>(all_backends.size());
- const auto max_child_rank = DFSChildrensMaxRank(index, calculated_rank);
+ const auto max_child_rank = DFSChildrenMaxRank(index);
// get average exec time of this op
for (const auto &backend : all_backends)
else
{
// this operation isn't supported in this backend
- --all_backends_size;
+ --supported_backends_quantity;
}
}
- assert((all_backends_size > 0) && "Encountered unsupported op");
- rank /= all_backends_size;
+ assert((supported_backends_quantity > 0) && "Encountered unsupported op");
+ rank /= supported_backends_quantity;
// get standard deviation
+ int64_t std = 0;
for (const auto backend : all_backends)
{
const auto exec_time = getTime(backend, node.getName(), quant, size);
std += (exec_time - rank) * (exec_time - rank);
}
}
- std /= all_backends_size;
+ std /= supported_backends_quantity;
if (std > 0)
{
std = static_cast<int>(std::sqrt(std));
rank *= std;
}
rank += max_child_rank;
- calculated_rank[index] = rank;
assert(rank >= 0);
- _ranks.emplace(rank, index);
- _indexed_ranks->emplace(index, rank);
+ _rank_to_op.emplace(rank, index);
+ _op_to_rank->emplace(index, rank);
VERBOSE(Scheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.getName()
<< " is " << rank << std::endl;
return rank;
}
-int64_t
-Scheduler::DFSChildrensMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank)
+int64_t Scheduler::DFSChildrenMaxRank(const model::OperationIndex &index)
{
const auto all_backends = backend::BackendManager::instance().getAll();
const auto &node = _graph->operations().at(index);
{
const auto &operand = _graph->operands().at(output);
const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
- // avarage data transfer cost of this operand's data
+ // average data transfer cost of this operand's data
int64_t avg_transfer_cost = 1;
for (const auto *backend : all_backends)
{
avg_transfer_cost /= all_backends.size();
for (const auto &use : operand.getUses().list())
{
- const auto cur_child_rank = DFSMaxRank(use, calculated_rank);
+ const auto cur_child_rank = DFSMaxRank(use);
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
}
}
{prev_op_ft + it.second, prev_op_ft});
}
- _parents_eft[index] = eft;
+ _ops_eft[index] = eft;
_backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
_backend_resolver->setBackend(index, chosen_backend);
{
const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR);
- // Permutation will cause creating a seperate subgraph that contains just this permutation node.
+ // Permutation will cause creating a separate subgraph that contains just this permutation node.
// This isn't needed for Linear executor since it doesn't use subgraphs
// The value of 1 ms was picked experimentally
int64_t permute_fine = 1000;
exec_time *= CPU_DELAY;
}
- // get max eft of direct (one lavel above) predecessors
+ // get max eft of direct (one level above) predecessors
auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
int64_t total_transfer_cost = 0;
std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
- // Find free time for data transfering and insert it into backend taskset. This is needed:
+ // Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
auto cpu_backend = backend::BackendManager::instance().get("cpu");
std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
int64_t max_pred_eft = 0;
- for (const auto &input : node.getInputs())
+ for (const auto &input_operand_idx : node.getInputs())
{
- const auto &operand = _graph->operands().at(input);
- const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
- // operations, whose output is current node's this input operand
- for (const auto &defs : operand.getDef().list())
+ const auto &input_operand = _graph->operands().at(input_operand_idx);
+ const bool quant = input_operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
+
+ for (const auto &input_node_idx : input_operand.getDef().list())
{
- // Data transfer cost from this parent's backend to current node's backend:
- auto parent_backend = _backend_resolver->getBackend(defs);
+ // Data transfer cost from parent's node backend to current node's backend:
+ auto parent_backend = _backend_resolver->getBackend(input_node_idx);
- max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
+ max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
if (parent_backend != backend)
{
- // Multiply operand size by 2 because size must discribe input+output size
+ // Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
- operand.info().total_size() * 2);
- transfer_st_exec_time.emplace(_parents_eft.at(defs), transfer_cost);
+ input_operand.info().total_size() * 2);
+ transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
}
* @param[in] backend_resolver backend resolver
*/
Scheduler(const neurun::model::Operands &operands)
- : _run_cache{}, _backends_avail_time{}, _parents_eft{},
- _indexed_ranks{std::make_shared<model::OperationIndexMap<int64_t>>()}
+ : _run_cache{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<model::OperationIndexMap<int64_t>>()}
{
_backend_resolver = nnfw::cpp14::make_unique<compiler::BackendResolver>(operands);
_exec_time =
* https://www.hindawi.com/journals/sp/2016/3676149/
*/
std::unique_ptr<compiler::BackendResolver> schedule(const graph::Graph &graph) final;
- std::shared_ptr<model::OperationIndexMap<int64_t>> getIndexedRanks() { return _indexed_ranks; }
+ std::shared_ptr<model::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
private:
void scheduleNode(const model::OperationIndex &);
*
* @param[in] backend: backend, for which to return the time
* @param[in] node: node to get eft of parents
- * @param[out] transfer_st_exec_time: est and exec time of data tranfer operation
+ * @param[out] transfer_st_exec_time: est and exec time of data transfer operation
*
* @return earliest finishing time of parent nodes
*/
void makeRank();
- int64_t DFSMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank);
+ int64_t DFSMaxRank(const model::OperationIndex &index);
- int64_t DFSChildrensMaxRank(const model::OperationIndex &index,
- std::unordered_map<model::OperationIndex, int64_t> &calculated_rank);
+ int64_t DFSChildrenMaxRank(const model::OperationIndex &index);
/**
* @brief Returns the time, when backend is available for at least given amount of time.
*
*
* @param[in] backend backend, for which to return the time
* @param[in] starting_time time, starting which to look for gap
- * @param[in] time_amount amound of the time, for which to look gap
+ * @param[in] time_amount amount of the time, for which to look gap
*
* @return time, when backend has at least time_amount free time
*/
std::unordered_map<const backend::Backend *, std::unordered_map<std::string, int>> _run_cache;
// Finishing and starting time of each backend
std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
- model::OperationIndexMap<int64_t> _parents_eft;
- std::multimap<int64_t, model::OperationIndex, std::greater<int64_t>> _ranks;
- std::shared_ptr<model::OperationIndexMap<int64_t>> _indexed_ranks;
+ model::OperationIndexMap<int64_t> _ops_eft;
+ std::multimap<int64_t, model::OperationIndex, std::greater<int64_t>> _rank_to_op;
+ std::shared_ptr<model::OperationIndexMap<int64_t>> _op_to_rank;
std::unique_ptr<compiler::BackendResolver> _backend_resolver;
std::unique_ptr<backend::ExecTime> _exec_time;
const graph::Graph *_graph{nullptr};