return false;
}
-void Scheduler::scheduleShufflingBackends(const graph::Graph &graph)
+void Scheduler::scheduleShufflingBackends()
{
const auto all_backends = _backend_resolver->getAllBackends();
VERBOSE(Scheduler::scheduleNode)
for (const auto &rank : _ranks)
{
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << rank.second.value() << ")" << std::endl;
- const auto &node = graph.operations().at(rank.second);
- bool quant = isQuant(graph, node);
- auto size = getOperationsFlattenedIOSize(graph, node);
+ const auto &node = _graph->operations().at(rank.second);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (size_t i = 0;; ++i)
{
if (i == all_backends.size())
{
backend_ind = 0;
}
- if (isWorkaroundSkip(graph, all_backends[backend_ind], node, quant))
+ if (isWorkaroundSkip(*_graph, all_backends[backend_ind], node, quant))
{
++backend_ind;
continue;
}
- auto exec_time =
+ const auto exec_time =
_exec_time->getOperationExecTime(all_backends[backend_ind], node.getName(), quant, size);
    // Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
std::unique_ptr<compiler::BackendResolver> Scheduler::schedule(const graph::Graph &graph)
{
+ _graph = &graph;
VERBOSE(Scheduler::schedule) << "task scheduling started" << std::endl;
  // Make ranks and save them in descending order
- makeRank(graph);
+ makeRank();
const auto all_backends = _backend_resolver->getAllBackends();
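+  // seed each backend's availability map ({finish time -> start time}) with an empty slot at 0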
for (const auto *backend : all_backends)
_backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
}
- bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
+ const bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
if (is_profiling)
{
bool profile_data_transfer = true;
- const auto &node = graph.operations().at(_ranks.begin()->second);
- bool quant = isQuant(graph, node);
- auto size = getOperationsFlattenedIOSize(graph, node);
+ const auto &node = _graph->operations().at(_ranks.begin()->second);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
for (const auto *backend : all_backends)
{
- auto exec_time = _exec_time->getOperationExecTime(backend, node.getName(), quant, size);
+ const auto exec_time = _exec_time->getOperationExecTime(backend, node.getName(), quant, size);
if (exec_time == _exec_time->NOT_FOUND)
{
profile_data_transfer = false;
}
if (profile_data_transfer)
{
- scheduleShufflingBackends(graph);
+ scheduleShufflingBackends();
VERBOSE(Scheduler::schedule) << "task scheduling finished" << std::endl;
return nnfw::cpp14::make_unique<BackendResolver>(*_backend_resolver);
}
  // for each task select the backend with the smallest earliest finishing time (eft)
for (const auto &rank : _ranks)
{
- scheduleNode(graph, rank.second);
+ scheduleNode(rank.second);
}
VERBOSE(Scheduler::schedule) << "task scheduling finished" << std::endl;
return nnfw::cpp14::make_unique<BackendResolver>(*_backend_resolver);
int64_t Scheduler::getTime(const backend::Backend *backend, const std::string &operation,
bool quant, uint32_t size)
{
- auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
+ const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
if (time != _exec_time->NOT_FOUND)
{
return time;
}
// TODO: Too large function: needs code refactoring
-void Scheduler::makeRank(const graph::Graph &model)
+void Scheduler::makeRank()
{
VERBOSE(Scheduler::makeRank) << "task prioritizing" << std::endl;
std::unordered_map<model::OperationIndex, int64_t> calculated_rank;
const int NOT_SET = -1;
- model.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
+ _graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
calculated_rank[index] = NOT_SET;
});
[&](const model::OperationIndex &index, const model::Operation &node) -> int64_t {
int64_t rank = 0, max_child_rank = 0, std = 0;
- bool quant = isQuant(model, node);
+ const bool quant = isQuant(*_graph, node);
if (calculated_rank.at(index) >= 0)
return calculated_rank.at(index);
// get children's max rank
for (const auto &output : node.getOutputs())
{
- const auto &operand = model.operands().at(output);
+ const auto &operand = _graph->operands().at(output);
      // average data transfer cost of this operand's data
int64_t avg_transfer_cost = 1;
for (const auto *backend : all_backends)
avg_transfer_cost /= all_backends.size();
for (const auto &use : operand.getUses().list())
{
- auto cur_child_rank = dfs_recursive(use, model.operations().at(use));
+ const auto cur_child_rank = dfs_recursive(use, _graph->operations().at(use));
max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
}
}
- auto size = getOperationsFlattenedIOSize(model, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
auto all_backends_size = static_cast<int64_t>(all_backends.size());
// get average exec time of this op
for (const auto &backend : all_backends)
// get standard deviation
for (const auto backend : all_backends)
{
- auto exec_time = getTime(backend, node.getName(), quant, size);
+ const auto exec_time = getTime(backend, node.getName(), quant, size);
if (exec_time < _exec_time->getMax())
{
std += (exec_time - rank) * (exec_time - rank);
};
for (const auto &it : calculated_rank)
{
- const auto &node = model.operations().at(it.first);
+ const auto &node = _graph->operations().at(it.first);
dfs_recursive(it.first, node);
}
int64_t Scheduler::backendAvailableTime(const backend::Backend *backend,
const int64_t &starting_time, const int64_t &time_amount)
{
- auto backend_times = _backends_avail_time.at(backend);
+ const auto backend_times = _backends_avail_time.at(backend);
  // finishing and starting times of the op that will come after the current op
  auto next_op_fst = backend_times.upper_bound(starting_time);
  // finishing time of the op that will come before the current op
return prev_op_ft;
}
-// TODO: Too large function: need code refactoring
-void Scheduler::scheduleNode(const graph::Graph &model, const model::OperationIndex &index)
+void Scheduler::scheduleNode(const model::OperationIndex &index)
{
- const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
- // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
- // permutation on other branches or non-nnfw specific tasks and have to wait for it.
- // Number 2 is picked experimentally
- const auto CPU_DELAY = 2;
VERBOSE(Scheduler::scheduleNode) << "scheduling (" << index.value() << ")" << std::endl;
+
const auto all_backends = _backend_resolver->getAllBackends();
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
- const auto &node = model.operations().at(index);
- const auto &obj = model.operands().at(node.getInputs().at(0));
- bool quant = isQuant(model, node);
+ const auto &node = _graph->operations().at(index);
- auto size = getOperationsFlattenedIOSize(model, node);
- std::multimap<int64_t, int64_t> selected_permute_avail_time;
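+  // {earliest transfer start time, transfer cost} entries for the backend finally chosen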
+ std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
// select the backend with the smallest eft of this task
const backend::Backend *chosen_backend = nullptr;
for (const auto *backend : all_backends)
{
- std::multimap<int64_t, int64_t> permute_avail_time;
- if (isWorkaroundSkip(model, backend, node, quant))
- {
- continue;
- }
- // get average exec time of the op on this backend
- auto exec_time = getTime(backend, node.getName(), quant, size);
- if (backend->config()->id() == "cpu" && !is_linear_exec)
- {
- exec_time *= CPU_DELAY;
- }
- // get max eft of direct (one lavel above) predecessors
- int64_t max_pred_eft = 0;
- int64_t total_transfer_cost = 0;
- for (const auto &input : node.getInputs())
- {
- const auto &operand = model.operands().at(input);
- // operations, whose output is current node's this input operand
- for (const auto &defs : operand.getDef().list())
- {
- // Data transfer cost from this parent's backend to current node's backend:
- auto parent_backend = _backend_resolver->getBackend(defs);
-
- if (parent_backend == backend)
- {
- max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
- }
- else
- {
- // Multiply obj size by 2 because size my discribe input+output size
- int64_t transfer_cost =
- getTime(parent_backend, backend->config()->id(), quant, obj.info().total_size() * 2);
- if (!is_linear_exec)
- {
- transfer_cost *= CPU_DELAY;
- }
- total_transfer_cost += transfer_cost;
- permute_avail_time.insert({_parents_eft.at(defs), transfer_cost});
- }
- }
- }
-
- std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
- // Find free time for data transfering and insert it into backend taskset. This is needed:
- // 1. Time for multiple permutations for this node's input is found correctly
- // 2. If backend==cpu, then free time for this node must come after permutations
- for (const auto &it : permute_avail_time)
- {
- auto prev_op_ft =
- backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
- max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
-
- auto tmp = _backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
- {prev_op_ft + it.second, prev_op_ft});
- inserted_permutations.push_back(tmp.first);
- }
- // find the hole/gap, where this op can be put or the finishing time of the last assigned op
- auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
+ std::multimap<int64_t, int64_t> transfer_st_exec_time;
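+    // est_and_et = {earliest start time, execution time} of this node on the candidate backend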
+ const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
- // Remove inserted permutation from cpu's task set
- for (const auto &it : inserted_permutations)
- {
- _backends_avail_time[_backend_resolver->getBackend("cpu")].erase(it);
- }
- // In case linear executor measure just exec time and data transfer time
- if (is_linear_exec)
- {
- prev_op_ft = total_transfer_cost;
- VERBOSE(Scheduler::scheduleNode)
- << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
- << " on " << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
- }
- else
+ if (eft > est_and_et.first + est_and_et.second)
{
- VERBOSE(Scheduler::scheduleNode)
- << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
- << " on " << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
- }
- if (eft > prev_op_ft + exec_time)
- {
- eft = prev_op_ft + exec_time;
- selected_exec_time = exec_time;
+ eft = est_and_et.first + est_and_et.second;
+ selected_exec_time = est_and_et.second;
chosen_backend = backend;
- selected_permute_avail_time = permute_avail_time;
+ selected_transfer_st_exec_time = transfer_st_exec_time;
}
}
- for (const auto &it : selected_permute_avail_time)
+
+ for (const auto &it : selected_transfer_st_exec_time)
{
auto prev_op_ft =
backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
_backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
{prev_op_ft + it.second, prev_op_ft});
}
- VERBOSE(Scheduler::scheduleNode) << "backend for " << node.getName() << " is "
- << chosen_backend->config()->id() << ". Its eft: " << eft
- << std::endl;
_parents_eft[index] = eft;
_backends_avail_time[chosen_backend].insert({eft, eft - selected_exec_time});
_backend_resolver->setBackend(index, chosen_backend);
+
+ VERBOSE(Scheduler::scheduleNode) << "backend for " << node.getName() << " is "
+ << chosen_backend->config()->id() << ". Its eft: " << eft
+ << std::endl;
+}
+
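+// Estimates the earliest start time (est) and execution time of the node at `index` on `backend`.
+// Returns {est, exec time}; for the linear executor the first element is the total data transfer
+// cost instead. Fills `transfer_st_exec_time` with one {transfer start time, transfer cost} entry
+// per input that has to be permuted from another backend.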
+std::pair<int64_t, int64_t>
+Scheduler::ESTAndExecTime(const backend::Backend *backend, const model::OperationIndex &index,
+ std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+ const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
+  // Multiply cpu operations' exec time by 2 because in the parallel executor the cpu backend
+  // might be busy with permutations on other branches or with non-nnfw-specific tasks, and this
+  // op would have to wait for it. The factor 2 was picked experimentally.
+ const int64_t CPU_DELAY = 2;
+ const auto &node = _graph->operations().at(index);
+ const bool quant = isQuant(*_graph, node);
+ const auto size = getOperationsFlattenedIOSize(*_graph, node);
+ if (isWorkaroundSkip(*_graph, backend, node, quant))
+ {
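+    // rule this backend out by returning the largest possible times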
+ return {_exec_time->getMax(), _exec_time->getMax()};
+ }
+ // get average exec time of the op on this backend
+ auto exec_time = getTime(backend, node.getName(), quant, size);
+ if (backend->config()->id() == "cpu" && !is_linear_exec)
+ {
+ exec_time *= CPU_DELAY;
+ }
+
+  // get max eft of direct (one level above) predecessors
+ auto max_pred_eft = predMaxEFT(backend, node, quant, transfer_st_exec_time);
+
+ int64_t total_transfer_cost = 0;
+ std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
+  // Find free time for the data transfers and insert them into the backend's task set. This is
+  // needed so that:
+  // 1. the time for multiple permutations of this node's inputs is found correctly;
+  // 2. if backend == cpu, the free slot for this node comes after the permutations
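+  // each entry: {earliest time the transfer can start (parent's eft), transfer cost}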
+ for (auto &it : transfer_st_exec_time)
+ {
+ if (!is_linear_exec)
+ {
+ it.second *= CPU_DELAY;
+ }
+ total_transfer_cost += it.second;
+
+ const auto prev_op_ft =
+ backendAvailableTime(_backend_resolver->getBackend("cpu"), it.first, it.second);
+
+ max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
+
+ const auto tmp = _backends_avail_time[_backend_resolver->getBackend("cpu")].insert(
+ {prev_op_ft + it.second, prev_op_ft});
+ inserted_permutations.push_back(tmp.first);
+ }
+  // find the gap where this op can be put, or the finishing time of the last assigned op
+ auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
+
+  // Remove inserted permutations from cpu's task set
+ for (const auto &it : inserted_permutations)
+ {
+ _backends_avail_time[_backend_resolver->getBackend("cpu")].erase(it);
+ }
+
+  // In case of the linear executor, measure just the exec time and the data transfer time
+ if (is_linear_exec)
+ {
+ VERBOSE(Scheduler::scheduleNode)
+ << "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
+ << " on " << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ return {total_transfer_cost, exec_time};
+ }
+ VERBOSE(Scheduler::scheduleNode) << "exec_time of (" << index.value() << ") " << node.getName()
+ << " quant==" << quant << " on " << backend->config()->id()
+ << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
+
+ return {prev_op_ft, exec_time};
+}
+
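+// Returns the max eft of the node's direct predecessors. For every predecessor scheduled on a
+// backend other than `backend`, also records {parent's eft, data transfer cost} in
+// `transfer_st_exec_time`.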
+int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Operation &node,
+ bool quant, std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+{
+ int64_t max_pred_eft = 0;
+ for (const auto &input : node.getInputs())
+ {
+ const auto &operand = _graph->operands().at(input);
+    // operations whose output is this input operand of the current node
+ for (const auto &defs : operand.getDef().list())
+ {
+ // Data transfer cost from this parent's backend to current node's backend:
+ auto parent_backend = _backend_resolver->getBackend(defs);
+
+ max_pred_eft = std::max(max_pred_eft, _parents_eft.at(defs));
+ if (parent_backend != backend)
+ {
+        // Multiply operand size by 2 because size must describe input+output size
+ int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
+ operand.info().total_size() * 2);
+ transfer_st_exec_time.insert({_parents_eft.at(defs), transfer_cost});
+ }
+ }
+ }
+ return max_pred_eft;
}
} // namespace compiler