return false;
}
+// Checks if a node can be merged into a subgraph: at most one produced input, no multi-use inputs
+static bool isMergable(const graph::Graph &graph, const model::Operation &node)
+{
+ size_t prev_op_cnt = 0; // number of non-constant inputs that are defined by some operation
+ for (const auto &input : node.getInputs())
+ {
+ // Skip constant operands; only non-constant inputs are examined below
+ const auto &operand = graph.operands().at(input);
+ if (operand.isConstant())
+ continue;
+
+ // This operand is defined by at least one operation, i.e. it is not a weight or bias
+ if (operand.getDef().list().size() > 0)
+ ++prev_op_cnt;
+
+ // Not mergable: several produced inputs (e.g. concat) or a multi-use operand (branch start)
+ if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
void Scheduler::scheduleShufflingBackends()
{
const auto all_backends = _backend_resolver->getAllBackends();
// finishing time of an op, that will come before current op
auto prev_op_ft = starting_time;
// until reach the "hole/gap", that is enough to run this op
- while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft < time_amount)
+ while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
{
- prev_op_ft = next_op_fst->first;
+ prev_op_ft = next_op_fst->first + 1;
++next_op_fst;
}
return prev_op_ft;
std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
+ // Permutation will cause creating a separate subgraph that contains just this permutation node.
+ // This isn't needed for Linear executor since it doesn't use subgraphs
+ // Number 1 ms is picked experimentally
+ int64_t permute_fine = 1000;
// Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
// permutation on other branches or non-nnfw specific tasks and have to wait for it.
// Number 2 is picked experimentally
const auto &node = _graph->operations().at(index);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
+ // if this node can be part of a subgraph, then assigning different backend will cause creating
+ // another subgraph
+ if (isMergable(*_graph, node))
+ {
+ permute_fine *= 2;
+ }
if (isWorkaroundSkip(*_graph, backend, node, quant))
{
return {_exec_time->getMax(), _exec_time->getMax()};
}
// get max eft of direct (one lavel above) predecessors
- auto max_pred_eft = predMaxEFT(backend, node, quant, transfer_st_exec_time);
+ auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
int64_t total_transfer_cost = 0;
std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
if (!is_linear_exec)
{
it.second *= CPU_DELAY;
+ it.second += permute_fine;
}
total_transfer_cost += it.second;
// In case linear executor measure just exec time and data transfer time
if (is_linear_exec)
{
- VERBOSE(Scheduler::scheduleNode)
+ VERBOSE(Scheduler::ESTAndExecTime)
<< "exec_time of (" << index.value() << ") " << node.getName() << " quant==" << quant
<< " on " << backend->config()->id() << " is " << exec_time
<< " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+
return {total_transfer_cost, exec_time};
}
- VERBOSE(Scheduler::scheduleNode) << "exec_time of (" << index.value() << ") " << node.getName()
- << " quant==" << quant << " on " << backend->config()->id()
- << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
+ VERBOSE(Scheduler::ESTAndExecTime) << "exec_time of (" << index.value() << ") " << node.getName()
+ << " quant==" << quant << " on " << backend->config()->id()
+ << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
return {prev_op_ft, exec_time};
}
int64_t Scheduler::predMaxEFT(const backend::Backend *backend, const model::Operation &node,
- bool quant, std::multimap<int64_t, int64_t> &transfer_st_exec_time)
+ std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
int64_t max_pred_eft = 0;
for (const auto &input : node.getInputs())
{
const auto &operand = _graph->operands().at(input);
+ const bool quant = operand.typeInfo().type() == model::DataType::QUANT8_ASYMM;
// operations, whose output is current node's this input operand
for (const auto &defs : operand.getDef().list())
{