return true;
}
+void HEScheduler::scheduleBranch(const model::OperationIndex &index,
+                                 model::OperationIndexMap<bool> &scheduled)
+{
+  auto loc_index = index;
+  const backend::Backend *parent_backend = nullptr;
+  while (true)
+  {
+    // Already scheduled as part of a previously processed branch.
+    if (scheduled[loc_index])
+    {
+      return;
+    }
+    // Stop the branch if the scheduler prefers a backend other than the predecessor's.
+    if (!scheduleNode(loc_index, parent_backend))
+    {
+      return;
+    }
+    scheduled[loc_index] = true;
+    parent_backend = _backend_resolver->getBackend(loc_index);
+
+    const auto &node = _graph->operations().at(loc_index);
+    /* Get the only output operand, that is the input of the next single operation
+     * and just this node's output. */
+    if (node.getOutputs().size() != 1)
+    {
+      return;
+    }
+    const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
+    const auto &uses = only_out_operand.getUses().list();
+    /* Stop if the operand is a graph output (no uses — calling front() on an empty
+     * list would be UB) or feeds several operations (start of new branches). */
+    if (uses.size() != 1)
+    {
+      return;
+    }
+    loc_index = uses.front();
+    /* Verify that the next node is neither the beginning nor the ending node of a branch. */
+    const auto &next_node = _graph->operations().at(loc_index);
+    if (!isMergable(*_graph, next_node))
+    {
+      return;
+    }
+  }
+}
+
std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Graph &graph)
{
_graph = &graph;
}
}
+ model::OperationIndexMap<bool> visited;
+ graph.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
+ visited[index] = false;
+ });
// for each task select the backend with the smallest earliest finishing time(eft)
for (const auto &rank : _rank_to_op)
{
- scheduleNode(rank.second);
+ scheduleBranch(rank.second, visited);
}
VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
return std::move(_backend_resolver);
return prev_op_ft;
}
-void HEScheduler::scheduleNode(const model::OperationIndex &index)
+bool HEScheduler::scheduleNode(const model::OperationIndex &index,
+ const backend::Backend *parent_backend)
{
VERBOSE(HEScheduler::scheduleNode) << "scheduling (" << index.value() << ")" << std::endl;
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
throw std::runtime_error{"Fail to choose backend on scheduler"};
}
+  // This node is part of a branch, but another backend is assigned to it
+ if (parent_backend && parent_backend != chosen_backend)
+ {
+ return false;
+ }
for (const auto &it : selected_transfer_st_exec_time)
{
auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
VERBOSE(HEScheduler::scheduleNode) << "backend for " << node.getName() << " is "
<< chosen_backend->config()->id() << ". Its eft: " << eft
<< std::endl;
+ return true;
}
std::pair<int64_t, int64_t>
private:
bool isNodeProfiled(const model::Operation &);
- void scheduleNode(const model::OperationIndex &);
+ bool scheduleNode(const model::OperationIndex &, const backend::Backend *parent_backend);
/**
* @brief Get earliest starting time and execution time of an operation on a backend.
*
int64_t tryBackend(const model::Operation &node, const backend::Backend *backend);
+  /**
+   * @brief Schedule a node and its successors until:
+   *        1. there is no branching or connection of multiple branches
+   *        2. for a subsequent node: a backend other than the predecessor's is preferred
+   *
+   * @param[in] index: index of an operation
+   * @param[in] scheduled: a map to check if this node has already been scheduled
+   *
+   * @return N/A
+   */
+ void scheduleBranch(const model::OperationIndex &index,
+ model::OperationIndexMap<bool> &scheduled);
+
private:
// This variable stores backend/node pairs with unknown execution time, and hints scheduler
// whether it should assign these backends to these nodes:
{
// Increase execution time for GPU backend
ExecTime et(_mock_backends);
- /* for parallel executor: set a time, that is larger than branches_cnt*npu_exec_time
- so that npu is prefered: the i+1 level node of the first branch will wait for npu
- until it finishes the i-th nodes of all other branches in BFS order*/
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 2 + 1);
- /* for parallel executor: set ET of FC larger than Mul's to be determinant:
- if they are equal and scheduling is done in order mul1->FC1->FC2->mul2,
- then for mul2 gpu is selected since NPU_ET*3 > GPU_ET(which is NPU_ET * 2 + 1)*/
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 2 + 2);
+  /* for parallel executor: set a time that is larger than sum_of_other_branches_nodes_cnt *
+   * npu_exec_time so that npu is preferred: the ith branch will wait for npu until it finishes the
+   * [0;i-1] branches' nodes in DFS order. In each branch it goes deep until it encounters
+   * branching or the scheduler assigns another backend to a node */
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
et.uploadOperationsExecTime();
// Test scheduler