#include "util/ConfigSource.h"
#include "compiler/IExecutionBuilder.h"
#include "compiler/BackendResolver.h"
-#include "backend/IConfig.h"
#include "backend/IShapeFixer.h"
#include "util/logging.h"
#include "util/Utils.h"
return std::move(_backend_resolver);
}
-int64_t Scheduler::getTime(const backend::Backend *backend, const std::string &operation,
- bool quant, uint32_t size)
+int64_t Scheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
+ bool quant, uint32_t size)
{
const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
if (time != _exec_time->NOT_FOUND)
- {
return time;
- };
- return _run_cache.at(backend).at(operation);
+
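+  // No profiled time for this operation: fall back to the support map filled in by
+  // tryBackend(), i.e. minimal cost (1) if the backend supports the node and "infinite"
+  // cost otherwise.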
+ return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
+}
+
+int64_t Scheduler::getPermuteTime(const backend::Backend *src_backend,
+ const backend::Backend *dst_backend, bool quant, uint32_t size)
+{
+ const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
+ if (time != _exec_time->NOT_FOUND)
+ return time;
+
+ // Makes the scheduler prefer keeping computations on one backend
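+  // (A linear, size-based guess; note that makeRank() uses total_size() / 100 as its own
+  // fallback for the same purpose.)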
+ return size / 200;
}
int64_t Scheduler::tryBackend(const model::Operation &node, const backend::Backend *backend)
{
- auto iter = _run_cache.find(backend);
- if (iter != _run_cache.end())
+ auto iter = _is_supported.find(backend);
+ if (iter != _is_supported.end())
{
auto it2 = iter->second.find(node.getName());
if (it2 != iter->second.end())
{
- return _run_cache[backend][node.getName()];
+      return it2->second ? 1 : _exec_time->getMax();
}
}
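+  // Probing without profiling data only makes sense while collecting it, so fail hard
+  // otherwise. This check must stay outside the try block below: the catch swallows
+  // std::runtime_error, so throwing inside the try would silently mark the node as
+  // unsupported instead of surfacing the configuration error.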
+  if (!util::getConfigBool(util::config::PROFILING_MODE))
+    throw std::runtime_error("Trying to run the heterogeneous scheduler with profiling mode "
+                             "disabled while there is no profiling information for some "
+                             "nodes. Run the scheduler with profiling mode enabled first.");
+
  try
  {
    _backend_resolver->getBackendContext(backend)->shape_fixer->fix(node);
-    // always prefer the one, that is supported
-    _run_cache[backend][node.getName()] = 1;
+    _is_supported[backend][node.getName()] = true;
  }
catch (std::runtime_error &e)
{
- // Put to max so that during scheduling supported backend will be selected
- _run_cache[backend][node.getName()] = _exec_time->getMax();
+ _is_supported[backend][node.getName()] = false;
}
- return _run_cache[backend][node.getName()];
+ return _is_supported[backend][node.getName()] ? 1 : _exec_time->getMax();
}
void Scheduler::makeRank()
int64_t std = 0;
for (const auto backend : _all_backends)
{
- const auto exec_time = getTime(backend, node.getName(), quant, size);
+ const auto exec_time = getOpTime(backend, node.getName(), quant, size);
if (exec_time < _exec_time->getMax())
{
std += (exec_time - rank) * (exec_time - rank);
_exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size());
if (transfer_cost == _exec_time->NOT_FOUND)
{
- // makes the scheduler prefer keeping computations on one backend
+ // Makes the scheduler prefer keeping computations on one backend
transfer_cost = operand.info().total_size() / 100;
- _run_cache[backend][other_backend->config()->id()] = transfer_cost;
}
avg_transfer_cost += transfer_cost;
}
return {_exec_time->getMax(), _exec_time->getMax()};
}
// get average exec time of the op on this backend
- auto exec_time = getTime(backend, node.getName(), quant, size);
+ auto exec_time = getOpTime(backend, node.getName(), quant, size);
if (backend->config()->id() == "cpu" && is_parallel_exec)
{
exec_time *= CPU_DELAY;
if (parent_backend != backend)
{
// Multiply operand size by 2 because size must describe input+output size
- int64_t transfer_cost = getTime(parent_backend, backend->config()->id(), quant,
- input_operand.info().total_size() * 2);
+ int64_t transfer_cost =
+ getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
* @param[in] backend_resolver backend resolver
*/
Scheduler(const neurun::model::Operands &operands, std::vector<const backend::Backend *> backends)
- : _run_cache{}, _backends_avail_time{}, _ops_eft{},
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
_op_to_rank{std::make_shared<model::OperationIndexMap<int64_t>>()},
_all_backends(std::move(backends))
{
int64_t backendAvailableTime(const backend::Backend *backend, const int64_t &starting_time,
const int64_t &time_amount);
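+  /**
+   * @brief Returns the profiled execution time of the operation on the given backend, or an
+   *        estimate when no profiling data is found: 1 if the backend supports the operation,
+   *        _exec_time->getMax() otherwise
+   */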
- int64_t getTime(const backend::Backend *backend, const std::string &operation, bool quant,
- uint32_t size);
+ int64_t getOpTime(const backend::Backend *backend, const std::string &operation, bool quant,
+ uint32_t size);
+
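+  /**
+   * @brief Returns the profiled permutation time between two backends, or a size-based
+   *        estimate when no profiling data is found
+   */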
+ int64_t getPermuteTime(const backend::Backend *src_backend, const backend::Backend *dst_backend,
+ bool quant, uint32_t size);
void scheduleShufflingBackends();
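+  /**
+   * @brief Probes whether the backend can handle the node via its ShapeFixer and records the
+   *        result in _is_supported; returns 1 on success and _exec_time->getMax() on failure
+   */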
int64_t tryBackend(const model::Operation &node, const backend::Backend *backend);
private:
- std::unordered_map<const backend::Backend *, std::unordered_map<std::string, int>> _run_cache;
+  // Stores backend/node-name pairs whose execution time is unknown and hints the scheduler
+  // whether it should assign those backends to those nodes:
+  // * false for nodes the backend does not support
+  // * true for supported nodes (set during rank calculation when profiling mode is enabled)
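+  // For example, _is_supported[some_backend]["Gather"] == false records that the backend's
+  // ShapeFixer rejected Gather in tryBackend(), so getOpTime() returns _exec_time->getMax()
+  // for that pair. (Backend and operation names here are illustrative.)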
+ std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
// Finishing and starting time of each backend
std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
model::OperationIndexMap<int64_t> _ops_eft;