/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ExecutorFactory.h"

#include <deque>
#include <functional>
#include <unordered_map>

#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "exec/ParallelExecutor.h"
#include "compiler/BackendManager.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
#include "backend/IPortableTensor.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/controlflow/TensorBuilder.h"
#include "util/TracingCtx.h"

namespace onert
{
namespace
{
class SyncFunction final : public exec::IFunction
{
public:
  virtual ~SyncFunction() = default;
  SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
    : _fn{std::move(fn)}, _config{config}
  {
    assert(_fn);
    assert(_config);
  }

  void run() override
  {
    _fn->run();
    _config->sync();
  }

  void prepare() override { _fn->prepare(); }

private:
  std::unique_ptr<exec::IFunction> _fn;
  std::shared_ptr<backend::IConfig> _config;
};
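// Usage sketch (illustrative): in he_profiling_mode the factory rewraps each
// generated FunctionSequence below so that the backend is synchronized right
// after every kernel, which makes per-operation timings accurate:
//   fn_seq->wrap<SyncFunction>(lower_info->backend()->config());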
void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
                                 const ir::OperandIndexSequence &indices)
{
  // TODO Store controlflow backend in BackendContext
  std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
  for (const auto &e : lowered_graph.backend_contexts())
  {
    auto backend = e.first;
    auto &context = e.second;
    if (backend->config()->id() == backend::controlflow::Config::ID)
    {
      cf_tensor_reg =
        std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
    }
  }
  assert(cf_tensor_reg);

  for (auto ind : indices)
  {
    const auto &operand = lowered_graph.graph().operands().at(ind);
    auto tensor = std::make_unique<backend::controlflow::IOTensor>(
      operand.info(),
      ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
    );

    // Add tensor to controlflow TensorRegistry.
    cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
  }
}

} // namespace
} // namespace onert

namespace onert
{
namespace compiler
{
ExecutorFactory &ExecutorFactory::get()
{
  static ExecutorFactory singleton;
  return singleton;
}
ExecutorFactory::ExecutorFactory()
{
  _map["Linear"] = createLinearExecutor;
  _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, false);
  _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, true);
}
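// Dispatch sketch (illustrative): create() below resolves options.executor by
// name -- "Linear", "Dataflow" or "Parallel" as registered above, e.g.
//   auto *executor =
//     ExecutorFactory::get().create(std::move(lowered_graph), options, executor_map);
// An unrecognized name makes _map.at() throw std::out_of_range.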
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                         const compiler::CompilerOptions &options,
                                         const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
  struct Entry
  {
    std::vector<backend::BackendContext::OperationInfo> operation_list;
    std::vector<ir::OperandIndex> operand_list;
  };
  std::unordered_map<const backend::Backend *, Entry> backend_assets;

  // Build lists for operations
  lowered_graph->op_seqs().iterate(
    [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
      auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
      auto backend = op_seq_li.at(op_seq_index)->backend();
      for (auto &operation_idx : op_seq.operations())
      {
        backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
      }
    });

  // Build lists for operands
  lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
    const auto lower_info = lowered_graph->getLowerInfo(ind);
    for (auto factor : lower_info->def_factors())
    {
      auto backend = factor.backend();
      backend_assets[backend].operand_list.emplace_back(ind);
    }
  });

  for (auto &pair : backend_assets)
  {
    auto backend = pair.first;
    auto &arg = pair.second;
    lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
  }
}
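// After initializeBackendContext(), every backend context knows which
// operations (with their op-sequence layouts) and which operands it owns;
// the genTensors()/genKernels() calls in the create* functions below are
// expected to operate on exactly those per-backend subsets.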
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
  TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};

  lowered_graph.op_seqs().iterate(
    [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
      auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
      auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
      for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
                        ir::Remove::UNDEFINED)
      {
        // If an OpSequence input/output tensor does not have its own tensor object,
        // it must be using a migrant tensor, so find the tensor in the other backends'
        // tensor registries and register it to this backend's registry if it is portable
        if (!backend_ctx->tensor_registry->getITensor(ind))
        {
          auto tensor = tensor_regs.getITensor(ind);
          assert(tensor); // The tensor must have been registered
          auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
          if (ptensor)
            backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
        }
      }
    });
}
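// Example (illustrative, backend names assumed): if a cpu op sequence reads a
// tensor defined by an acl_cl op sequence, the cpu registry has no tensor for
// that index, so prepareMigrantTensors() registers the acl_cl tensor into the
// cpu registry as a migrant tensor -- provided it is an IPortableTensor.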
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                      const compiler::CompilerOptions &options,
                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
    *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                      ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
  // linearize
  auto order = Linear::linearize(*lowered_graph);
  Linear::dump(*lowered_graph, order);

  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);
  // Give some runtime objects to controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }
  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last, because Permute is the
    // only operation that may have different ITensor objects for its input and
    // output, and it requires all other backends' tensors to be ready to use.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }
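  // For example (illustrative): with backend contexts {cpu, controlflow},
  // iteration visits cpu first and controlflow last, because controlflow is
  // appended at the back while every other backend is pushed to the front.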
  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();
  auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                       order, options.tracing_ctx};
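  // NOTE Unlike DataflowExecutor/ParallelExecutor below, LinearExecutor also
  // receives the linearized `order`, since it runs op sequences in that fixed
  // order instead of dispatching them by data dependency.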
  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
      options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
  std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
  const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
    *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                      ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
  // This order is just for giving topological order info to the backends
  // TODO When we pass a partial graph to a backend, we can remove this
  auto order = Linear::linearize(*lowered_graph);
  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);
  // Give some runtime objects to controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }
  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last, because Permute is the
    // only operation that may have different ITensor objects for its input and
    // output, and it requires all other backends' tensors to be ready to use.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }
  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();
  exec::ExecutorBase *exec = nullptr;
  if (parallel)
  {
    exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                      options.tracing_ctx};
  }
  else
  {
    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
                                                    std::move(code_map), options.tracing_ctx};
    if (options.he_profiling_mode)
    {
      std::vector<const backend::Backend *> backends;
      for (const auto &pair : backend_contexts)
      {
        backends.push_back(pair.first);
      }
      auto et = std::make_shared<exec::ExecTime>(backends);
      std::unique_ptr<exec::IExecutionObserver> obs =
        std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
      dataflow_exec->addObserver(std::move(obs));
    }
    exec = dataflow_exec;
  }
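  // In he_profiling_mode a ProfileObserver records per-operation execution
  // times into ExecTime (presumably consumed by scheduling decisions on a
  // later run); note that only the non-parallel DataflowExecutor is profiled.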
  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
      options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

} // namespace compiler
} // namespace onert