/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ExecutorFactory.h"

#include <deque>
#include <functional>
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "exec/ParallelExecutor.h"
#include "compiler/BackendManager.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
#include "backend/IPortableTensor.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/controlflow/TensorBuilder.h"
#include "util/TracingCtx.h"

#include <memory>

namespace onert
{
namespace
{

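// Wraps an IFunction and calls the backend's sync() after run(). Generated kernels are
// wrapped with this when he_profiling_mode is enabled (see the create*Executor functions
// below) so that each kernel has actually finished on its backend.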
class SyncFunction final : public exec::IFunction
{
public:
  virtual ~SyncFunction() = default;
  SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
      : _fn{std::move(fn)}, _config{config}
  {
    assert(_fn);
    assert(_config);
  }

  void run() override
  {
    _fn->run();
    _config->sync();
  }

  void prepare() override { _fn->prepare(); }

private:
  std::unique_ptr<exec::IFunction> _fn;
  std::shared_ptr<backend::IConfig> _config;
};

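// Registers a controlflow IOTensor for each of the given operand indices (the subgraph's
// inputs and outputs) in the controlflow backend's TensorRegistry.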
void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
                                 const ir::OperandIndexSequence &indices)
{
  // TODO Store controlflow backend in BackendContext
  std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
  for (const auto &e : lowered_graph.backend_contexts())
  {
    auto backend = e.first;
    auto &context = e.second;
    if (backend->config()->id() == backend::controlflow::Config::ID)
    {
      cf_tensor_reg =
          std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
    }
  }
  assert(cf_tensor_reg);

  for (auto ind : indices)
  {
    const auto &operand = lowered_graph.graph().operands().at(ind);
    auto tensor = std::make_unique<backend::controlflow::IOTensor>(
        operand.info(),
        ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
        );

    // Add tensor to controlflow TensorRegistry.
    cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
  }
}

} // namespace
} // namespace onert

namespace onert
{
namespace compiler
{

ExecutorFactory &ExecutorFactory::get()
{
  static ExecutorFactory singleton;
  return singleton;
}

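// Register the available executor kinds. "Dataflow" and "Parallel" share one factory
// function; the trailing bool argument selects parallel execution.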
ExecutorFactory::ExecutorFactory()
{
  _map["Linear"] = createLinearExecutor;
  _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, false);
  _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, true);
}

exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                         const compiler::CompilerOptions &options,
                                         const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}

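// Collects, per backend, the operations (with their op-sequence layout) and the operands
// defined on that backend, then passes both lists to the corresponding
// BackendContext::initialize().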
void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
  struct Entry
  {
    std::vector<backend::BackendContext::OperationInfo> operation_list;
    std::vector<ir::OperandIndex> operand_list;
  };
  std::unordered_map<const backend::Backend *, Entry> backend_assets;

  // Build lists for operations
  lowered_graph->op_seqs().iterate(
      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
        auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
        auto backend = op_seq_li.at(op_seq_index)->backend();
        for (auto &operation_idx : op_seq.operations())
        {
          backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
        }
      });

  // Build lists for operands
  lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
    const auto lower_info = lowered_graph->getLowerInfo(ind);
    for (auto factor : lower_info->def_factors())
    {
      auto backend = factor.backend();
      backend_assets[backend].operand_list.emplace_back(ind);
    }
  });

  for (auto &pair : backend_assets)
  {
    auto backend = pair.first;
    auto &arg = pair.second;
    lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
  }
}

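// For every op-sequence input/output that has no tensor in its own backend's registry,
// looks the tensor up in the other backends' registries and registers it as a migrant
// tensor if it is portable.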
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
  TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};

  lowered_graph.op_seqs().iterate(
      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
        auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
        auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
        for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
                            ir::Remove::UNDEFINED)
        {
          // If an OpSequence input/output does not have its own tensor object in this
          // backend, it must use a migrant tensor, so find the tensor in the other
          // backends' registries and register it here if it is portable.
          if (!backend_ctx->tensor_registry->getITensor(ind))
          {
            auto tensor = tensor_regs.getITensor(ind);
            assert(tensor); // The tensor must have been registered
            auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
            if (ptensor)
              backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
          }
        }
      });
}

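// Builds a LinearExecutor: initializes the backend contexts, linearizes the graph,
// generates tensors and kernels per backend, and attaches a tracing observer when a
// trace file path is given in the options.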
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                      const compiler::CompilerOptions &options,
                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
      *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                          ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);

  // linearize
  auto order = Linear::linearize(*lowered_graph);
  Linear::dump(*lowered_graph, order);

  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);

  // Pass runtime objects (tensor registries, executor map) to the controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }

  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last. The Permute layer is the only
    // operation that may have different ITensor objects for its input and output, and it
    // requires all other backends' tensors to be ready before it can run.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }

  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();

  auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                       order, options.tracing_ctx};

  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
        options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

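// Builds a DataflowExecutor, or a ParallelExecutor when `parallel` is true. The setup
// mirrors createLinearExecutor, except that the linear order is only used as topological
// information for the backends and a ProfileObserver is attached in profiling mode.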
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
    const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
      *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                          ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);

  // linearize
  // This order is only used to give topological-order information to the backends
  // TODO When we pass a partial graph to a backend, we can remove this
  auto order = Linear::linearize(*lowered_graph);
  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);

  // Pass runtime objects (tensor registries, executor map) to the controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }

  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last. The Permute layer is the only
    // operation that may have different ITensor objects for its input and output, and it
    // requires all other backends' tensors to be ready before it can run.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }

  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();

  exec::ExecutorBase *exec = nullptr;
  if (parallel)
  {
    exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                      options.tracing_ctx};
  }
  else
  {
    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
                                                    std::move(code_map), options.tracing_ctx};
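    // In profiling mode, attach a ProfileObserver that records per-operation execution
    // times into ExecTime for all backends.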
    if (options.he_profiling_mode)
    {
      std::vector<const backend::Backend *> backends;
      for (const auto &pair : backend_contexts)
      {
        backends.push_back(pair.first);
      }
      auto et = std::make_shared<exec::ExecTime>(backends);
      std::unique_ptr<exec::IExecutionObserver> obs =
          std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
      dataflow_exec->addObserver(std::move(obs));
    }
    exec = dataflow_exec;
  }

  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
        options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

} // namespace compiler
} // namespace onert