/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ExecutorFactory.h"

#include <deque>
#include <functional>
#include "exec/ExecutionObservers.h"
#include "exec/LinearExecutor.h"
#include "exec/DataflowExecutor.h"
#include "exec/ParallelExecutor.h"
#include "compiler/BackendManager.h"
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
#include "backend/IPortableTensor.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/KernelGenerator.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/controlflow/TensorBuilder.h"
#include "util/TracingCtx.h"

#include <memory>

namespace onert
{
namespace
{

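// Wraps an IFunction and calls the backend's sync() after run(). Generated kernels are
// wrapped with this when he_profiling_mode is enabled (see the create*Executor functions
// below) so that each kernel has actually finished on its backend.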
class SyncFunction final : public exec::IFunction
{
public:
  virtual ~SyncFunction() = default;
  SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
      : _fn{std::move(fn)}, _config{config}
  {
    assert(_fn);
    assert(_config);
  }

  void run() override
  {
    _fn->run();
    _config->sync();
  }

  void prepare() override { _fn->prepare(); }

private:
  std::unique_ptr<exec::IFunction> _fn;
  std::shared_ptr<backend::IConfig> _config;
};

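// Registers a controlflow IOTensor for each of the given operand indices (the subgraph's
// inputs and outputs) in the controlflow backend's TensorRegistry.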
void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
                                 const ir::OperandIndexSequence &indices)
{
  // TODO Store controlflow backend in BackendContext
  std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
  for (const auto &e : lowered_graph.backend_contexts())
  {
    auto backend = e.first;
    auto &context = e.second;
    if (backend->config()->id() == backend::controlflow::Config::ID)
    {
      cf_tensor_reg =
          std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
    }
  }
  assert(cf_tensor_reg);

  for (auto ind : indices)
  {
    const auto &operand = lowered_graph.graph().operands().at(ind);
    auto tensor = std::make_unique<backend::controlflow::IOTensor>(
        operand.info(),
        ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */
        );

    // Add tensor to controlflow TensorRegistry.
    cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
  }
}

} // namespace
} // namespace onert

namespace onert
{
namespace compiler
{

ExecutorFactory &ExecutorFactory::get()
{
  static ExecutorFactory singleton;
  return singleton;
}

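// Register the available executor kinds. "Dataflow" and "Parallel" share one factory
// function; the trailing bool argument selects parallel execution.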
ExecutorFactory::ExecutorFactory()
{
  _map["Linear"] = createLinearExecutor;
  _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, false);
  _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
                               std::placeholders::_3, true);
}

exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                         const compiler::CompilerOptions &options,
                                         const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}

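// Collects, per backend, the operations (with their op-sequence layout) and the operands
// defined on that backend, then passes both lists to the corresponding
// BackendContext::initialize().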
void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
  struct Entry
  {
    std::vector<backend::BackendContext::OperationInfo> operation_list;
    std::vector<ir::OperandIndex> operand_list;
  };
  std::unordered_map<const backend::Backend *, Entry> backend_assets;

  // Build lists for operations
  lowered_graph->op_seqs().iterate(
      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
        auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
        auto backend = op_seq_li.at(op_seq_index)->backend();
        for (auto &operation_idx : op_seq.operations())
        {
          backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
        }
      });

  // Build lists for operands
  lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
    const auto lower_info = lowered_graph->getLowerInfo(ind);
    for (auto factor : lower_info->def_factors())
    {
      auto backend = factor.backend();
      backend_assets[backend].operand_list.emplace_back(ind);
    }
  });

  for (auto &pair : backend_assets)
  {
    auto backend = pair.first;
    auto &arg = pair.second;
    lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
  }
}

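// For every op-sequence input/output that has no tensor in its own backend's registry,
// looks the tensor up in the other backends' registries and registers it as a migrant
// tensor if it is portable.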
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph)
{
  TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};

  lowered_graph.op_seqs().iterate(
      [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
        auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
        auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
        for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
                            ir::Remove::UNDEFINED)
        {
          // If an OpSequence input/output does not have its own tensor object in this
          // backend, it must use a migrant tensor, so find the tensor in the other
          // backends' registries and register it here if it is portable.
          if (!backend_ctx->tensor_registry->getITensor(ind))
          {
            auto tensor = tensor_regs.getITensor(ind);
            assert(tensor); // The tensor must have been registered
            auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
            if (ptensor)
              backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
          }
        }
      });
}

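// Builds a LinearExecutor: initializes the backend contexts, linearizes the graph,
// generates tensors and kernels per backend, and attaches a tracing observer when a
// trace file path is given in the options.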
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                      const compiler::CompilerOptions &options,
                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
      *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                          ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);

  // linearize
  auto order = Linear::linearize(*lowered_graph);
  Linear::dump(*lowered_graph, order);

  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);

  // Pass runtime objects (tensor registries, executor map) to the controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }

  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last. The Permute layer is the only
    // operation that may have different ITensor objects for its input and output, and it
    // requires all other backends' tensors to be ready before it can run.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }

  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();

  auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                       order, options.tracing_ctx};

  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
        options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

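// Builds a DataflowExecutor, or a ParallelExecutor when `parallel` is true. The setup
// mirrors createLinearExecutor, except that the linear order is only used as topological
// information for the backends and a ProfileObserver is attached in profiling mode.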
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
    const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
  const auto &backend_contexts = lowered_graph->backend_contexts();

  initializeBackendContext(lowered_graph.get());

  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

  assert(!lowered_graph->graph().isBuildingPhase());

  initializeSubgraphIOTensors(
      *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
                          ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);

  // linearize
  // This order is only used to give topological-order information to the backends
  // TODO When we pass a partial graph to a backend, we can remove this
  auto order = Linear::linearize(*lowered_graph);
  for (auto &pair : backend_contexts)
  {
    pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo());
  }

  prepareMigrantTensors(*lowered_graph);

  // Pass runtime objects (tensor registries, executor map) to the controlflow KernelGenerator
  for (auto &pair : backend_contexts)
  {
    auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get());
    if (cf_context != nullptr)
    {
      auto cf_kernel_gen = cf_context->kernel_gen;
      cf_kernel_gen->setTensorRegistries(tensor_regs);
      cf_kernel_gen->setExecutorMap(executor_map);
    }
  }

  ExecutionBuilder builder;

  // Adjust the order of backends for the upcoming iteration
  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
  for (auto &pair : backend_contexts)
  {
    // NOTE The controlflow backend must be processed last. The Permute layer is the only
    // operation that may have different ITensor objects for its input and output, and it
    // requires all other backends' tensors to be ready before it can run.
    if (pair.first->config()->id() == "controlflow")
      ordered_contexts.emplace_back(pair.first, pair.second.get());
    else
      ordered_contexts.emplace_front(pair.first, pair.second.get());
  }

  // Generate kernels
  for (auto &pair : ordered_contexts)
  {
    auto codes = pair.second->genKernels(order, lowered_graph->op_seqs());
    for (auto &pair : codes)
    {
      auto &op_seq_ind = pair.first;
      auto &fn_seq = pair.second;
      auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind);
      auto lower_info = lowered_graph->getLowerInfo(op_seq_ind);
      if (options.he_profiling_mode)
        fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
      builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)});
    }
  }

  auto code_map = builder.releaseCodeMap();

  exec::ExecutorBase *exec = nullptr;
  if (parallel)
  {
    exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map),
                                      options.tracing_ctx};
  }
  else
  {
    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs,
                                                    std::move(code_map), options.tracing_ctx};
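    // In profiling mode, attach a ProfileObserver that records per-operation execution
    // times into ExecTime for all backends.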
    if (options.he_profiling_mode)
    {
      std::vector<const backend::Backend *> backends;
      for (const auto &pair : backend_contexts)
      {
        backends.push_back(pair.first);
      }
      auto et = std::make_shared<exec::ExecTime>(backends);
      std::unique_ptr<exec::IExecutionObserver> obs =
          std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
      dataflow_exec->addObserver(std::move(obs));
    }
    exec = dataflow_exec;
  }

  if (!options.trace_filepath.empty())
  {
    std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
        options.trace_filepath, exec->graph(), options.tracing_ctx);
    exec->addObserver(std::move(ctp));
  }

  return exec;
}

} // namespace compiler
} // namespace onert