runtime/onert/core/src/exec/ExecutorBase.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ExecutorBase.h"
  18
  19 #include "ShapeConverter.h"
  20
  21 #include <misc/polymorphic_downcast.h>
  22
  23 namespace onert
  24 {
  25 namespace exec
  26 {
  27
  28 ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
  29                            backend::BackendContexts &&backend_contexts,
  30                            const compiler::TensorRegistries &tensor_regs,
  31                            const util::TracingCtx *tracing_ctx)
  32   : _lowered_graph{std::move(lowered_graph)},
  33     _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
  34     _tracing_ctx(tracing_ctx)
  35 {
  36   auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
  37     assert(tensors.empty());
  38     for (auto ind : ind_seq)
  39     {
  40       backend::ITensor *tensor = tensor_regs.getITensor(ind);
  41       assert(tensor != nullptr);
  42       auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
  43       tensors.push_back(io_tensor);
  44     }
  45   };
  46   build_tensor_list(_graph.getInputs(), _input_tensors);
  47   build_tensor_list(_graph.getOutputs(), _output_tensors);
  48 }
  49
  50 void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
  51                            const std::vector<backend::IPortableTensor *> &outputs)
  52 {
  53   // For thread-safe, use mutex
  54   // TODO: if all used backends on this executor are thread-safe,
  55   //       do not need to use mutex (otherwise, use mutex)
  56   // Deadlock occurs when an Executor is called recursively.
  57   std::lock_guard<std::mutex> lock(_mutex);
  58
  59   assert(inputs.size() == _graph.getInputs().size());
  60   assert(inputs.size() == _input_tensors.size());
  61   for (uint32_t n = 0; n < inputs.size(); ++n)
  62   {
  63     const auto input = inputs[n];
  64     assert(input->buffer() != nullptr);
  65     auto input_tensor = _input_tensors[n];
  66     assert(input_tensor != nullptr);
  67     if (input != nullptr)
  68     {
  69       const auto orig_input_shape = input_tensor->orig_info().shape();
  70       const auto changed_input_shape =
  71         convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
  72       if (input_tensor->get_info().shape() != changed_input_shape)
  73       {
  74         // TODO Fix this workaround that is introduced since cpu based kernels directly use `_info`
  75         // rather than interface methods to avoid virtual function calls.
  76         input_tensor->setShapeOfIPortableTensor(changed_input_shape);
  77       }
  78       if (orig_input_shape != changed_input_shape)
  79       {
  80         input_tensor->set_dynamic();
  81       }
  82     }
  83     input_tensor->setTensor(input);
  84   }
  85
  86   assert(outputs.size() == _graph.getOutputs().size());
  87   assert(outputs.size() == _output_tensors.size());
  88   for (uint32_t n = 0; n < outputs.size(); ++n)
  89   {
  90     const auto output = outputs[n];
  91     // assert(dst_tensor->buffer() != nullptr);
  92     auto output_tensor = _output_tensors[n];
  93     assert(output_tensor != nullptr);
  94     output_tensor->setTensor(output);
  95   }
  96
  97   executeImpl();
  98 }
  99
 100 void ExecutorBase::execute(const IODescription &desc)
 101 {
 102   // For thread-safe, use mutex
 103   // TODO: if all used backends on this executor are thread-safe,
 104   //       do not need to use mutex (otherwise, use mutex)
 105   std::lock_guard<std::mutex> lock(_mutex);
 106
 107   // Set input(s)
 108   assert(_input_tensors.size() == desc.inputs.size());
 109   for (uint32_t i = 0; i < _input_tensors.size(); ++i)
 110   {
 111     auto tensor = _input_tensors[i];
 112
 113     // TODO Check if (desc.inputs[i] == nullptr)
 114     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 115     tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
 116                           desc.inputs[i]->size);
 117
 118     auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
 119     if (input_shape != desc.dynamic_input_shapes.end())
 120     {
 121       tensor->set_dynamic();
 122       tensor->setShape(input_shape->second);
 123       /*
 124        * Changes tensor shape and allocate memory since its shape was changed
 125        * perhaps by nnfw_set_input_tensorinfo()
 126        *
 127        * Cases are:
 128        * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 129        *                                                 (a)          (b)
 130        *
 131        * at (a), operand is static, tensor is static - memory dealloc is not needed
 132        *   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 133        * at (b), operand is static, tensor is dynamic - memory dealloc is needed
 134        *
 135        * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 136        *                                                  (a)          (b)
 137        *
 138        * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
 139        *                                       since it has not been allocated yet
 140        * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
 141        */
 142       tensor->applyShape(input_shape->second);
 143     }
 144   }
 145
 146   assert(_output_tensors.size() == desc.outputs.size());
 147   for (uint32_t i = 0; i < _output_tensors.size(); ++i)
 148   {
 149     auto tensor = _output_tensors[i];
 150
 151     if (desc.outputs[i] == nullptr)
 152       throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
 153     tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
 154     tensor->set_dynamic(); // It can't be resized but shape could change
 155   }
 156
 157   executeImpl();
 158
 159   // Update output(s) desc
 160   for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
 161   {
 162     ir::IOIndex output_index{n};
 163     // Optional output
 164     if (desc.outputs.at(n) == nullptr)
 165     {
 166       continue;
 167     }
 168     auto &output = *desc.outputs.at(n);
 169
 170     // set shape of outputDesc to tensor shape since tensor can be dynamic
 171     const auto output_tensor_shape = _output_tensors[n]->getShape();
 172     output.info.shape(
 173       convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
 174   }
 175 }
 176
 177 bool ExecutorBase::hasDynamicInput()
 178 {
 179   for (auto &&tensor : _input_tensors)
 180   {
 181     if (tensor->is_dynamic())
 182       return true;
 183   }
 184   return false;
 185 }
 186
 187 } // namespace exec
 188 } // namespace onert