runtime/onert/core/src/exec/ExecutorBase.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ExecutorBase.h"
  18 #include "ShapeConverter.h"
  19
  20 #include "backend/controlflow/UserTensor.h"
  21 #include "util/logging.h"
  22 #include "misc/polymorphic_downcast.h"
  23
  24 namespace onert
  25 {
  26 namespace exec
  27 {
  28
  29 ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
  30                            const compiler::TensorRegistries &tensor_regs,
  31                            const util::TracingCtx *tracing_ctx)
  32     : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(),
  33       _tracing_ctx(tracing_ctx)
  34 {
  35   auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
  36     assert(tensors.empty());
  37     for (auto ind : ind_seq)
  38     {
  39       backend::ITensor *tensor = tensor_regs.getITensor(ind);
  40       assert(tensor != nullptr);
  41       auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor);
  42       tensors.push_back(io_tensor);
  43     }
  44   };
  45   build_tensor_list(_graph.getInputs(), _input_tensors);
  46   build_tensor_list(_graph.getOutputs(), _output_tensors);
  47 }
  48
  49 void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
  50                            const std::vector<backend::IPortableTensor *> &outputs)
  51 {
  52   // For thread-safe, use mutex
  53   // TODO: if all used backends on this executor are thread-safe,
  54   //       do not need to use mutex (otherwise, use mutex)
  55   // Deadlock occurs when an Executor is called recursively.
  56   std::lock_guard<std::mutex> lock(_mutex);
  57
  58   assert(inputs.size() == _graph.getInputs().size());
  59   assert(inputs.size() == _input_tensors.size());
  60   for (uint32_t n = 0; n < inputs.size(); ++n)
  61   {
  62     const auto input = inputs[n];
  63     assert(input->buffer() != nullptr);
  64     auto input_tensor = _input_tensors[n];
  65     assert(input_tensor != nullptr);
  66     if (input != nullptr)
  67     {
  68       const auto orig_input_shape = input_tensor->orig_info().shape();
  69       const auto changed_input_shape =
  70           convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
  71       if (orig_input_shape != changed_input_shape)
  72       {
  73         input_tensor->set_dynamic();
  74       }
  75     }
  76     input_tensor->setTensor(input);
  77   }
  78
  79   assert(outputs.size() == _graph.getOutputs().size());
  80   assert(outputs.size() == _output_tensors.size());
  81   for (uint32_t n = 0; n < outputs.size(); ++n)
  82   {
  83     const auto output = outputs[n];
  84     // assert(dst_tensor->buffer() != nullptr);
  85     auto output_tensor = _output_tensors[n];
  86     assert(output_tensor != nullptr);
  87     output_tensor->setTensor(output);
  88   }
  89
  90   executeImpl();
  91 }
  92
  93 void ExecutorBase::execute(const IODescription &desc)
  94 {
  95   // For thread-safe, use mutex
  96   // TODO: if all used backends on this executor are thread-safe,
  97   //       do not need to use mutex (otherwise, use mutex)
  98   std::lock_guard<std::mutex> lock(_mutex);
  99
 100   // Set input(s)
 101   assert(_input_tensors.size() == desc.inputs.size());
 102   for (uint32_t i = 0; i < _input_tensors.size(); ++i)
 103   {
 104     auto tensor = _input_tensors[i];
 105
 106     // TODO Check if (desc.inputs[i] == nullptr)
 107     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 108     tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
 109                           desc.inputs[i]->size);
 110
 111     auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
 112     if (input_shape != desc.dynamic_input_shapes.end())
 113     {
 114       tensor->set_dynamic();
 115       tensor->setShape(input_shape->second);
 116     }
 117
 118     handleDynamicInputTensor(ir::IOIndex{i}, desc);
 119   }
 120
 121   assert(_output_tensors.size() == desc.outputs.size());
 122   for (uint32_t i = 0; i < _output_tensors.size(); ++i)
 123   {
 124     auto tensor = _output_tensors[i];
 125
 126     if (desc.outputs[i] == nullptr)
 127       throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
 128     tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
 129     tensor->set_dynamic(); // It can't be resized but shape could change
 130   }
 131
 132   executeImpl();
 133
 134   // Update output(s) desc
 135   for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
 136   {
 137     ir::IOIndex output_index{n};
 138     // Optional output
 139     if (desc.outputs.at(n) == nullptr)
 140     {
 141       continue;
 142     }
 143     auto &output = *desc.outputs.at(n);
 144
 145     // set shape of outputDesc to tensor shape since tensor can be dynamic
 146     const auto output_tensor_shape = _output_tensors[n]->getShape();
 147     output.info.shape(
 148         convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
 149   }
 150 }
 151
 152 /**
 153  * @brief Changes tensor shape and allocate memory
 154  *        if input shape was changed by nnfw_set_input_tensorinfo()
 155  *
 156  * @note  Cases are:
 157  *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 158  *                                                        (a)          (b)
 159  *
 160  *           at (a), operand is static, tensor is static - memory dealloc is not needed
 161  *                   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 162  *           at (b), operand is static, tensor is dynamic - memory dealloc is needed
 163  *
 164  *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 165  *                                                         (a)          (b)
 166  *
 167  *           at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
 168  *                                                           since it has not been allocated yet
 169  *           at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
 170  */
 171 void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
 172 {
 173   auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
 174   if (shape_sig_found != desc.dynamic_input_shapes.end())
 175   {
 176     auto changed_input_shape = shape_sig_found->second;
 177     _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
 178   }
 179 }
 180
 181 bool ExecutorBase::hasDynamicInput()
 182 {
 183   for (auto &tensor : _input_tensors)
 184   {
 185     if (tensor->is_dynamic())
 186       return true;
 187   }
 188   return false;
 189 }
 190
 191 } // namespace exec
 192 } // namespace onert