runtime/onert/core/src/exec/ExecutorBase.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ExecutorBase.h"
  18
  19 #include "backend/ITensor.h"
  20 #include "backend/controlflow/UserTensor.h"
  21 #include "backend/cpu_common/Tensor.h"
  22 #include "util/logging.h"
  23
  24 namespace onert
  25 {
  26 namespace exec
  27 {
  28
  29 ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
  30                            const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
  31                            const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
  32                            const compiler::TensorRegistries &tensor_regs,
  33                            backend::TensorManagerSet &&tensor_mgrs)
  34     : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
  35       _input_tensors{input_tensors}, _output_tensors{output_tensors},
  36       _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
  37 {
  38   // TODO Fix the way of knowing whether it is primary or not
  39   bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
  40   if (!primary_executor)
  41   {
  42     auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
  43       std::vector<std::shared_ptr<backend::ITensor>> list;
  44       for (auto ind : ind_seq)
  45       {
  46         std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
  47         assert(tensor != nullptr);
  48         DynAllocInfo dyn_alloc_info{ind};
  49         _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  50         list.push_back(tensor);
  51       }
  52       return list;
  53     };
  54     auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
  55       std::vector<std::shared_ptr<backend::ITensor>> list;
  56       for (auto ind : ind_seq)
  57       {
  58         std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
  59         assert(tensor != nullptr);
  60         DynAllocInfo dyn_alloc_info{ind};
  61         _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  62         list.push_back(tensor);
  63       }
  64       return list;
  65     };
  66     _input_tensors = build_input_tensor_list(_graph.getInputs());
  67     _output_tensors = build_output_tensor_list(_graph.getOutputs());
  68   }
  69   else
  70   {
  71     assert(input_tensors.size() == _graph.getInputs().size());
  72     assert(output_tensors.size() == _graph.getOutputs().size());
  73     for (uint32_t i = 0; i < input_tensors.size(); i++)
  74     {
  75       auto tensor = input_tensors[i];
  76       auto ind = _graph.getInputs().at(i);
  77       DynAllocInfo dyn_alloc_info{ind};
  78       _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  79     }
  80     for (uint32_t i = 0; i < output_tensors.size(); i++)
  81     {
  82       auto tensor = output_tensors[i];
  83       auto ind = _graph.getOutputs().at(i);
  84       DynAllocInfo dyn_alloc_info{ind};
  85       _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  86     }
  87   }
  88 }
  89
  90 void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
  91                            const std::shared_ptr<IPermuteFunction> &pre_fn)
  92 {
  93   // For thread-safe, use mutex
  94   // TODO: if all used backends on this executor are thread-safe,
  95   //       do not need to use mutex (otherwise, use mutex)
  96   // Deadlock occurs when an Executor is called recursively.
  97   std::lock_guard<std::mutex> lock(_mutex);
  98
  99   assert(src_tensors.size() == _graph.getInputs().size());
 100   assert(src_tensors.size() == _input_tensors.size());
 101   for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
 102   {
 103     // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
 104     // This code find the info to allocate dynamic tensor, and allocate memory based on the source
 105     // tensor's shape set by caller.
 106     const auto src_tensor = src_tensors[n];
 107     auto input_tensor = _input_tensors[n];
 108     // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
 109     if (src_tensor != nullptr && input_tensor != nullptr)
 110     {
 111       auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
 112       const auto orig_input_shape = input_tensor->getShape();
 113       const auto changed_input_shape =
 114           convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
 115       if (orig_input_shape != changed_input_shape)
 116       {
 117         if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
 118         {
 119           // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor
 120           throw std::runtime_error("Unknown dim is found at execution time for a backend that "
 121                                    "does not support dynamic tensor");
 122         }
 123         else
 124         {
 125           input_tensor->set_dynamic();
 126         }
 127       }
 128     }
 129   }
 130
 131   // TODO Move calling permute_fn.run() into executeImpl()
 132   assert(pre_fn);
 133   pre_fn->run();
 134
 135   executeImpl();
 136 }
 137
 138 void ExecutorBase::execute(const IODescription &desc)
 139 {
 140   // For thread-safe, use mutex
 141   // TODO: if all used backends on this executor are thread-safe,
 142   //       do not need to use mutex (otherwise, use mutex)
 143   std::lock_guard<std::mutex> lock(_mutex);
 144
 145   // Set input(s)
 146   assert(_input_tensors.size() == desc.inputs.size());
 147   for (uint32_t i = 0; i < _input_tensors.size(); ++i)
 148   {
 149     // TODO Remove dynamic_cast
 150     auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
 151     assert(tensor);
 152     auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
 153     if (input_shape != desc.dynamic_input_shapes.end())
 154     {
 155       tensor->set_dynamic();
 156       tensor->setShape(input_shape->second);
 157     }
 158     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 159     tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
 160                       desc.inputs[i]->size);
 161
 162     handleDynamicInputTensor(ir::IOIndex{i}, desc);
 163   }
 164
 165   assert(_output_tensors.size() == desc.outputs.size());
 166   for (uint32_t i = 0; i < _output_tensors.size(); ++i)
 167   {
 168     // TODO Remove dynamic_cast
 169     auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
 170     assert(tensor);
 171     tensor->set_dynamic(); // It can't be resized but shape could change
 172     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 173     tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
 174                       desc.outputs[i]->size);
 175   }
 176
 177   executeImpl();
 178
 179   // Update output(s) desc
 180   for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
 181   {
 182     ir::IOIndex output_index{n};
 183     // Optional output
 184     if (desc.outputs.at(n) == nullptr)
 185     {
 186       continue;
 187     }
 188     auto &output = *desc.outputs.at(n);
 189
 190     // set shape of outputDesc to tensor shape since tensor can be dynamic
 191     const auto output_tensor_shape = _output_tensors[n]->getShape();
 192     output.info.shape(
 193         convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
 194   }
 195 }
 196
 197 /**
 198  * @brief Changes tensor shape and allocate memory
 199  *        if input shape was changed by nnfw_set_input_tensorinfo()
 200  *
 201  * @note  Cases are:
 202  *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 203  *                                                        (a)          (b)
 204  *
 205  *           at (a), operand is static, tensor is static - memory dealloc is not needed
 206  *                   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 207  *           at (b), operand is static, tensor is dynamic - memory dealloc is needed
 208  *
 209  *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 210  *                                                         (a)          (b)
 211  *
 212  *           at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
 213  *                                                           since it has not been allocated yet
 214  *           at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
 215  */
 216 void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
 217 {
 218   auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
 219   if (shape_sig_found != desc.dynamic_input_shapes.end())
 220   {
 221     auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
 222     if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
 223       throw std::runtime_error("Unknown dim is found at execution time for a backend that "
 224                                "does not support dynamic tensor");
 225
 226     auto changed_input_shape = shape_sig_found->second;
 227     auto operand_ind = dyn_alloc_info->second.ind;
 228
 229     auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
 230     assert(dyn_tensor_manager);
 231     dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
 232   }
 233 }
 234
 235 bool ExecutorBase::hasDynamicInput()
 236 {
 237   for (auto &tensor : _input_tensors)
 238   {
 239     if (tensor->is_dynamic())
 240       return true;
 241   }
 242   return false;
 243 }
 244
 245 } // namespace exec
 246 } // namespace onert