/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ExecutorBase.h"

#include "backend/ITensor.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/cpu_common/Tensor.h"
#include "util/logging.h"

namespace onert
{
namespace exec
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
                           const std::vector<backend::ITensor *> &input_tensors,
                           const std::vector<backend::ITensor *> &output_tensors,
                           const compiler::TensorRegistries &tensor_regs)
    : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
      _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
{
  // TODO Fix the way of knowing whether it is primary or not
  bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
  if (!primary_executor)
  {
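    // A non-primary executor (e.g. one built for a control flow subgraph) is not handed its
    // I/O tensors by the caller, so collect them from the tensor registries instead.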
    auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
      std::vector<backend::ITensor *> list;
      for (auto ind : ind_seq)
      {
        backend::ITensor *tensor = tensor_regs.getITensor(ind);
        assert(tensor != nullptr);
        list.push_back(tensor);
      }
      return list;
    };
    auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
      std::vector<backend::ITensor *> list;
      for (auto ind : ind_seq)
      {
        backend::ITensor *tensor = tensor_regs.getITensor(ind);
        assert(tensor != nullptr);
        list.push_back(tensor);
      }
      return list;
    };
    _input_tensors = build_input_tensor_list(_graph.getInputs());
    _output_tensors = build_output_tensor_list(_graph.getOutputs());
  }
}

void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors,
                           const std::shared_ptr<IPermuteFunction> &pre_fn)
{
  // Use a mutex for thread safety
  // TODO: if all backends used on this executor are thread-safe,
  //       the mutex is unnecessary (otherwise, keep it)
  // A deadlock occurs when an Executor is called recursively.
  std::lock_guard<std::mutex> lock(_mutex);
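  // The recursion deadlock above arises because std::mutex is non-recursive: if an operation
  // running under this lock calls execute() on the same executor again (e.g. via a control
  // flow operation), the second lock attempt blocks forever.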

  assert(src_tensors.size() == _graph.getInputs().size());
  assert(src_tensors.size() == _input_tensors.size());
  for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
  {
    // When the user changes the input shape, the input tensor is dynamic and its memory is not
    // allocated yet. This code finds the info needed to allocate the dynamic tensor, and
    // allocates memory based on the source tensor's shape set by the caller.
    const auto src_tensor = src_tensors[n];
    auto input_tensor = _input_tensors[n];
    // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
    if (src_tensor != nullptr && input_tensor != nullptr)
    {
      const auto orig_input_shape = input_tensor->getShape();
      const auto changed_input_shape =
          convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
      if (orig_input_shape != changed_input_shape)
      {
        input_tensor->set_dynamic();
      }
    }
  }

  // TODO Move calling permute_fn.run() into executeImpl()
  assert(pre_fn);
  pre_fn->run();

  executeImpl();
}
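
// Note: the overload above receives already-resolved ITensors plus a permutation function that
// copies/permutes them into this executor's inputs, while the overload below starts from a
// user-facing IODescription and binds user-owned buffers to the I/O tensors before running.
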
void ExecutorBase::execute(const IODescription &desc)
{
  // Use a mutex for thread safety
  // TODO: if all backends used on this executor are thread-safe,
  //       the mutex is unnecessary (otherwise, keep it)
  std::lock_guard<std::mutex> lock(_mutex);

  // Set input(s)
  assert(_input_tensors.size() == desc.inputs.size());
  for (uint32_t i = 0; i < _input_tensors.size(); ++i)
  {
    // TODO Remove dynamic_cast
    auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]);
    assert(tensor);
    auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
    if (input_shape != desc.dynamic_input_shapes.end())
    {
      tensor->set_dynamic();
      tensor->setShape(input_shape->second);
    }
    // TODO Check if (desc.inputs[i] == nullptr)
    // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
    tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
                      desc.inputs[i]->size);

    handleDynamicInputTensor(ir::IOIndex{i}, desc);
  }
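
  // Set output(s): outputs are likewise UserTensors wrapping user-owned buffers; unlike the
  // inputs (whose null check is still a TODO above), a missing output buffer is rejected below.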
  assert(_output_tensors.size() == desc.outputs.size());
  for (uint32_t i = 0; i < _output_tensors.size(); ++i)
  {
    // TODO Remove dynamic_cast
    auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]);
    assert(tensor);
    tensor->set_dynamic(); // It can't be resized here, but its shape could change
    if (desc.outputs[i] == nullptr)
      throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
    tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
  }

  executeImpl();

  // Update output(s) desc
  for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
  {
    ir::IOIndex output_index{n};
    // Skip an output that has no buffer bound to it
    if (desc.outputs.at(n) == nullptr)
    {
      continue;
    }
    auto &output = *desc.outputs.at(n);
    // Set the shape of outputDesc to the tensor shape, since the tensor can be dynamic
    const auto output_tensor_shape = _output_tensors[n]->getShape();
    output.info.shape(
        convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
  }
}
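
// A minimal caller-side sketch of the execute(const IODescription &) flow above, for one input
// and one output. The InputDesc/OutputDesc constructor arguments are assumptions here; check
// IODescription.h for the exact fields:
//
//   IODescription desc;
//   desc.inputs.resize(1);
//   desc.outputs.resize(1);
//   desc.inputs[0] = std::make_unique<InputDesc>(in_info, in_buf, in_size, ir::Layout::NHWC);
//   desc.outputs[0] = std::make_unique<OutputDesc>(out_info, out_buf, out_size, ir::Layout::NHWC);
//   executor->execute(desc); // binds the buffers, runs, then updates desc.outputs[0]->info
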
/**
 * @brief Changes the tensor shape and allocates memory
 *        if the input shape was changed by nnfw_set_input_tensorinfo()
 *
 * @note  Cases are:
 *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 *                                                                     (a)          (b)
 *
 *        at (a), the operand is static, the tensor is static - memory dealloc is not needed
 *                (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 *        at (b), the operand is static, the tensor is dynamic - memory dealloc is needed
 *
 *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 *                                                                      (a)          (b)
 *
 *        at (a), the operand is dynamic, the tensor is dynamic - memory dealloc is not needed
 *                since it has not been allocated yet
 *        at (b), the operand is dynamic, the tensor is dynamic - memory dealloc is needed
 */
void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
{
  auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
  if (shape_sig_found != desc.dynamic_input_shapes.end())
  {
    auto changed_input_shape = shape_sig_found->second;
    _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
  }
}
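
// For example (hedged; the exact C API spelling lives in the nnfw headers): after the user calls
// nnfw_set_input_tensorinfo() with a new shape for input 0, desc.dynamic_input_shapes contains an
// entry for ir::IOIndex{0}, and applyShape() above reallocates that tensor's memory to fit the
// new shape on the next execute().
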
bool ExecutorBase::hasDynamicInput()
{
  for (auto &tensor : _input_tensors)
  {
    if (tensor->is_dynamic())
      return true;
  }
  return false;
}

} // namespace exec
} // namespace onert