runtime/onert/core/src/exec/ExecutorBase.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ExecutorBase.h"
  18
  19 #include "backend/ITensor.h"
  20 #include "backend/controlflow/UserTensor.h"
  21 #include "backend/cpu_common/Tensor.h"
  22 #include "util/logging.h"
  23
  24 namespace onert
  25 {
  26 namespace exec
  27 {
  28
  29 ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
  30                            const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
  31                            const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
  32                            const compiler::TensorBuilders &tensor_builders)
  33     : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
  34       _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
  35 {
  36   // TODO Fix the way of knowing whether it is primary or not
  37   bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
  38   if (!primary_executor)
  39   {
  40     auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
  41       std::vector<std::shared_ptr<backend::ITensor>> list;
  42       for (auto ind : ind_seq)
  43       {
  44         std::shared_ptr<backend::ITensor> tensor;
  45         for (auto &tensor_builder : tensor_builders)
  46         {
  47           auto tensor_registry = tensor_builder->tensorRegistry();
  48           assert(tensor_registry);
  49           tensor = tensor_registry->getNativeITensor(ind);
  50           if (tensor != nullptr)
  51           {
  52             if (tensor_builder->supportDynamicTensor())
  53             {
  54               DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
  55               _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  56             }
  57             break;
  58           }
  59         }
  60         assert(tensor != nullptr);
  61         list.push_back(tensor);
  62       }
  63       return list;
  64     };
  65     auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
  66       std::vector<std::shared_ptr<backend::ITensor>> list;
  67       for (auto ind : ind_seq)
  68       {
  69         std::shared_ptr<backend::ITensor> tensor;
  70         for (auto &tensor_builder : tensor_builders)
  71         {
  72           auto tensor_registry = tensor_builder->tensorRegistry();
  73           assert(tensor_registry);
  74           tensor = tensor_registry->getNativeITensor(ind);
  75           if (tensor != nullptr)
  76           {
  77             if (tensor_builder->supportDynamicTensor())
  78             {
  79               DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
  80               _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
  81             }
  82             break;
  83           }
  84         }
  85         assert(tensor != nullptr);
  86         list.push_back(tensor);
  87       }
  88       return list;
  89     };
  90     _input_tensors = build_input_tensor_list(_graph.getInputs());
  91     _output_tensors = build_output_tensor_list(_graph.getOutputs());
  92   }
  93   else
  94   {
  95     // If primary graph, all the inputs and outputs belong to controlflow backend
  96     auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
  97     assert(cf_dyn_tensor_builder);
  98
  99     assert(input_tensors.size() == _graph.getInputs().size());
 100     assert(output_tensors.size() == _graph.getOutputs().size());
 101     for (uint32_t i = 0; i < input_tensors.size(); i++)
 102     {
 103       auto tensor = input_tensors[i];
 104       auto ind = _graph.getInputs().at(i);
 105       DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
 106       _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
 107     }
 108     for (uint32_t i = 0; i < output_tensors.size(); i++)
 109     {
 110       auto tensor = output_tensors[i];
 111       auto ind = _graph.getOutputs().at(i);
 112       DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
 113       _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
 114     }
 115   }
 116
 117   // Prepare each TensorManager on each backend
 118   for (auto &tensor_builder : tensor_builders)
 119   {
 120     auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
 121     if (s_tensor_manager != nullptr)
 122       _tensor_mgrs.insert(std::move(s_tensor_manager));
 123
 124     if (tensor_builder->supportDynamicTensor())
 125     {
 126       auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
 127       if (d_tensor_manager != nullptr)
 128         _tensor_mgrs.insert(std::move(d_tensor_manager));
 129     }
 130   }
 131 }
 132
 133 void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
 134                            const std::shared_ptr<IPermuteFunction> &pre_fn)
 135 {
 136   // For thread-safe, use mutex
 137   // TODO: if all used backends on this executor are thread-safe,
 138   //       do not need to use mutex (otherwise, use mutex)
 139   // Deadlock occurs when an Executor is called recursively.
 140   std::lock_guard<std::mutex> lock(_mutex);
 141
 142   assert(src_tensors.size() == _graph.getInputs().size());
 143   assert(src_tensors.size() == _input_tensors.size());
 144   for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
 145   {
 146     // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
 147     // This code find the info to allocate dynamic tensor, and allocate memory based on the source
 148     // tensor's shape set by caller.
 149     const auto src_tensor = src_tensors[n];
 150     auto input_tensor = _input_tensors[n];
 151     // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
 152     if (src_tensor != nullptr && input_tensor != nullptr)
 153     {
 154       auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
 155       const auto orig_input_shape = input_tensor->getShape();
 156       const auto changed_input_shape =
 157           convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
 158       if (orig_input_shape != changed_input_shape)
 159       {
 160         if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
 161         {
 162           // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor
 163           throw std::runtime_error("Unknown dim is found at execution time for a backend that "
 164                                    "does not support dynamic tensor");
 165         }
 166         else
 167         {
 168           input_tensor->set_dynamic();
 169         }
 170       }
 171     }
 172   }
 173
 174   // TODO Move calling permute_fn.run() into executeImpl()
 175   assert(pre_fn);
 176   pre_fn->run();
 177
 178   executeImpl();
 179 }
 180
 181 void ExecutorBase::execute(const IODescription &desc)
 182 {
 183   // For thread-safe, use mutex
 184   // TODO: if all used backends on this executor are thread-safe,
 185   //       do not need to use mutex (otherwise, use mutex)
 186   std::lock_guard<std::mutex> lock(_mutex);
 187
 188   // Set input(s)
 189   assert(_input_tensors.size() == desc.inputs.size());
 190   for (uint32_t i = 0; i < _input_tensors.size(); ++i)
 191   {
 192     // TODO Remove dynamic_cast
 193     auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
 194     assert(tensor);
 195     auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
 196     if (input_shape != desc.input_shape_signature.end())
 197     {
 198       tensor->set_dynamic();
 199       tensor->setShape(input_shape->second);
 200     }
 201     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 202     tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
 203                       desc.inputs[i]->size);
 204
 205     handleDynamicInputTensor(ir::IOIndex{i}, desc);
 206   }
 207
 208   assert(_output_tensors.size() == desc.outputs.size());
 209   for (uint32_t i = 0; i < _output_tensors.size(); ++i)
 210   {
 211     // TODO Remove dynamic_cast
 212     auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
 213     assert(tensor);
 214     tensor->set_dynamic(); // It can't be resized but shape could change
 215     // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
 216     tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
 217                       desc.outputs[i]->size);
 218   }
 219
 220   executeImpl();
 221
 222   // Update output(s) desc
 223   for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
 224   {
 225     ir::IOIndex output_index{n};
 226     // Optional output
 227     if (desc.outputs.at(n) == nullptr)
 228     {
 229       continue;
 230     }
 231     auto &output = *desc.outputs.at(n);
 232
 233     // set shape of outputDesc to tensor shape since tensor can be dynamic
 234     const auto output_tensor_shape = _output_tensors[n]->getShape();
 235     output.info.shape(
 236         convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
 237   }
 238 }
 239
 240 /**
 241  * @brief Changes tensor shape and allocate memory
 242  *        if input shape was changed by nnfw_set_input_tensorinfo()
 243  *
 244  * @note  Cases are:
 245  *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 246  *                                                        (a)          (b)
 247  *
 248  *           at (a), operand is static, tensor is static - memory dealloc is not needed
 249  *                   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 250  *           at (b), operand is static, tensor is dynamic - memory dealloc is needed
 251  *
 252  *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 253  *                                                         (a)          (b)
 254  *
 255  *           at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
 256  *                                                           since it has not been allocated yet
 257  *           at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
 258  */
 259 void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
 260 {
 261   auto shape_sig_found = desc.input_shape_signature.find(io_ind);
 262   if (shape_sig_found != desc.input_shape_signature.end())
 263   {
 264     auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
 265     if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
 266       throw std::runtime_error("Unknown dim is found at execution time for a backend that "
 267                                "does not support dynamic tensor");
 268
 269     auto changed_input_shape = shape_sig_found->second;
 270     auto operand_ind = dyn_alloc_info->second.ind;
 271
 272     dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
 273   }
 274 }
 275
 276 bool ExecutorBase::hasDynamicInput()
 277 {
 278   for (auto &tensor : _input_tensors)
 279   {
 280     if (tensor->is_dynamic())
 281       return true;
 282   }
 283   return false;
 284 }
 285
 286 } // namespace exec
 287 } // namespace onert