runtime/onert/core/src/exec/Executors.cc

   1 /*
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "Executors.h"
  18
  19 #include "../backend/builtin/IOTensor.h"
  20
  21 namespace
  22 {
  23
  24 using namespace onert;
  25
  26 int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
  27                          const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
  28                          const ir::IOIndex &io_index)
  29 {
  30   for (size_t i = 0; i < pkg_inputs.size(); i++)
  31   {
  32     auto &input_desc = pkg_inputs[i];
  33     if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
  34         (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
  35         (std::get<ir::IOIndex>(input_desc) == io_index))
  36       return static_cast<int32_t>(i);
  37   }
  38   return -1;
  39 }
  40
  41 int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
  42                           const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
  43                           const ir::IOIndex &io_index)
  44 {
  45   for (size_t i = 0; i < pkg_outputs.size(); i++)
  46   {
  47     auto &input_desc = pkg_outputs[i];
  48     if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
  49         (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
  50         (std::get<ir::IOIndex>(input_desc) == io_index))
  51       return static_cast<int32_t>(i);
  52   }
  53   return -1;
  54 }
  55
  56 } // namespace
  57
  58 namespace onert
  59 {
  60 namespace exec
  61 {
  62
  63 class Executors::EdgeTensor : public backend::builtin::IOTensor
  64 {
  65 public:
  66   EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
  67     : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0}
  68   {
  69   }
  70   ~EdgeTensor() = default;
  71
  72   void allocate_buffer()
  73   {
  74     const auto total_size = orig_info().total_size();
  75     _buffer = std::make_unique<uint8_t[]>(total_size);
  76     _ref_count = 1;
  77
  78     // NOTE Executor's inputs/outputs are always IPortableTensor. If backend of inputs/outputs
  79     //      is using tensor that does not inherit IPortableTensor, Permute operation is added
  80     //      and all inputs/outputs become IPortableTensor at compile stage.
  81     //      This allows user's buffers to be set to inputs/outputs of executors.
  82     setUserTensor(_buffer.get(), total_size);
  83   }
  84
  85   void increase_ref() { _ref_count++; }
  86
  87   void decrease_ref()
  88   {
  89     assert(_ref_count > 0);
  90     _ref_count--;
  91     if (_ref_count == 0)
  92     {
  93       _buffer.reset();
  94       setUserTensor(nullptr, orig_info().total_size());
  95     }
  96   }
  97
  98 private:
  99   std::unique_ptr<uint8_t[]> _buffer;
 100   int32_t _ref_count;
 101 };
 102
 103 void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
 104                         std::unique_ptr<IExecutor> exec)
 105 {
 106   _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
 107 }
 108
 109 IExecutor *Executors::at(const ir::ModelIndex &model_index,
 110                          const ir::SubgraphIndex &subg_index) const
 111 {
 112   return _executors.at(std::make_pair(model_index, subg_index)).get();
 113 }
 114
 115 uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); }
 116
 117 uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); }
 118
 119 const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
 120 {
 121   auto const desc = _model_edges->pkg_inputs[index.value()];
 122   auto const model_index = std::get<0>(desc);
 123   auto const subg_index = std::get<1>(desc);
 124   auto const io_index = std::get<2>(desc);
 125   auto const executor = at(model_index, subg_index);
 126   return executor->getInputTensors().at(io_index.value())->orig_info();
 127 }
 128
 129 const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
 130 {
 131   auto const desc = _model_edges->pkg_outputs[index.value()];
 132   auto const model_index = std::get<0>(desc);
 133   auto const subg_index = std::get<1>(desc);
 134   auto const io_index = std::get<2>(desc);
 135   auto const executor = at(model_index, subg_index);
 136   return executor->getOutputTensors().at(io_index.value())->orig_info();
 137 }
 138
 139 // Allow below edges only
 140 //  m1 < m2, s1 == 0 and s2 == 0 if m1:s1:o1 -> m2:s2:o2'
 141 void Executors::checkSupportedMultimodel() const
 142 {
 143   // If package includes no-connection model, model_count is less than real model count in package.
 144   // Then this method will throw exception based on model index
 145   //  1st model: input assumption
 146   //  Otherwise: edges assumption
 147
 148   // Assumption: edges
 149   // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
 150   for (auto &&edge : _model_edges->edges)
 151   {
 152     auto const model_from = std::get<ir::ModelIndex>(edge.from);
 153     auto const model_to = std::get<ir::ModelIndex>(edge.to);
 154     auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
 155     auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
 156
 157     if (model_from.value() == model_to.value())
 158     {
 159       throw std::runtime_error{"Multi model's edge set has invalid edge"};
 160     }
 161
 162     if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
 163         (subg_to != ir::SubgraphIndex{0}))
 164       throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
 165   }
 166
 167   // Assumption: package inputs
 168   //  All 1st model inputs come from package input if always m1 < m2
 169   {
 170     auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
 171     auto search_first_model = [&](const ir::IOIndex &input_index) {
 172       for (const auto &input : _model_edges->pkg_inputs)
 173       {
 174         if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) ||
 175             (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) ||
 176             (std::get<ir::IOIndex>(input) == input_index))
 177           return true;
 178       }
 179
 180       return false;
 181     };
 182
 183     for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
 184     {
 185       if (!search_first_model(ir::IOIndex{i}))
 186         throw std::runtime_error{"Cannot find 1st model's input buffer"};
 187     }
 188   }
 189
 190   // Check whether nnpkg outputs and Edge `from` are duplicated
 191   for (const auto &edge : _model_edges->edges)
 192   {
 193     if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
 194         _model_edges->pkg_outputs.end())
 195     {
 196       throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
 197                                "with `from` of edges yet"};
 198     }
 199   }
 200 }
 201
 202 void Executors::createEdgeQuantLayers()
 203 {
 204   if (_is_created_edge_quant_layers)
 205   {
 206     return;
 207   }
 208
 209   // Create EdgeTensor for edges between executors
 210   for (const auto &pair : _edge_map)
 211   {
 212     const auto &from_iodesc = pair.first;
 213     const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
 214     const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
 215     const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc);
 216
 217     const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
 218     const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value());
 219
 220     const auto &from_info = from_tensor->orig_info();
 221     const auto from_layout = from_tensor->orig_layout();
 222     _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout);
 223   }
 224
 225   // Append type-aware quantization layer for edges between executors
 226   for (const auto &executor_pair : _executors)
 227   {
 228     const auto &executor_index = executor_pair.first;
 229     const auto &model_index = executor_index.first;
 230     const auto &subg_index = executor_index.second;
 231
 232     std::vector<backend::ITensor *> inputs;
 233     std::vector<backend::ITensor *> outputs;
 234     for (const auto &pair : _edge_map)
 235     {
 236       const auto &from_iodesc = pair.first;
 237       if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
 238           std::get<ir::SubgraphIndex>(from_iodesc) == subg_index)
 239       {
 240         const auto from_tensor = _edge_tensors[from_iodesc].get();
 241         const auto &to_list = pair.second;
 242
 243         for (const auto &to_iodesc : to_list)
 244         {
 245           const auto &to_model_index = std::get<ir::ModelIndex>(to_iodesc);
 246           const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc);
 247           const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc);
 248
 249           const auto to_executor = _executors.at({to_model_index, to_subg_index}).get();
 250           const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value());
 251
 252           // TODO Unify tensors with the same `from` tensor and same type
 253           if (from_tensor->data_type() != to_tensor->data_type())
 254           {
 255             assert(inputs.size() == outputs.size());
 256             const auto &to_info =
 257               to_executor->getInputTensors().at(to_io_index.value())->orig_info();
 258             const auto to_layout = to_tensor->orig_layout();
 259             inputs.emplace_back(from_tensor);
 260
 261             auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
 262             outputs.emplace_back(type_aware_quant_tensor.get());
 263
 264             _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
 265           }
 266         }
 267       }
 268     }
 269
 270     auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
 271     layer->prepare();
 272     _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
 273   }
 274
 275   _is_created_edge_quant_layers = true;
 276 }
 277
 278 void Executors::CreatePkgIOTensors(const IODescription &desc)
 279 {
 280   for (const auto &pkg_input : _model_edges->pkg_inputs)
 281   {
 282     // Create IOTensor for nnpkg inputs
 283     const auto &model_index = std::get<ir::ModelIndex>(pkg_input);
 284     const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input);
 285     const auto &io_index = std::get<ir::IOIndex>(pkg_input);
 286     const auto input_pkg_index =
 287       find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
 288     if (input_pkg_index == -1)
 289       throw std::runtime_error{"Cannot find multi model input index"};
 290     auto input_desc = desc.inputs[input_pkg_index].get();
 291     _pkg_input_tensors[pkg_input] =
 292       std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout);
 293   }
 294
 295   for (const auto &pkg_output : _model_edges->pkg_outputs)
 296   {
 297     // Create IOTensor for nnpkg outputs
 298     const auto &model_index = std::get<ir::ModelIndex>(pkg_output);
 299     const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output);
 300     const auto &io_index = std::get<ir::IOIndex>(pkg_output);
 301     const auto output_pkg_index =
 302       find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
 303     if (output_pkg_index == -1)
 304       throw std::runtime_error{"Cannot find multi model output index"};
 305     auto output_desc = desc.outputs[output_pkg_index].get();
 306     _pkg_output_tensors[pkg_output] =
 307       std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout);
 308   }
 309 }
 310
 311 void Executors::createPkgIOQuantLayers(const IODescription &desc)
 312 {
 313   // Append type-aware quantization layer for nnpkg inputs/outputs between executors
 314   for (const auto &pair : _executors)
 315   {
 316     const auto &executor_index = pair.first;
 317     const auto &model_index = executor_index.first;
 318     const auto &subg_index = executor_index.second;
 319     const auto executor = pair.second.get();
 320
 321     // Find pkg inputs of current executor
 322     std::vector<ir::IODesc> pkg_inputs;
 323     for (const auto &pkg_input : _model_edges->pkg_inputs)
 324     {
 325       if (std::get<ir::ModelIndex>(pkg_input) == model_index &&
 326           std::get<ir::SubgraphIndex>(pkg_input) == subg_index)
 327       {
 328         pkg_inputs.emplace_back(pkg_input);
 329       }
 330     }
 331     std::vector<backend::ITensor *> src_tensors;
 332     std::vector<backend::ITensor *> dst_tensors;
 333     for (const auto &pkg_input : pkg_inputs)
 334     {
 335       const auto &io_index = std::get<ir::IOIndex>(pkg_input);
 336       const auto input_pkg_index =
 337         find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
 338       if (input_pkg_index == -1)
 339         throw std::runtime_error{"Cannot find multi model input index"};
 340       auto input_desc = desc.inputs[input_pkg_index].get();
 341
 342       // Create EdgeTensor for nnpkg input if type is different
 343       const auto input_tensor =
 344         executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value());
 345       const auto &orig_info = input_tensor->orig_info();
 346       if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type())
 347       {
 348         const auto orig_layout = input_tensor->orig_layout();
 349         auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
 350         _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);
 351
 352         // Append type-aware quantization layer's inputs/outputs
 353         src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
 354         dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
 355       }
 356     }
 357
 358     // Create type-aware quantization layer for nnpkg inputs
 359     auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
 360     pkg_input_layer->prepare();
 361     _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
 362
 363     // Find pkg outputs of current executor
 364     std::vector<ir::IODesc> pkg_outputs;
 365     for (const auto &pkg_output : _model_edges->pkg_outputs)
 366     {
 367       if (std::get<ir::ModelIndex>(pkg_output) == model_index &&
 368           std::get<ir::SubgraphIndex>(pkg_output) == subg_index)
 369       {
 370         pkg_outputs.emplace_back(pkg_output);
 371       }
 372     }
 373     src_tensors.clear();
 374     dst_tensors.clear();
 375     // Create Tensors of nnpkg outputs for type-aware quantization
 376     for (const auto &pkg_output : pkg_outputs)
 377     {
 378       const auto &io_index = std::get<ir::IOIndex>(pkg_output);
 379       const auto output_pkg_index =
 380         find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
 381       if (output_pkg_index == -1)
 382         throw std::runtime_error{"Cannot find multi model output index"};
 383       auto output_desc = desc.outputs[output_pkg_index].get();
 384
 385       // Create EdgeTensor for nnpkg output if type is different
 386       const auto output_tensor =
 387         executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value());
 388       const auto &orig_info = output_tensor->orig_info();
 389       if (output_desc->info.typeInfo().type() != output_tensor->orig_info().typeInfo().type())
 390       {
 391         const auto orig_layout = output_tensor->orig_layout();
 392         auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
 393         _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);
 394
 395         // Append type-aware quantization layer's inputs/outputs
 396         src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
 397         dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());
 398       }
 399     }
 400
 401     // Create type-aware quantization layer for nnpkg outputs
 402     auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
 403     pkg_output_layer->prepare();
 404     _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
 405   }
 406 }
 407
 408 void Executors::execute(const IODescription &desc)
 409 {
 410   // Check supported multi model package
 411   checkSupportedMultimodel();
 412
 413   // TODO Move creating type-aware quantization layers for edges in compilation stage
 414   createEdgeQuantLayers();
 415
 416   // TODO Create IOTensors only once and recreate them only if nnpkg info changes
 417   CreatePkgIOTensors(desc);
 418
 419   // TODO Create type-aware quantization layers only once and recreate them only if type changes
 420   createPkgIOQuantLayers(desc);
 421
 422   // TODO Find better way to schedule order of executors
 423   auto const model_count = modelCount();
 424
 425   auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
 426                        const ir::IOIndex &io_index) {
 427     for (const auto &edge : _model_edges->edges)
 428     {
 429       if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
 430           (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
 431           (std::get<ir::IOIndex>(edge.to) == io_index))
 432         return edge.from;
 433     }
 434
 435     throw std::runtime_error{"Cannot find edge for model input"};
 436   };
 437
 438   // Execute each model
 439   // NOTE May be better to use vector instead of unordered_map for _executors
 440   for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
 441   {
 442     // Find executor
 443     auto executor = at(model_index, ir::SubgraphIndex{0});
 444
 445     // Set IOTensors
 446     // TODO Set internal IOTensors only once
 447     std::vector<backend::IPortableTensor *> inputs_inter;
 448     std::vector<backend::IPortableTensor *> outputs_inter;
 449     const auto &input_tensors = executor->getInputTensors();
 450     const auto &output_tensors = executor->getOutputTensors();
 451     auto const input_size = input_tensors.size();
 452     auto const output_size = output_tensors.size();
 453     inputs_inter.resize(input_size);
 454     outputs_inter.resize(output_size);
 455
 456     // Set inputs of executor
 457     // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
 458     for (uint32_t i = 0; i < input_size; i++)
 459     {
 460       const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
 461                                                     ir::SubgraphIndex{0}, ir::IOIndex{i});
 462       const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 463       if (input_pkg_index != -1)
 464       {
 465         // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
 466         if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
 467         {
 468           _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
 469
 470           inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
 471         }
 472         else
 473         {
 474           inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
 475         }
 476
 477         // Set buffer of IOTensor
 478         auto input_desc = desc.inputs[input_pkg_index].get();
 479         // TODO Remove const_cast (we need const_cast as ITensor is writable)
 480         _pkg_input_tensors[input_io_desc]->setUserTensor(
 481           reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
 482       }
 483       else
 484       {
 485         auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
 486         const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
 487         const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
 488         const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
 489
 490         // Supported only sequantial execution of models
 491         assert(from_model_index.value() < model_index.value());
 492         assert(from_subg_index.value() == 0);
 493         const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
 494         const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 495         if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
 496         {
 497           inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
 498         }
 499         else
 500         {
 501           inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
 502         }
 503         assert(inputs_inter[i]->buffer() != nullptr);
 504       }
 505     }
 506
 507     // Set outputs of executor
 508     for (uint32_t i = 0; i < output_size; i++)
 509     {
 510       const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
 511                                                       ir::SubgraphIndex{0}, ir::IOIndex{i});
 512       const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 513       if (output_pkg_index != -1)
 514       {
 515         // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
 516         if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
 517         {
 518           _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
 519
 520           outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
 521         }
 522         else
 523         {
 524           outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
 525         }
 526
 527         // Set buffer of IOTensor
 528         auto output_desc = desc.outputs[output_pkg_index].get();
 529         _pkg_output_tensors[output_io_desc]->setUserTensor(
 530           reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
 531       }
 532       else
 533       {
 534         // Allocate buffer of `from` tensors
 535         const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 536         _edge_tensors[from_iodesc]->allocate_buffer();
 537         outputs_inter[i] = _edge_tensors[from_iodesc].get();
 538
 539         // Allocate buffer of tensors for type-aware quantization
 540         for (const auto &to_iodesc : _edge_map[from_iodesc])
 541         {
 542           _edge_tensors[from_iodesc]->increase_ref();
 543           if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
 544           {
 545             auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
 546             type_aware_quant_tensor->allocate_buffer();
 547
 548             _edge_tensors[from_iodesc]->decrease_ref();
 549           }
 550         }
 551       }
 552     }
 553
 554     _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
 555
 556     executor->execute(inputs_inter, outputs_inter);
 557
 558     _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
 559     _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
 560
 561     // Release input buffers that are no longer needed
 562     for (uint32_t i = 0; i < input_size; i++)
 563     {
 564       const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
 565                                                     ir::SubgraphIndex{0}, ir::IOIndex{i});
 566
 567       const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 568       if (input_pkg_index == -1)
 569       {
 570         if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
 571         {
 572           // Decrease reference count of tensor for type-aware quantization if input tensor is the
 573           // tensor
 574           const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 575           if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
 576           {
 577             _edge_quant_tensors[to_iodesc]->decrease_ref();
 578           }
 579         }
 580         else
 581         {
 582           // Decrease reference count of `from` tensor if input tensor is the `from` tensor
 583           const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
 584           _edge_tensors[from_iodesc]->decrease_ref();
 585
 586           // Decrease reference count of nnpkg inputs
 587           if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
 588           {
 589             _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
 590           }
 591         }
 592       }
 593     }
 594
 595     // Release output buffers if those buffers are no longer used other executors because of
 596     // type-aware quantization
 597     // FIXME if tensors for type-aware quantization unified for the same `from` tensor and same type
 598     for (uint32_t i = 0; i < output_size; i++)
 599     {
 600       auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
 601
 602       // Check if other executors will use the buffer of edge tensor
 603       const auto &to_list = _edge_map[from_iodesc];
 604       if (to_list.size() == 0)
 605       {
 606         // This condition means `from_iodesc` tensor is an output of nnpkg
 607         continue;
 608       }
 609
 610       bool to_be_release =
 611         !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
 612           // This condition means another executor uses the buffer of edge tensor
 613           return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
 614         });
 615
 616       if (to_be_release)
 617       {
 618         // This edge tensor's buffer won't be used in other executors
 619         // Tensors for type-aware quantization take over the role of this edge tensor instead
 620         _edge_tensors[from_iodesc]->decrease_ref();
 621       }
 622
 623       // Decrease reference count of nnpkg outputs
 624       if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
 625       {
 626         _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
 627       }
 628     }
 629   }
 630 }
 631
 632 // modelCount() iterates _executors.
 633 // It assumes that Compiler will generate Executor for all models and _executors includes all
 634 // generated Executor.
 635 // If nnpackage includes model(s) which has no connection and Compiler does not
 636 // generate Executor for them, modelCount() return less value than real model count.
 637 uint16_t Executors::modelCount() const
 638 {
 639   uint16_t model_count = 0;
 640   for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) !=
 641          _executors.end();
 642        model_count++)
 643     ;
 644
 645   return model_count;
 646 }
 647
 648 } // namespace exec
 649 } // namespace onert