runtime/onert/core/src/compiler/HEScheduler.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ir/Operand.h"
  18 #include "compiler/HEScheduler.h"
  19 #include "ir/Graph.h"
  20 #include "util/ConfigSource.h"
  21 #include "compiler/BackendResolver.h"
  22 #include "util/logging.h"
  23 #include "util/Utils.h"
  24 #include "exec/FunctionSequence.h"
  25 #include <cassert>
  26 #include <cmath>
  27 #include <chrono>
  28
  29 namespace onert
  30 {
  31
  32 namespace compiler
  33 {
  34 static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
  35 {
  36   uint32_t size = 0;
  37   for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
  38   {
  39     size += graph.operands().at(ind).info().total_size();
  40   }
  41   return size;
  42 }
  43
  44 static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
  45 {
  46   for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED)
  47   {
  48     const auto &obj = graph.operands().at(input);
  49     if (obj.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
  50     {
  51       return true;
  52     }
  53   }
  54   return false;
  55 }
  56
  57 static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
  58                              const ir::Operation &node, bool quant)
  59 {
  60   /* TODO: this is workaround, come up with better solution if have.
  61       Adding exception in stage doesn't help. Because if there is a record for add without
  62       broadcast, scheduling will select it since it doesn't distinguish broadcast and
  63       non-broadcast like it does for quant non-quantized*/
  64   if (backend->config()->id() == "cpu" &&
  65       (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
  66        node.opcode() == ir::OpCode::Mul))
  67   {
  68     const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
  69     const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
  70     /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
  71      * without broadcasting*/
  72     if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
  73     {
  74       return true;
  75     }
  76   }
  77   /* TODO: this is workaround, come up with better solution if have.
  78           Adding exception in stage doesn't help. Because if there is a record for Mul without
  79           broadcast, scheduling will select it since it doesn't distinguish broadcast and
  80           non-broadcast like it does for quant non-quantized*/
  81   else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
  82   {
  83     const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
  84     const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
  85
  86     // Nontrivial broadcasting isn't supported yet
  87     if (quant ||
  88         !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
  89     {
  90       return true;
  91     }
  92   }
  93   return false;
  94 }
  95
  96 // if a node can be merged into op_seq
  97 static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
  98 {
  99   size_t prev_op_cnt = 0;
 100   for (const auto &input : node.getInputs())
 101   {
 102     // only valid_inputs
 103     const auto &operand = graph.operands().at(input);
 104     if (operand.isConstant())
 105       continue;
 106
 107     // This operand is output of operation, not weight or bias
 108     if (operand.getDef().valid())
 109       ++prev_op_cnt;
 110
 111     // Current node has multiple inputs as concat or at the beginning of the separated branch
 112     if (prev_op_cnt > 1 || operand.getUses().size() > 1)
 113     {
 114       return false;
 115     }
 116   }
 117   return true;
 118 }
 119
 120 void HEScheduler::scheduleShufflingBackends()
 121 {
 122   VERBOSE(HEScheduler::schedule)
 123       << "Started task scheduling: uses all backends to get more metrics for data transfer"
 124       << std::endl;
 125   size_t backend_ind = 0;
 126   for (const auto &rank : _rank_to_op)
 127   {
 128     VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
 129     const auto &node = _graph->operations().at(rank.second);
 130     const bool quant = isQuant(*_graph, node);
 131     const auto size = getOperationsFlattenedIOSize(*_graph, node);
 132     for (size_t i = 0;; ++i)
 133     {
 134       if (i == _all_backends.size())
 135       {
 136         // wasn't able to find backend
 137         assert(false);
 138         break;
 139       }
 140       if (backend_ind == _all_backends.size())
 141       {
 142         backend_ind = 0;
 143       }
 144       if (isWorkaroundSkip(*_graph, _all_backends[backend_ind], node, quant))
 145       {
 146         ++backend_ind;
 147         continue;
 148       }
 149       const auto exec_time =
 150           _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
 151       // Scheduling to measure data transfer must be done after measuring all backends separately
 152       assert(exec_time != _exec_time->NOT_FOUND);
 153       if (exec_time == _exec_time->getMax())
 154       {
 155         ++backend_ind;
 156         continue;
 157       }
 158       _backend_resolver->setBackend(rank.second, _all_backends[backend_ind]);
 159       VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
 160                                      << _all_backends[backend_ind]->config()->id() << std::endl;
 161       ++backend_ind;
 162       break;
 163     }
 164   }
 165 }
 166
 167 bool HEScheduler::isNodeProfiled(const ir::Operation &node)
 168 {
 169   const bool quant = isQuant(*_graph, node);
 170   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 171   for (const auto *backend : _all_backends)
 172   {
 173     const auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
 174     if (exec_time == _exec_time->NOT_FOUND)
 175       return false;
 176   }
 177   return true;
 178 }
 179
 180 void HEScheduler::scheduleBranch(const ir::OperationIndex &index,
 181                                  ir::OperationIndexMap<bool> &scheduled)
 182 {
 183   auto loc_index = index;
 184   const backend::Backend *parent_backend = nullptr;
 185   while (true)
 186   {
 187     if (scheduled[loc_index])
 188     {
 189       return;
 190     }
 191     if (!schedule(loc_index, parent_backend))
 192     {
 193       return;
 194     }
 195     scheduled[loc_index] = true;
 196     parent_backend = _backend_resolver->getBackend(loc_index);
 197
 198     const auto &node = _graph->operations().at(loc_index);
 199     /* get the only output operand, that is input of the next single operation
 200      *   and just this nodes output.*/
 201     if (node.getOutputs().size() != 1)
 202     {
 203       return;
 204     }
 205     const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
 206     // One of the last nodes
 207     if (only_out_operand.getUses().size() == 0)
 208     {
 209       return;
 210     }
 211     loc_index = *only_out_operand.getUses().begin();
 212     /* verify, that next node is neither beginning nor ending node of a branch*/
 213     const auto &next_node = _graph->operations().at(loc_index);
 214     if (!isMergeable(*_graph, next_node))
 215     {
 216       return;
 217     }
 218   }
 219 }
 220
 221 std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph &graph)
 222 {
 223   _graph = &graph;
 224   VERBOSE(HEScheduler::schedule) << "task scheduling started" << std::endl;
 225   // Make ranks and save in descending order
 226   makeRank();
 227
 228   for (const auto *backend : _all_backends)
 229   {
 230     _backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
 231   }
 232
 233   if (_is_profiling_mode)
 234   {
 235     // Check if profiling info about all backend/node pairs already exists
 236     bool all_nodes_are_profiled = true;
 237     _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
 238       if (all_nodes_are_profiled)
 239         all_nodes_are_profiled = isNodeProfiled(op);
 240     });
 241
 242     // If all nodes are already profiled - schedule backends in such order, so more profiling
 243     // information about between-backends data transfer could be collected
 244     if (all_nodes_are_profiled)
 245     {
 246       scheduleShufflingBackends();
 247       VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
 248       return std::move(_backend_resolver);
 249     }
 250   }
 251
 252   ir::OperationIndexMap<bool> visited;
 253   graph.operations().iterate(
 254       [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
 255   // for each task select the backend with the smallest earliest finishing time(eft)
 256   for (const auto &rank : _rank_to_op)
 257   {
 258     scheduleBranch(rank.second, visited);
 259   }
 260   VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
 261   return std::move(_backend_resolver);
 262 }
 263
 264 int64_t HEScheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
 265                                bool quant, uint32_t size)
 266 {
 267   const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
 268   if (time != _exec_time->NOT_FOUND)
 269     return time;
 270
 271   return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
 272 }
 273
 274 int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
 275                                     const backend::Backend *dst_backend, bool quant, uint32_t size)
 276 {
 277   // TODO Change it to getOperationExecTime()
 278   const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
 279
 280   if (time != _exec_time->NOT_FOUND)
 281     return time;
 282
 283   // Makes the scheduler prefer keeping computations on one backend
 284   return size / 200;
 285 }
 286
 287 int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
 288 {
 289   // if there is no profiling info don't use this backend during scheduling
 290   if (!_is_profiling_mode)
 291   {
 292     VERBOSE(HEScheduler::tryBackend)
 293         << "Trying to HE schedule while there is no profiling info for " << node.name()
 294         << " on backend " << backend->config()->id() << ". So this backend won't be used. "
 295         << std::endl;
 296     _is_supported[backend][node.name()] = false;
 297     return _exec_time->getMax();
 298   }
 299   auto iter = _is_supported.find(backend);
 300   if (iter != _is_supported.end())
 301   {
 302     auto it2 = iter->second.find(node.name());
 303     if (it2 != iter->second.end())
 304     {
 305       return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
 306     }
 307   }
 308   try
 309   {
 310     // DO NOTHING
 311
 312     _is_supported[backend][node.name()] = true;
 313   }
 314   catch (std::runtime_error &e)
 315   {
 316     _is_supported[backend][node.name()] = false;
 317   }
 318   return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
 319 }
 320
 321 void HEScheduler::makeRank()
 322 {
 323   VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
 324
 325   _graph->operations().iterate(
 326       [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
 327
 328   // Check that ranks are calculated for all operations(nodes)
 329   _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
 330     UNUSED_RELEASE(index);
 331     assert(_op_to_rank->find(index) != _op_to_rank->end());
 332   });
 333   VERBOSE(HEScheduler::makeRank) << "task prioritizing finished" << std::endl;
 334 }
 335
 336 int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
 337 {
 338   auto op_to_rank_it = _op_to_rank->find(index);
 339   if (op_to_rank_it != _op_to_rank->end())
 340     return op_to_rank_it->second;
 341
 342   const auto &node = _graph->operations().at(index);
 343   int64_t rank = 0;
 344   const bool quant = isQuant(*_graph, node);
 345   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 346   auto supported_backends_quantity = static_cast<int64_t>(_all_backends.size());
 347
 348   const auto max_child_rank = DFSChildrenMaxRank(index);
 349
 350   // get average exec time of this op
 351   for (const auto &backend : _all_backends)
 352   {
 353     auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
 354     if (exec_time == _exec_time->NOT_FOUND)
 355     {
 356       exec_time = tryBackend(node, backend);
 357     }
 358     if (exec_time < _exec_time->getMax())
 359     {
 360       rank += exec_time;
 361     }
 362     else
 363     {
 364       // this operation isn't supported in this backend
 365       --supported_backends_quantity;
 366     }
 367   }
 368   if (supported_backends_quantity == 0)
 369   {
 370     throw std::runtime_error{"Encountered unsupported op: " + node.name()};
 371   }
 372   rank /= supported_backends_quantity;
 373
 374   // get standard deviation
 375   int64_t std = 0;
 376   for (const auto backend : _all_backends)
 377   {
 378     const auto exec_time = getOpTime(backend, node.name(), quant, size);
 379     if (exec_time < _exec_time->getMax())
 380     {
 381       std += (exec_time - rank) * (exec_time - rank);
 382     }
 383   }
 384   std /= supported_backends_quantity;
 385   if (std > 0)
 386   {
 387     std = static_cast<int>(std::sqrt(std));
 388     rank *= std;
 389   }
 390   rank += max_child_rank;
 391
 392   assert(rank >= 0);
 393   _rank_to_op.emplace(rank, index);
 394   _op_to_rank->emplace(index, rank);
 395   VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
 396                                    << " is " << rank << std::endl;
 397
 398   return rank;
 399 }
 400
 401 int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
 402 {
 403   const auto &node = _graph->operations().at(index);
 404   int64_t max_child_rank = 0;
 405   for (const auto &output : node.getOutputs())
 406   {
 407     const auto &operand = _graph->operands().at(output);
 408     const bool quant = operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
 409     // average data transfer cost of this operand's data
 410     int64_t avg_transfer_cost = 1;
 411     for (const auto *backend : _all_backends)
 412     {
 413       for (const auto *other_backend : _all_backends)
 414       {
 415         if (backend == other_backend)
 416         {
 417           continue;
 418         }
 419         // TODO Change it to controlflow backend
 420         auto transfer_cost =
 421             getPermuteTime(backend, other_backend, quant, operand.info().total_size());
 422         avg_transfer_cost += transfer_cost;
 423       }
 424     }
 425     avg_transfer_cost /= _all_backends.size();
 426     for (const auto &use : operand.getUses())
 427     {
 428       const auto cur_child_rank = DFSMaxRank(use);
 429       max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
 430     }
 431   }
 432   return max_child_rank;
 433 }
 434
 435 int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
 436                                           const int64_t &starting_time, const int64_t &time_amount)
 437 {
 438   const auto backend_times = _backends_avail_time.at(backend);
 439   // finishing and starting times of an op, that will come after current op
 440   auto next_op_fst = backend_times.upper_bound(starting_time);
 441   // finishing time of an op, that will come before current op
 442   auto prev_op_ft = starting_time;
 443   // until reach the "hole/gap", that is enough to run this op
 444   while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
 445   {
 446     prev_op_ft = next_op_fst->first + 1;
 447     ++next_op_fst;
 448   }
 449   return prev_op_ft;
 450 }
 451
 452 bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
 453 {
 454   VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
 455   int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
 456   const auto &node = _graph->operations().at(index);
 457
 458   std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
 459   // select the backend with the smallest eft of this task
 460   const backend::Backend *chosen_backend = nullptr;
 461   for (const auto *backend : _all_backends)
 462   {
 463     std::multimap<int64_t, int64_t> transfer_st_exec_time;
 464     const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
 465
 466     if (eft > est_and_et.first + est_and_et.second)
 467     {
 468       eft = est_and_et.first + est_and_et.second;
 469       selected_exec_time = est_and_et.second;
 470       chosen_backend = backend;
 471       selected_transfer_st_exec_time = transfer_st_exec_time;
 472     }
 473   }
 474
 475   if (chosen_backend == nullptr)
 476   {
 477     throw std::runtime_error{"Fail to choose backend on scheduler"};
 478   }
 479
 480   // this is part of a branch and it is assigned another backend
 481   if (parent_backend && parent_backend != chosen_backend)
 482   {
 483     return false;
 484   }
 485   for (const auto &it : selected_transfer_st_exec_time)
 486   {
 487     auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
 488     _backends_avail_time[_cpu_backend].insert({prev_op_ft + it.second, prev_op_ft});
 489   }
 490
 491   _ops_eft[index] = eft;
 492   _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
 493   _backend_resolver->setBackend(index, chosen_backend);
 494
 495   VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
 496                                  << chosen_backend->config()->id() << ". Its eft: " << eft
 497                                  << std::endl;
 498   return true;
 499 }
 500
 501 std::pair<int64_t, int64_t>
 502 HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::OperationIndex &index,
 503                             std::multimap<int64_t, int64_t> &transfer_st_exec_time)
 504 {
 505   // Permutation will cause creating a separate op_seq that contains just this permutation node.
 506   // This isn't needed for Linear executor since it doesn't use op_seqs
 507   // Number 1 ms is picked experimentally
 508   int64_t permute_fine = 1000;
 509   // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
 510   // permutation on other branches or non-nnfw specific tasks and have to wait for it.
 511   // Number 2 is picked experimentally
 512   const int64_t CPU_DELAY = 2;
 513   const auto &node = _graph->operations().at(index);
 514   const bool quant = isQuant(*_graph, node);
 515   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 516   // if this node can be part of a op_seq, then assigning different backend will cause creating
 517   // another op_seq
 518   if (isMergeable(*_graph, node))
 519   {
 520     permute_fine *= 2;
 521   }
 522   if (isWorkaroundSkip(*_graph, backend, node, quant))
 523   {
 524     return {_exec_time->getMax(), _exec_time->getMax()};
 525   }
 526   // get average exec time of the op on this backend
 527   auto exec_time = getOpTime(backend, node.name(), quant, size);
 528   if (backend->config()->id() == "cpu" && _is_parallel_exec)
 529   {
 530     exec_time *= CPU_DELAY;
 531   }
 532
 533   // get max eft of direct (one level above) predecessors
 534   auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
 535
 536   int64_t total_transfer_cost = 0;
 537   std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
 538   // Find free time for data transferring and insert it into backend taskset. This is needed:
 539   //  1. Time for multiple permutations for this node's input is found correctly
 540   //  2. If backend==cpu, then free time for this node must come after permutations
 541   for (auto &it : transfer_st_exec_time)
 542   {
 543     if (_is_parallel_exec)
 544     {
 545       it.second *= CPU_DELAY;
 546     }
 547     if (!_is_linear_exec)
 548     {
 549       it.second += permute_fine;
 550     }
 551     total_transfer_cost += it.second;
 552
 553     const auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
 554
 555     max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
 556
 557     const auto tmp = _backends_avail_time[_cpu_backend].emplace(prev_op_ft + it.second, prev_op_ft);
 558     inserted_permutations.push_back(tmp.first);
 559   }
 560   // find the hole/gap, where this op can be put or the finishing time of the last assigned op
 561   auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
 562
 563   // Remove inserted permutation from cpu's task set
 564   for (const auto &it : inserted_permutations)
 565   {
 566     _backends_avail_time[_cpu_backend].erase(it);
 567   }
 568
 569   /* In case non-parallel executor measure just exec time and data transfer time
 570    * because EFT(prev_op_ft) is the same for all backends. Since two operations
 571    * can't be run simultaneously, finish of running operation must be waited for.
 572    * When an operation starts, all backends are free. So, they need time just for
 573    * data transfer.*/
 574   if (!_is_parallel_exec)
 575   {
 576     VERBOSE(HEScheduler::ESTAndExecTime)
 577         << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
 578         << backend->config()->id() << " is " << exec_time
 579         << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
 580
 581     return {total_transfer_cost, exec_time};
 582   }
 583   VERBOSE(HEScheduler::ESTAndExecTime)
 584       << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
 585       << backend->config()->id() << ": " << exec_time
 586       << " microseconds. Backend available time: " << prev_op_ft
 587       << " Parent's max eft: " << max_pred_eft - total_transfer_cost
 588       << " data transfer cost: " << total_transfer_cost << std::endl;
 589
 590   return {prev_op_ft, exec_time};
 591 }
 592
 593 int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
 594                                 std::multimap<int64_t, int64_t> &transfer_st_exec_time)
 595 {
 596   int64_t max_pred_eft = 0;
 597   for (const auto &input_operand_idx : node.getInputs())
 598   {
 599     const auto &input_operand = _graph->operands().at(input_operand_idx);
 600     const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
 601
 602     auto input_node_idx = input_operand.getDef();
 603     if (input_node_idx.valid())
 604     {
 605       // Data transfer cost from parent's node backend to current node's backend:
 606       auto parent_backend = _backend_resolver->getBackend(input_node_idx);
 607
 608       max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
 609       if (parent_backend != backend)
 610       {
 611         // Multiply operand size by 2 because size must describe input+output size
 612         int64_t transfer_cost =
 613             getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
 614         transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
 615       }
 616     }
 617   }
 618   return max_pred_eft;
 619 }
 620
 621 } // namespace compiler
 622
 623 } // namespace onert