runtime/neurun/core/src/compiler/HEScheduler.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "model/Operand.h"
  18 #include "compiler/HEScheduler.h"
  19 #include "ir/Graph.h"
  20 #include "util/ConfigSource.h"
  21 #include "compiler/IExecutionBuilder.h"
  22 #include "compiler/BackendResolver.h"
  23 #include "backend/IShapeFixer.h"
  24 #include "util/logging.h"
  25 #include "util/Utils.h"
  26 #include "exec/FunctionSequence.h"
  27 #include <cassert>
  28 #include <cmath>
  29 #include <chrono>
  30
  31 namespace neurun
  32 {
  33
  34 namespace compiler
  35 {
  36 static uint32_t getOperationsFlattenedIOSize(const graph::Graph &graph,
  37                                              const model::Operation &node)
  38 {
  39   uint32_t size = 0;
  40   for (const auto &input : node.getInputs())
  41   {
  42     size += graph.operands().at(input).info().total_size();
  43   }
  44   for (const auto &output : node.getOutputs())
  45   {
  46     size += graph.operands().at(output).info().total_size();
  47   }
  48   return size;
  49 }
  50
  51 static bool isQuant(const graph::Graph &graph, const model::Operation &node)
  52 {
  53   for (const auto &input : node.getInputs())
  54   {
  55     const auto &obj = graph.operands().at(input);
  56     if (obj.typeInfo().type() == ir::DataType::QUANT8_ASYMM)
  57     {
  58       return true;
  59     }
  60   }
  61   return false;
  62 }
  63
  64 static bool isWorkaroundSkip(const graph::Graph &graph, const backend::Backend *backend,
  65                              const model::Operation &node, bool quant)
  66 {
  67   /* TODO: this is workaround, come up with better solution if have.
  68       Adding exception in stage doesn't help. Because if there is a record for add without
  69       broadcast, scheduling will select it since it doesn't distinguish broadcast and
  70       non-broadcast like it does for quant non-quantized*/
  71   if (backend->config()->id() == "cpu" &&
  72       (node.opcode() == model::OpCode::Add || node.opcode() == model::OpCode::Sub ||
  73        node.opcode() == model::OpCode::Mul))
  74   {
  75     const auto lhs_index{node.getInputs().at(model::operation::Add::Input::LHS)};
  76     const auto rhs_index{node.getInputs().at(model::operation::Add::Input::RHS)};
  77     /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
  78      * without broadcasting*/
  79     if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
  80     {
  81       return true;
  82     }
  83   }
  84   /* TODO: this is workaround, come up with better solution if have.
  85           Adding exception in stage doesn't help. Because if there is a record for Mul without
  86           broadcast, scheduling will select it since it doesn't distinguish broadcast and
  87           non-broadcast like it does for quant non-quantized*/
  88   else if (backend->config()->id() == "acl_neon" && node.opcode() == model::OpCode::Mul)
  89   {
  90     const auto lhs_index{node.getInputs().at(model::operation::Mul::Input::LHS)};
  91     const auto rhs_index{node.getInputs().at(model::operation::Mul::Input::RHS)};
  92
  93     // Nontrivial broadcasting isn't supported yet
  94     if (quant ||
  95         !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
  96     {
  97       return true;
  98     }
  99   }
 100   return false;
 101 }
 102
 103 // if a node can be merged into subgraph
 104 static bool isMergable(const graph::Graph &graph, const model::Operation &node)
 105 {
 106   size_t prev_op_cnt = 0;
 107   for (const auto &input : node.getInputs())
 108   {
 109     // only valid_inputs
 110     const auto &operand = graph.operands().at(input);
 111     if (operand.isConstant())
 112       continue;
 113
 114     // This operand is output of operation, not weight or bias
 115     if (operand.getDef().list().size() > 0)
 116       ++prev_op_cnt;
 117
 118     // Current node has multiple inputs as concat or at the beginning of the separated branch
 119     if (prev_op_cnt > 1 || operand.getUses().list().size() > 1)
 120     {
 121       return false;
 122     }
 123   }
 124   return true;
 125 }
 126
 127 void HEScheduler::scheduleShufflingBackends()
 128 {
 129   VERBOSE(HEScheduler::schedule)
 130       << "Started task scheduling: uses all backends to get more metrics for data transfer"
 131       << std::endl;
 132   size_t backend_ind = 0;
 133   for (const auto &rank : _rank_to_op)
 134   {
 135     VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
 136     const auto &node = _graph->operations().at(rank.second);
 137     const bool quant = isQuant(*_graph, node);
 138     const auto size = getOperationsFlattenedIOSize(*_graph, node);
 139     for (size_t i = 0;; ++i)
 140     {
 141       if (i == _all_backends.size())
 142       {
 143         // wasn't able to find backend
 144         assert(false);
 145         break;
 146       }
 147       if (backend_ind == _all_backends.size())
 148       {
 149         backend_ind = 0;
 150       }
 151       if (isWorkaroundSkip(*_graph, _all_backends[backend_ind], node, quant))
 152       {
 153         ++backend_ind;
 154         continue;
 155       }
 156       const auto exec_time =
 157           _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
 158       // Scheduling to measure data transfer must be done after measuring all backends separately
 159       assert(exec_time != _exec_time->NOT_FOUND);
 160       if (exec_time == _exec_time->getMax())
 161       {
 162         ++backend_ind;
 163         continue;
 164       }
 165       _backend_resolver->setBackend(rank.second, _all_backends[backend_ind]);
 166       VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
 167                                      << _all_backends[backend_ind]->config()->id() << std::endl;
 168       ++backend_ind;
 169       break;
 170     }
 171   }
 172 }
 173
 174 bool HEScheduler::isNodeProfiled(const model::Operation &node)
 175 {
 176   const bool quant = isQuant(*_graph, node);
 177   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 178   for (const auto *backend : _all_backends)
 179   {
 180     const auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
 181     if (exec_time == _exec_time->NOT_FOUND)
 182       return false;
 183   }
 184   return true;
 185 }
 186
 187 void HEScheduler::scheduleBranch(const model::OperationIndex &index,
 188                                  model::OperationIndexMap<bool> &scheduled)
 189 {
 190   auto loc_index = index;
 191   const backend::Backend *parent_backend = nullptr;
 192   while (true)
 193   {
 194     if (scheduled[loc_index])
 195     {
 196       return;
 197     }
 198     if (!schedule(loc_index, parent_backend))
 199     {
 200       return;
 201     }
 202     scheduled[loc_index] = true;
 203     parent_backend = _backend_resolver->getBackend(loc_index);
 204
 205     const auto &node = _graph->operations().at(loc_index);
 206     model::OperandIndex tmp;
 207     /* get the only output operand, that is input of the next single operation
 208      *   and just this nodes output.*/
 209     if (node.getOutputs().size() != 1)
 210     {
 211       return;
 212     }
 213     const auto &only_out_operand = _graph->operands().at(*node.getOutputs().begin());
 214     loc_index = only_out_operand.getUses().list().front();
 215     /* verify, that next node is neither beginning nor ending node of a branch*/
 216     const auto &next_node = _graph->operations().at(loc_index);
 217     if (!isMergable(*_graph, next_node))
 218     {
 219       return;
 220     }
 221   }
 222 }
 223
 224 std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const graph::Graph &graph)
 225 {
 226   _graph = &graph;
 227   VERBOSE(HEScheduler::schedule) << "task scheduling started" << std::endl;
 228   // Make ranks and save in descending order
 229   makeRank();
 230
 231   for (const auto *backend : _all_backends)
 232   {
 233     _backends_avail_time.emplace(backend, std::map<int64_t, int64_t>{{0, 0}});
 234   }
 235
 236   const bool is_profiling = util::getConfigBool(util::config::PROFILING_MODE);
 237   if (is_profiling)
 238   {
 239     // Check if profiling info about all backend/node pairs already exists
 240     bool all_nodes_are_profiled = true;
 241     _graph->operations().iterate([&](const model::OperationIndex &, const model::Operation &op) {
 242       if (all_nodes_are_profiled)
 243         all_nodes_are_profiled = isNodeProfiled(op);
 244     });
 245
 246     // If all nodes are already profiled - schedule backends in such order, so more profiling
 247     // information about between-backends data transfer could be collected
 248     if (all_nodes_are_profiled)
 249     {
 250       scheduleShufflingBackends();
 251       VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
 252       return std::move(_backend_resolver);
 253     }
 254   }
 255
 256   model::OperationIndexMap<bool> visited;
 257   graph.operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
 258     visited[index] = false;
 259   });
 260   // for each task select the backend with the smallest earliest finishing time(eft)
 261   for (const auto &rank : _rank_to_op)
 262   {
 263     scheduleBranch(rank.second, visited);
 264   }
 265   VERBOSE(HEScheduler::schedule) << "task scheduling finished" << std::endl;
 266   return std::move(_backend_resolver);
 267 }
 268
 269 int64_t HEScheduler::getOpTime(const backend::Backend *backend, const std::string &operation,
 270                                bool quant, uint32_t size)
 271 {
 272   const auto time = _exec_time->getOperationExecTime(backend, operation, quant, size);
 273   if (time != _exec_time->NOT_FOUND)
 274     return time;
 275
 276   return _is_supported.at(backend).at(operation) ? 1 : _exec_time->getMax();
 277 }
 278
 279 int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
 280                                     const backend::Backend *dst_backend, bool quant, uint32_t size)
 281 {
 282   const auto time = _exec_time->getPermuteTime(src_backend, dst_backend, quant, size);
 283   if (time != _exec_time->NOT_FOUND)
 284     return time;
 285
 286   // Makes the scheduler prefer keeping computations on one backend
 287   return size / 200;
 288 }
 289
 290 int64_t HEScheduler::tryBackend(const model::Operation &node, const backend::Backend *backend)
 291 {
 292   // if there is no profiling info don't use this backend during scheduling
 293   if (!util::getConfigBool(util::config::PROFILING_MODE))
 294   {
 295     VERBOSE(HEScheduler::tryBackend)
 296         << "Trying to HE schedule while there is no profiling info for " << node.name()
 297         << " on backend " << backend->config()->id() << ". So this backend won't be used. "
 298         << std::endl;
 299     _is_supported[backend][node.name()] = false;
 300     return _exec_time->getMax();
 301   }
 302   auto iter = _is_supported.find(backend);
 303   if (iter != _is_supported.end())
 304   {
 305     auto it2 = iter->second.find(node.name());
 306     if (it2 != iter->second.end())
 307     {
 308       return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
 309     }
 310   }
 311   try
 312   {
 313     _backend_resolver->getBackendContext(backend)->shape_fixer->fix(node);
 314
 315     _is_supported[backend][node.name()] = true;
 316   }
 317   catch (std::runtime_error &e)
 318   {
 319     _is_supported[backend][node.name()] = false;
 320   }
 321   return _is_supported[backend][node.name()] ? 1 : _exec_time->getMax();
 322 }
 323
 324 void HEScheduler::makeRank()
 325 {
 326   VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
 327
 328   _graph->operations().iterate(
 329       [&](const model::OperationIndex &index, const model::Operation &) { DFSMaxRank(index); });
 330
 331   // Check that ranks are calculated for all operations(nodes)
 332   _graph->operations().iterate([&](const model::OperationIndex &index, const model::Operation &) {
 333     UNUSED_RELEASE(index);
 334     assert(_op_to_rank->find(index) != _op_to_rank->end());
 335   });
 336   VERBOSE(HEScheduler::makeRank) << "task prioritizing finished" << std::endl;
 337 }
 338
 339 int64_t HEScheduler::DFSMaxRank(const model::OperationIndex &index)
 340 {
 341   auto op_to_rank_it = _op_to_rank->find(index);
 342   if (op_to_rank_it != _op_to_rank->end())
 343     return op_to_rank_it->second;
 344
 345   const auto &node = _graph->operations().at(index);
 346   int64_t rank = 0;
 347   const bool quant = isQuant(*_graph, node);
 348   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 349   auto supported_backends_quantity = static_cast<int64_t>(_all_backends.size());
 350
 351   const auto max_child_rank = DFSChildrenMaxRank(index);
 352
 353   // get average exec time of this op
 354   for (const auto &backend : _all_backends)
 355   {
 356     auto exec_time = _exec_time->getOperationExecTime(backend, node.name(), quant, size);
 357     if (exec_time == _exec_time->NOT_FOUND)
 358     {
 359       exec_time = tryBackend(node, backend);
 360     }
 361     if (exec_time < _exec_time->getMax())
 362     {
 363       rank += exec_time;
 364     }
 365     else
 366     {
 367       // this operation isn't supported in this backend
 368       --supported_backends_quantity;
 369     }
 370   }
 371   if (supported_backends_quantity == 0)
 372   {
 373     throw std::runtime_error{"Encountered unsupported op: " + node.name()};
 374   }
 375   rank /= supported_backends_quantity;
 376
 377   // get standard deviation
 378   int64_t std = 0;
 379   for (const auto backend : _all_backends)
 380   {
 381     const auto exec_time = getOpTime(backend, node.name(), quant, size);
 382     if (exec_time < _exec_time->getMax())
 383     {
 384       std += (exec_time - rank) * (exec_time - rank);
 385     }
 386   }
 387   std /= supported_backends_quantity;
 388   if (std > 0)
 389   {
 390     std = static_cast<int>(std::sqrt(std));
 391     rank *= std;
 392   }
 393   rank += max_child_rank;
 394
 395   assert(rank >= 0);
 396   _rank_to_op.emplace(rank, index);
 397   _op_to_rank->emplace(index, rank);
 398   VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
 399                                    << " is " << rank << std::endl;
 400
 401   return rank;
 402 }
 403
 404 int64_t HEScheduler::DFSChildrenMaxRank(const model::OperationIndex &index)
 405 {
 406   const auto &node = _graph->operations().at(index);
 407   int64_t max_child_rank = 0;
 408   for (const auto &output : node.getOutputs())
 409   {
 410     const auto &operand = _graph->operands().at(output);
 411     const bool quant = operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
 412     // average data transfer cost of this operand's data
 413     int64_t avg_transfer_cost = 1;
 414     for (const auto *backend : _all_backends)
 415     {
 416       for (const auto *other_backend : _all_backends)
 417       {
 418         if (backend == other_backend)
 419         {
 420           continue;
 421         }
 422         auto transfer_cost =
 423             _exec_time->getPermuteTime(backend, other_backend, quant, operand.info().total_size());
 424         if (transfer_cost == _exec_time->NOT_FOUND)
 425         {
 426           // Makes the scheduler prefer keeping computations on one backend
 427           transfer_cost = operand.info().total_size() / 100;
 428         }
 429         avg_transfer_cost += transfer_cost;
 430       }
 431     }
 432     avg_transfer_cost /= _all_backends.size();
 433     for (const auto &use : operand.getUses().list())
 434     {
 435       const auto cur_child_rank = DFSMaxRank(use);
 436       max_child_rank = std::max(max_child_rank, cur_child_rank + avg_transfer_cost);
 437     }
 438   }
 439   return max_child_rank;
 440 }
 441
 442 int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
 443                                           const int64_t &starting_time, const int64_t &time_amount)
 444 {
 445   const auto backend_times = _backends_avail_time.at(backend);
 446   // finishing and starting times of an op, that will come after current op
 447   auto next_op_fst = backend_times.upper_bound(starting_time);
 448   // finishing time of an op, that will come before current op
 449   auto prev_op_ft = starting_time;
 450   // until reach the "hole/gap", that is enough to run this op
 451   while (next_op_fst != backend_times.end() && next_op_fst->second - prev_op_ft <= time_amount)
 452   {
 453     prev_op_ft = next_op_fst->first + 1;
 454     ++next_op_fst;
 455   }
 456   return prev_op_ft;
 457 }
 458
 459 bool HEScheduler::schedule(const model::OperationIndex &index,
 460                            const backend::Backend *parent_backend)
 461 {
 462   VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
 463   int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
 464   const auto &node = _graph->operations().at(index);
 465
 466   std::multimap<int64_t, int64_t> selected_transfer_st_exec_time;
 467   // select the backend with the smallest eft of this task
 468   const backend::Backend *chosen_backend = nullptr;
 469   for (const auto *backend : _all_backends)
 470   {
 471     std::multimap<int64_t, int64_t> transfer_st_exec_time;
 472     const auto est_and_et = ESTAndExecTime(backend, index, transfer_st_exec_time);
 473
 474     if (eft > est_and_et.first + est_and_et.second)
 475     {
 476       eft = est_and_et.first + est_and_et.second;
 477       selected_exec_time = est_and_et.second;
 478       chosen_backend = backend;
 479       selected_transfer_st_exec_time = transfer_st_exec_time;
 480     }
 481   }
 482
 483   if (chosen_backend == nullptr)
 484   {
 485     throw std::runtime_error{"Fail to choose backend on scheduler"};
 486   }
 487
 488   // this is part of a branch and it is assigned another backend
 489   if (parent_backend && parent_backend != chosen_backend)
 490   {
 491     return false;
 492   }
 493   for (const auto &it : selected_transfer_st_exec_time)
 494   {
 495     auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
 496     _backends_avail_time[_cpu_backend].insert({prev_op_ft + it.second, prev_op_ft});
 497   }
 498
 499   _ops_eft[index] = eft;
 500   _backends_avail_time[chosen_backend].emplace(eft, eft - selected_exec_time);
 501   _backend_resolver->setBackend(index, chosen_backend);
 502
 503   VERBOSE(HEScheduler::schedule) << "backend for " << node.name() << " is "
 504                                  << chosen_backend->config()->id() << ". Its eft: " << eft
 505                                  << std::endl;
 506   return true;
 507 }
 508
 509 std::pair<int64_t, int64_t>
 510 HEScheduler::ESTAndExecTime(const backend::Backend *backend, const model::OperationIndex &index,
 511                             std::multimap<int64_t, int64_t> &transfer_st_exec_time)
 512 {
 513   const bool is_linear_exec = "Linear" == util::getConfigString(util::config::EXECUTOR);
 514   const bool is_parallel_exec = "Parallel" == util::getConfigString(util::config::EXECUTOR);
 515   // Permutation will cause creating a separate subgraph that contains just this permutation node.
 516   // This isn't needed for Linear executor since it doesn't use subgraphs
 517   // Number 1 ms is picked experimentally
 518   int64_t permute_fine = 1000;
 519   // Multiply cpu operations' exec time by 2 because in parallel executor it might be busy with
 520   // permutation on other branches or non-nnfw specific tasks and have to wait for it.
 521   // Number 2 is picked experimentally
 522   const int64_t CPU_DELAY = 2;
 523   const auto &node = _graph->operations().at(index);
 524   const bool quant = isQuant(*_graph, node);
 525   const auto size = getOperationsFlattenedIOSize(*_graph, node);
 526   // if this node can be part of a subgraph, then assigning different backend will cause creating
 527   // another subgraph
 528   if (isMergable(*_graph, node))
 529   {
 530     permute_fine *= 2;
 531   }
 532   if (isWorkaroundSkip(*_graph, backend, node, quant))
 533   {
 534     return {_exec_time->getMax(), _exec_time->getMax()};
 535   }
 536   // get average exec time of the op on this backend
 537   auto exec_time = getOpTime(backend, node.name(), quant, size);
 538   if (backend->config()->id() == "cpu" && is_parallel_exec)
 539   {
 540     exec_time *= CPU_DELAY;
 541   }
 542
 543   // get max eft of direct (one level above) predecessors
 544   auto max_pred_eft = predMaxEFT(backend, node, transfer_st_exec_time);
 545
 546   int64_t total_transfer_cost = 0;
 547   std::vector<std::multimap<int64_t, int64_t>::iterator> inserted_permutations;
 548   // Find free time for data transferring and insert it into backend taskset. This is needed:
 549   //  1. Time for multiple permutations for this node's input is found correctly
 550   //  2. If backend==cpu, then free time for this node must come after permutations
 551   for (auto &it : transfer_st_exec_time)
 552   {
 553     if (is_parallel_exec)
 554     {
 555       it.second *= CPU_DELAY;
 556     }
 557     if (!is_linear_exec)
 558     {
 559       it.second += permute_fine;
 560     }
 561     total_transfer_cost += it.second;
 562
 563     const auto prev_op_ft = backendAvailableTime(_cpu_backend, it.first, it.second);
 564
 565     max_pred_eft = std::max(max_pred_eft, prev_op_ft + it.second);
 566
 567     const auto tmp = _backends_avail_time[_cpu_backend].emplace(prev_op_ft + it.second, prev_op_ft);
 568     inserted_permutations.push_back(tmp.first);
 569   }
 570   // find the hole/gap, where this op can be put or the finishing time of the last assigned op
 571   auto prev_op_ft = backendAvailableTime(backend, max_pred_eft, exec_time);
 572
 573   // Remove inserted permutation from cpu's task set
 574   for (const auto &it : inserted_permutations)
 575   {
 576     _backends_avail_time[_cpu_backend].erase(it);
 577   }
 578
 579   /* In case non-parallel executor measure just exec time and data transfer time
 580    * because EFT(prev_op_ft) is the same for all backends. Since two operations
 581    * can't be run simultaneously, finish of running operation must be waited for.
 582    * When an operation starts, all backends are free. So, they need time just for
 583    * data transfer.*/
 584   if (!is_parallel_exec)
 585   {
 586     VERBOSE(HEScheduler::ESTAndExecTime)
 587         << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
 588         << backend->config()->id() << " is " << exec_time
 589         << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
 590
 591     return {total_transfer_cost, exec_time};
 592   }
 593   VERBOSE(HEScheduler::ESTAndExecTime)
 594       << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
 595       << backend->config()->id() << ": " << exec_time
 596       << " microseconds. Backend available time: " << prev_op_ft
 597       << " Parent's max eft: " << max_pred_eft - total_transfer_cost
 598       << " data transfer cost: " << total_transfer_cost << std::endl;
 599
 600   return {prev_op_ft, exec_time};
 601 }
 602
 603 int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const model::Operation &node,
 604                                 std::multimap<int64_t, int64_t> &transfer_st_exec_time)
 605 {
 606   int64_t max_pred_eft = 0;
 607   for (const auto &input_operand_idx : node.getInputs())
 608   {
 609     const auto &input_operand = _graph->operands().at(input_operand_idx);
 610     const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT8_ASYMM;
 611
 612     for (const auto &input_node_idx : input_operand.getDef().list())
 613     {
 614       // Data transfer cost from parent's node backend to current node's backend:
 615       auto parent_backend = _backend_resolver->getBackend(input_node_idx);
 616
 617       max_pred_eft = std::max(max_pred_eft, _ops_eft.at(input_node_idx));
 618       if (parent_backend != backend)
 619       {
 620         // Multiply operand size by 2 because size must describe input+output size
 621         int64_t transfer_cost =
 622             getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
 623         transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
 624       }
 625     }
 626   }
 627   return max_pred_eft;
 628 }
 629
 630 } // namespace compiler
 631
 632 } // namespace neurun