runtime/onert/test/core/compiler/Scheduler.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <compiler/HEScheduler.h>
  18 #include <exec/ExecTime.h>
  19
  20 #include <ir/Shape.h>
  21 #include <ir/InternalType.h>
  22 #include <ir/TypeInfo.h>
  23 #include <ir/DataType.h>
  24
  25 #include <ir/operation/BinaryArithmetic.h>
  26 #include <ir/operation/FullyConnected.h>
  27
  28 #include <gtest/gtest.h>
  29
  30 namespace
  31 {
  32 using namespace onert;
  33 using namespace ir;
  34 using namespace backend;
  35 using namespace operation;
  36 using namespace exec;
  37
  38 //
// Mock backend classes
  40 //
  41
  42 struct MockConfigCPU : public IConfig
  43 {
  44   std::string id() override { return "cpu"; }
  45   bool initialize() override { return true; };
  46   bool supportPermutation() override { return false; }
  47   Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
  48   bool supportDynamicTensor() override { return false; }
  49   bool supportFP16() override { return false; }
  50 };
  51
  52 struct MockBackendCPU : public Backend
  53 {
  54   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
  55   std::unique_ptr<BackendContext>
  56   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
  57   {
  58     return std::unique_ptr<BackendContext>(
  59         new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
  60   }
  61 };
  62
  63 struct MockConfigGPU : public IConfig
  64 {
  65   std::string id() override { return "gpu"; }
  66   bool initialize() override { return true; };
  67   bool supportPermutation() override { return false; }
  68   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  69   {
  70     return ir::Layout::UNKNOWN;
  71   }
  72   bool supportDynamicTensor() override { return false; }
  73   bool supportFP16() override { return false; }
  74 };
  75
  76 struct MockBackendGPU : public Backend
  77 {
  78   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
  79   std::unique_ptr<BackendContext>
  80   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
  81   {
  82     return std::unique_ptr<BackendContext>(
  83         new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
  84   }
  85 };
  86
  87 struct MockConfigNPU : public IConfig
  88 {
  89   std::string id() override { return "npu"; }
  90   bool initialize() override { return true; };
  91   bool supportPermutation() override { return false; }
  92   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  93   {
  94     return ir::Layout::UNKNOWN;
  95   }
  96   bool supportDynamicTensor() override { return false; }
  97   bool supportFP16() override { return false; }
  98 };
  99
 100 struct MockBackendNPU : public Backend
 101 {
 102   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
 103   std::unique_ptr<BackendContext>
 104   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
 105   {
 106     return std::unique_ptr<BackendContext>(
 107         new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
 108   }
 109 };
 110
 111 //
 112 // Constants
 113 //
 114
 115 const int OPERAND_ELEMS = 268203;
 116 const int OPERAND_SIZE = OPERAND_ELEMS * 4;
 117 const int OPERATION_SIZE = OPERAND_SIZE * 3;
 118
 119 const std::string LINEAR("Linear");
 120 const std::string DATAFLOW("Dataflow");
 121 const std::string PARALLEL("Parallel");
 122
 123 //
 124 // Helper functions
 125 //
 126
 127 // Set executor through environment variable
 128 void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
 129
 130 // Set profiling mode through environment variable
 131 void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
 132
 133 // Calculate operation size by addition sizes of all input and output operands
 134 uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
 135 {
 136   uint32_t size = 0;
 137   const auto &op = graph->operations().at(op_idx);
 138   for (const auto &ind : op.getInputs() + op.getOutputs())
 139     size += graph->operands().at(ind).info().total_size();
 140   return size;
 141 }
 142
 143 // Set execution operation time. This method is needed since ExecutionTime has only
 144 // 'updateOperationExecTime' method.
 145 void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
 146                           bool quant, uint32_t op_size, int64_t time)
 147 {
 148   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 149   assert(time > 0);
 150   int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
 151   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 152   et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
 153   assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
 154 }
 155
 156 // Set same execution time for all given backends/operations
 157 void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
 158                                 const std::vector<std::string> &op_names,
 159                                 const std::vector<uint32_t> &op_sizes, int64_t exec_time)
 160 {
 161   assert(op_names.size() == op_sizes.size());
 162   ExecTime et(backends);
 163   for (int i = 0; i < op_names.size(); ++i)
 164   {
 165     for (auto &backend : backends)
 166       setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
 167   }
 168   et.uploadOperationsExecTime();
 169 }
 170
 171 // Set permute time from one backend to another. This method is needed since ExecutionTime has only
 172 // 'updatePermuteTime' method.
 173 void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
 174                         bool quant, uint32_t op_size, int64_t time)
 175 {
 176   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 177   assert(time > 0);
 178   int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
 179   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 180   et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
 181   assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
 182 }
 183
 184 // Set same permutation time between all given backends
 185 void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
 186                                   const int operand_size, const int64_t exec_time)
 187 {
 188   ExecTime et(backends);
 189   for (const auto &backend : backends)
 190   {
 191     for (auto &other_backend : backends)
 192     {
 193       if (backend == other_backend)
 194         continue;
 195       setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
 196     }
 197   }
 198   et.uploadOperationsExecTime();
 199 }
 200
 201 //
 202 // Functions for creating graphs
 203 //
 204
 205 using OIS = OperandIndexSequence;
 206
 207 template <typename NodeT, typename... Types>
 208 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
 209 {
 210   auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
 211   auto op_idx = graph->addOperation(std::move(op));
 212   // For now in scheduler test all operations in tested graphs has same size (for simplicity)
 213   assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
 214   return op_idx;
 215 }
 216
 217 // Create straight graph: Add->Sub->Mul
 218 std::shared_ptr<Graph> createStraightGraph()
 219 {
 220   auto graph = std::make_shared<Graph>();
 221   const TypeInfo float_op(DataType::FLOAT32);
 222
 223   // Create add node
 224   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 225   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 226   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 227   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 228   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 229
 230   // Create sub node
 231   auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 232   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 233   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 234   create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
 235
 236   // Create mul node
 237   auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 238   auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 239   BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 240   create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
 241
 242   graph->finishBuilding();
 243   return graph;
 244 }
 245
 246 /* Create branched graph:
 247  *       [Add]
 248  *      //   \\
 249  *   [Mul1]  [FC2]
 250  *     ||     ||
 251  *   [Mul2]  [FC2]
 252  *      \\   //
 253  *       [Sub]
 254  */
 255 std::shared_ptr<Graph> createBranchedGraph()
 256 {
 257   auto graph = std::make_shared<Graph>();
 258   const TypeInfo float_op(DataType::FLOAT32);
 259
 260   // Create add node
 261   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 262   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 263   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 264   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 265   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 266
 267   // Create mul1 node
 268   auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 269   auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 270   BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 271   create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
 272                            mul1_op_params);
 273
 274   // Create mul2 node
 275   auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 276   auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 277   BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 278   create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
 279                            mul2_op_params);
 280
 281   // Create fc1 node
 282   auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 283   auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 284   FullyConnected::Param fc1_op_params{Activation::NONE};
 285   create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
 286
 287   // Create fc2 node
 288   auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 289   auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 290   FullyConnected::Param fc2_op_params{Activation::NONE};
 291   create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
 292
 293   // Create sub node
 294   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 295   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 296   create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
 297
 298   graph->finishBuilding();
 299   return graph;
 300 }
 301
 302 //
 303 // Tests setup/teardown
 304 //
 305
 306 // SetUp/TearDown methods runs before/after each test and performs actions common for each test
 307 class SchedulerTest : public ::testing::Test
 308 {
 309 protected:
 310   void SetUp() override
 311   {
 312     // Initialize mock backends
 313     _cpu_backend = new MockBackendCPU();
 314     _gpu_backend = new MockBackendGPU();
 315     _npu_backend = new MockBackendNPU();
 316     _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
 317
 318     // Remove previous profile data if it exists
 319     if (!remove("exec_time.json"))
 320     {
 321       // DO NOTHING (no profile data)
 322     }
 323
 324     // Remember original value of 'EXECUTOR' environment variable
 325     char *executor = std::getenv("EXECUTOR");
 326     _original_executor = executor == nullptr ? "" : executor;
 327
 328     // Remember original value of 'PROFILING_MODE' environment variable
 329     char *profiling_mode = std::getenv("PROFILING_MODE");
 330     _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
 331   }
 332
 333   void TearDown() override
 334   {
 335     delete _cpu_backend;
 336     delete _gpu_backend;
 337     delete _npu_backend;
 338     EXPECT_EQ(remove("exec_time.json"), 0);
 339     setenv("EXECUTOR", _original_executor.c_str(), true);
 340     setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
 341   }
 342
 343   backend::BackendContexts buildBackendContexts(const Graph &graph)
 344   {
 345     backend::BackendContexts contexts;
 346     for (auto backend : _mock_backends)
 347     {
 348       contexts.emplace(backend, backend->newContext(graph, nullptr, false));
 349     }
 350     return contexts;
 351   }
 352
 353   const MockBackendCPU *_cpu_backend{nullptr};
 354   const MockBackendGPU *_gpu_backend{nullptr};
 355   const MockBackendNPU *_npu_backend{nullptr};
 356   std::vector<const Backend *> _mock_backends;
 357
 358   std::string _original_executor;
 359   std::string _original_profiling_mode;
 360 };
 361
 362 class SchedulerTestWithExecutorParam : public SchedulerTest,
 363                                        public testing::WithParamInterface<std::string>
 364 {
 365 };
 366
 367 //
 368 // HEScheduler tests
 369 //
 370
 371 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
 372 TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
 373 {
 374   setExecutor(GetParam());
 375
 376   // Prepare graph
 377   ir::Subgraphs subgs;
 378   auto graph(createStraightGraph());
 379   subgs.push(ir::SubgraphIndex{0}, graph);
 380   OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
 381
 382   // Set default execution and transfer time
 383   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
 384   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
 385                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 386
 387   // Test 1
 388   // Expected behaviour: scheduler assigns different backend to each node
 389   {
 390     // For each backend reduce execution time of one node
 391     ExecTime et(_mock_backends);
 392     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
 393     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
 394     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
 395     et.uploadOperationsExecTime();
 396
 397     // Test scheduler
 398     auto backend_contexts = buildBackendContexts(*graph);
 399     auto scheduler = compiler::HEScheduler(backend_contexts,
 400                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 401     const auto br = scheduler.schedule(*graph);
 402     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 403     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
 404     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
 405   }
 406
 407   // Test 2
 408   // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
 409   {
 410     // Increase transfer time
 411     setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
 412
 413     // Test scheduler
 414     auto backend_contexts = buildBackendContexts(*graph);
 415     auto scheduler = compiler::HEScheduler(backend_contexts,
 416                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 417     const auto br = scheduler.schedule(*graph);
 418     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 419     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 420     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
 421   }
 422 }
 423
 424 // Test scheduler behavior for branched graph with known execution time of all nodes and permutes
 425 TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
 426 {
 427   const int64_t NPU_ET = 5000;
 428   setExecutor(GetParam());
 429
 430   // Prepare graph
 431   ir::Subgraphs subgs;
 432   auto graph(createBranchedGraph());
 433   subgs.push(ir::SubgraphIndex{0}, graph);
 434   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 435       sub_op_idx(5);
 436
 437   // Set default execution and transfer time
 438   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
 439   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
 440                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 441
 442   // Test 1
 443   // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
 444   // nodes, in case of parallel executor scheduler assigns different backends to branches.
 445   {
 446     // Reduce execution time
 447     ExecTime et(_mock_backends);
 448     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
 449     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
 450     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
 451     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
 452     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
 453     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
 454     et.uploadOperationsExecTime();
 455
 456     // Test scheduler
 457     auto backend_contexts = buildBackendContexts(*graph);
 458     auto scheduler = compiler::HEScheduler(backend_contexts,
 459                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 460     const auto br = scheduler.schedule(*graph);
 461
 462     std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
 463     if (GetParam() == PARALLEL)
 464     {
 465       branch1_expected_backend =
 466           br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
 467       branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
 468     }
 469
 470     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 471     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
 472     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
 473     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
 474     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
 475     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 476   }
 477
 478   // Test 2
 479   // Expected behaviour: scheduler assigns single backend to all nodes
 480   {
 481     // Increase execution time for GPU backend
 482     ExecTime et(_mock_backends);
 483     /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
 484      * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
 485      * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
 486      * branching or scheduler assigns another backend to a node*/
 487     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 488     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 489     et.uploadOperationsExecTime();
 490
 491     // Test scheduler
 492     auto backend_contexts = buildBackendContexts(*graph);
 493     auto scheduler = compiler::HEScheduler(backend_contexts,
 494                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 495     const auto br = scheduler.schedule(*graph);
 496     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 497     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 498     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 499     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
 500     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
 501     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 502   }
 503 }
 504
 505 // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
 506 // one time for each executor
 507 INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
 508                         testing::Values(LINEAR, DATAFLOW, PARALLEL));
 509
 510 // Test scheduler behavior for branched graph and enabled profiling mode
 511 TEST_F(SchedulerTest, branched_graph_profiling_mode)
 512 {
 513   const int ET = 1e5;
 514
 515   // Turn on profiling mode
 516   setProfilingMode(true);
 517   setExecutor(DATAFLOW);
 518
 519   // Prepare graph
 520   ir::Subgraphs subgs;
 521   auto graph(createBranchedGraph());
 522   subgs.push(ir::SubgraphIndex{0}, graph);
 523   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 524       sub_op_idx(5);
 525
 526   // Test 1
 527   // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
 528   {
 529     // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
 530     ExecTime et(_mock_backends);
 531     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
 532     setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 533     setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 534     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
 535     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 536     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
 537     setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
 538     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 539     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
 540     et.uploadOperationsExecTime();
 541
 542     // Test scheduler
 543     auto backend_contexts = buildBackendContexts(*graph);
 544     auto scheduler = compiler::HEScheduler(backend_contexts,
 545                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 546     const auto br = scheduler.schedule(*graph);
 547     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 548     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 549     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
 550     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
 551     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 552   }
 553
 554   // Test 2
 555   // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
 556   // neighbor nodes
 557   {
 558     // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
 559     ExecTime et(_mock_backends);
 560     setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
 561     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 562     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 563     et.uploadOperationsExecTime();
 564
 565     // Test scheduler
 566     auto backend_contexts = buildBackendContexts(*graph);
 567     auto scheduler = compiler::HEScheduler(backend_contexts,
 568                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 569     const auto br = scheduler.schedule(*graph);
 570     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 571               br->getBackend(mul1_op_idx)->config()->id());
 572     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 573               br->getBackend(fc1_op_idx)->config()->id());
 574     ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
 575               br->getBackend(mul2_op_idx)->config()->id());
 576     ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
 577               br->getBackend(fc2_op_idx)->config()->id());
 578     ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
 579               br->getBackend(sub_op_idx)->config()->id());
 580     ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
 581               br->getBackend(sub_op_idx)->config()->id());
 582   }
 583 }
 584
 585 // TODO: Add tests with unknown execution and permutation time
 586
 587 } // unnamed namespace