runtime/onert/core/src/compiler/HEScheduler.test.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "HEScheduler.h"
  18 #include "../exec/ExecTime.h"
  19
  20 #include <ir/DataType.h>
  21 #include <ir/InternalType.h>
  22 #include <ir/Shape.h>
  23 #include <ir/TypeInfo.h>
  24 #include <ir/operation/BinaryArithmetic.h>
  25 #include <ir/operation/FullyConnected.h>
  26
  27 #include <gtest/gtest.h>
  28
  29 namespace
  30 {
  31 using namespace onert;
  32 using namespace ir;
  33 using namespace backend;
  34 using namespace operation;
  35 using namespace exec;
  36
  37 //
  38 // Mock backends classes
  39 //
  40
  41 struct MockConfigCPU : public IConfig
  42 {
  43   std::string id() override { return "cpu"; }
  44   bool initialize() override { return true; };
  45   bool supportPermutation() override { return false; }
  46   Layout supportLayout(const IOperation &, Layout) override { return Layout::UNKNOWN; }
  47   bool supportDynamicTensor() override { return false; }
  48   bool supportFP16() override { return false; }
  49 };
  50
  51 class MockBackendContext : public BackendContext
  52 {
  53 public:
  54   using BackendContext::BackendContext;
  55   ITensorRegistry *genTensors() override { return nullptr; }
  56   FunctionMap genKernels() override { return {}; }
  57 };
  58
  59 struct MockBackendCPU : public Backend
  60 {
  61   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
  62   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
  63   {
  64     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
  65   }
  66 };
  67
  68 struct MockConfigGPU : public IConfig
  69 {
  70   std::string id() override { return "gpu"; }
  71   bool initialize() override { return true; };
  72   bool supportPermutation() override { return false; }
  73   ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
  74   {
  75     return ir::Layout::UNKNOWN;
  76   }
  77   bool supportDynamicTensor() override { return false; }
  78   bool supportFP16() override { return false; }
  79 };
  80
  81 struct MockBackendGPU : public Backend
  82 {
  83   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
  84   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
  85   {
  86     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
  87   }
  88 };
  89
  90 struct MockConfigNPU : public IConfig
  91 {
  92   std::string id() override { return "npu"; }
  93   bool initialize() override { return true; };
  94   bool supportPermutation() override { return false; }
  95   ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
  96   {
  97     return ir::Layout::UNKNOWN;
  98   }
  99   bool supportDynamicTensor() override { return false; }
 100   bool supportFP16() override { return false; }
 101 };
 102
 103 struct MockBackendNPU : public Backend
 104 {
 105   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
 106   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
 107   {
 108     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
 109   }
 110 };
 111
 112 //
 113 // Constants
 114 //
 115
 116 const int OPERAND_ELEMS = 268203;
 117 const int OPERAND_SIZE = OPERAND_ELEMS * 4;
 118 const int OPERATION_SIZE = OPERAND_SIZE * 3;
 119
 120 const std::string LINEAR("Linear");
 121 const std::string DATAFLOW("Dataflow");
 122 const std::string PARALLEL("Parallel");
 123
 124 //
 125 // Helper functions
 126 //
 127
 128 // Set executor through environment variable
 129 void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
 130
 131 // Set profiling mode through environment variable
 132 void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
 133
 134 // Calculate operation size by addition sizes of all input and output operands
 135 uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
 136 {
 137   uint32_t size = 0;
 138   const auto &op = graph->operations().at(op_idx);
 139   for (const auto &ind : op.getInputs() + op.getOutputs())
 140     size += graph->operands().at(ind).info().total_size();
 141   return size;
 142 }
 143
 144 // Set execution operation time. This method is needed since ExecutionTime has only
 145 // 'updateOperationExecTime' method.
 146 void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
 147                           bool quant, uint32_t op_size, int64_t time)
 148 {
 149   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 150   assert(time > 0);
 151   int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
 152   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 153   et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
 154   assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
 155 }
 156
 157 // Set same execution time for all given backends/operations
 158 void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
 159                                 const std::vector<std::string> &op_names,
 160                                 const std::vector<uint32_t> &op_sizes, int64_t exec_time)
 161 {
 162   assert(op_names.size() == op_sizes.size());
 163   ExecTime et(backends);
 164   for (int i = 0; i < op_names.size(); ++i)
 165   {
 166     for (const auto backend : backends)
 167       setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
 168   }
 169   et.storeOperationsExecTime();
 170 }
 171
 172 // Set permute time from one backend to another. This method is needed since ExecutionTime has only
 173 // 'updatePermuteTime' method.
 174 void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
 175                         bool quant, uint32_t op_size, int64_t time)
 176 {
 177   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 178   assert(time > 0);
 179   int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
 180   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 181   et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
 182   assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
 183 }
 184
 185 // Set same permutation time between all given backends
 186 void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
 187                                   const int operand_size, const int64_t exec_time)
 188 {
 189   ExecTime et(backends);
 190   for (const auto &backend : backends)
 191   {
 192     for (const auto other_backend : backends)
 193     {
 194       if (backend == other_backend)
 195         continue;
 196       setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
 197     }
 198   }
 199   et.storeOperationsExecTime();
 200 }
 201
 202 //
 203 // Functions for creating graphs
 204 //
 205
 206 using OIS = OperandIndexSequence;
 207
 208 template <typename NodeT, typename... Types>
 209 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
 210 {
 211   auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
 212   auto op_idx = graph->addOperation(std::move(op));
 213   // For now in scheduler test all operations in tested graphs has same size (for simplicity)
 214   assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
 215   return op_idx;
 216 }
 217
 218 // Create straight graph: Add->Sub->Mul
 219 std::shared_ptr<Graph> createStraightGraph()
 220 {
 221   auto graph = std::make_shared<Graph>();
 222   const TypeInfo float_op(DataType::FLOAT32);
 223
 224   // Create add node
 225   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 226   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 227   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 228   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 229   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 230
 231   // Create sub node
 232   auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 233   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 234   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 235   create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
 236
 237   // Create mul node
 238   auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 239   auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 240   BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 241   create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
 242
 243   graph->verify();
 244   return graph;
 245 }
 246
 247 /* Create branched graph:
 248  *       [Add]
 249  *      //   \\
 *   [Mul1]  [FC1]
 251  *     ||     ||
 252  *   [Mul2]  [FC2]
 253  *      \\   //
 254  *       [Sub]
 255  */
 256 std::shared_ptr<Graph> createBranchedGraph()
 257 {
 258   auto graph = std::make_shared<Graph>();
 259   const TypeInfo float_op(DataType::FLOAT32);
 260
 261   // Create add node
 262   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 263   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 264   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 265   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 266   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 267
 268   // Create mul1 node
 269   auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 270   auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 271   BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 272   create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
 273                            mul1_op_params);
 274
 275   // Create mul2 node
 276   auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 277   auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 278   BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 279   create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
 280                            mul2_op_params);
 281
 282   // Create fc1 node
 283   auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 284   auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 285   FullyConnected::Param fc1_op_params{Activation::NONE};
 286   create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
 287
 288   // Create fc2 node
 289   auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 290   auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 291   FullyConnected::Param fc2_op_params{Activation::NONE};
 292   create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
 293
 294   // Create sub node
 295   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 296   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 297   create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
 298
 299   graph->verify();
 300   return graph;
 301 }
 302
 303 //
 304 // Tests setup/teardown
 305 //
 306
 307 // SetUp/TearDown methods runs before/after each test and performs actions common for each test
 308 class HESchedulerTest : public ::testing::Test
 309 {
 310 protected:
 311   void SetUp() override
 312   {
 313     // Initialize mock backends
 314     _cpu_backend = new MockBackendCPU();
 315     _gpu_backend = new MockBackendGPU();
 316     _npu_backend = new MockBackendNPU();
 317     _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
 318
 319     // Remove previous profile data if it exists
 320     if (!remove("exec_time.json"))
 321     {
 322       // DO NOTHING (no profile data)
 323     }
 324
 325     // Remember original value of 'EXECUTOR' environment variable
 326     char *executor = std::getenv("EXECUTOR");
 327     _original_executor = executor == nullptr ? "" : executor;
 328
 329     // Remember original value of 'PROFILING_MODE' environment variable
 330     char *profiling_mode = std::getenv("PROFILING_MODE");
 331     _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
 332   }
 333
 334   void TearDown() override
 335   {
 336     delete _cpu_backend;
 337     delete _gpu_backend;
 338     delete _npu_backend;
 339     EXPECT_EQ(remove("exec_time.json"), 0);
 340     setenv("EXECUTOR", _original_executor.c_str(), true);
 341     setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
 342   }
 343
 344   const MockBackendCPU *_cpu_backend{nullptr};
 345   const MockBackendGPU *_gpu_backend{nullptr};
 346   const MockBackendNPU *_npu_backend{nullptr};
 347   std::vector<const Backend *> _mock_backends;
 348
 349   std::string _original_executor;
 350   std::string _original_profiling_mode;
 351 };
 352
 353 //
 354 // HEScheduler tests
 355 //
 356
 357 class HESchedulerTestWithExecutorParam : public HESchedulerTest,
 358                                          public testing::WithParamInterface<std::string>
 359 {
 360 };
 361
 362 // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
 363 // one time for each executor
 364 INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
 365                          testing::Values(LINEAR, DATAFLOW, PARALLEL));
 366
 367 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
 368 TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
 369 {
 370   setExecutor(GetParam());
 371
 372   // Prepare graph
 373   ir::Model model;
 374   auto graph(createStraightGraph());
 375   model.push(ir::SubgraphIndex{0}, graph);
 376   OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
 377
 378   // Set default execution and transfer time
 379   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
 380   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
 381                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 382
 383   // Test 1
 384   // Expected behaviour: scheduler assigns different backend to each node
 385   {
 386     // For each backend reduce execution time of one node
 387     ExecTime et(_mock_backends);
 388     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
 389     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
 390     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
 391     et.storeOperationsExecTime();
 392
 393     // Test scheduler
 394     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 395     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 396     const auto br = scheduler.schedule(*graph);
 397     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 398     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
 399     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
 400   }
 401
 402   // Test 2
 403   // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
 404   {
 405     // Increase transfer time
 406     setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
 407
 408     // Test scheduler
 409     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 410     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 411     const auto br = scheduler.schedule(*graph);
 412     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 413     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 414     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
 415   }
 416 }
 417
 418 // Test scheduler behavior for branched graph with known execution time of all nodes and permutes
 419 TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
 420 {
 421   const int64_t NPU_ET = 5000;
 422   setExecutor(GetParam());
 423
 424   // Prepare graph
 425   ir::Model model;
 426   auto graph(createBranchedGraph());
 427   model.push(ir::SubgraphIndex{0}, graph);
 428   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 429     sub_op_idx(5);
 430
 431   // Set default execution and transfer time
 432   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
 433   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
 434                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 435
 436   // Test 1
 437   // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
 438   // nodes, in case of parallel executor scheduler assigns different backends to branches.
 439   {
 440     // Reduce execution time
 441     ExecTime et(_mock_backends);
 442     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
 443     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
 444     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
 445     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
 446     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
 447     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
 448     et.storeOperationsExecTime();
 449
 450     // Test scheduler
 451     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 452     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 453     const auto br = scheduler.schedule(*graph);
 454
 455     std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
 456     if (GetParam() == PARALLEL)
 457     {
 458       branch1_expected_backend =
 459         br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
 460       branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
 461     }
 462
 463     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 464     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
 465     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
 466     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
 467     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
 468     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 469   }
 470
 471   // Test 2
 472   // Expected behaviour: scheduler assigns single backend to all nodes
 473   {
 474     // Increase execution time for GPU backend
 475     ExecTime et(_mock_backends);
 476     /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
 477      * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
 478      * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
 479      * branching or scheduler assigns another backend to a node*/
 480     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 481     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 482     et.storeOperationsExecTime();
 483
 484     // Test scheduler
 485     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 486     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 487     const auto br = scheduler.schedule(*graph);
 488     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 489     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 490     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 491     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
 492     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
 493     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 494   }
 495 }
 496
 497 // Test scheduler behavior for branched graph and enabled profiling mode
 498 TEST_F(HESchedulerTest, branched_graph_profiling_mode)
 499 {
 500   const int ET = 1e5;
 501
 502   // Turn on profiling mode
 503   setProfilingMode(true);
 504   setExecutor(DATAFLOW);
 505
 506   // Prepare graph
 507   ir::Model model;
 508   auto graph(createBranchedGraph());
 509   model.push(ir::SubgraphIndex{0}, graph);
 510   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 511     sub_op_idx(5);
 512
 513   // Test 1
 514   // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
 515   {
 516     // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
 517     ExecTime et(_mock_backends);
 518     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
 519     setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 520     setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 521     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
 522     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 523     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
 524     setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
 525     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 526     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
 527     et.storeOperationsExecTime();
 528
 529     // Test scheduler
 530     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 531     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 532     const auto br = scheduler.schedule(*graph);
 533     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 534     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 535     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
 536     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
 537     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 538   }
 539
 540   // Test 2
 541   // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
 542   // neighbor nodes
 543   {
 544     // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
 545     ExecTime et(_mock_backends);
 546     setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
 547     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 548     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 549     et.storeOperationsExecTime();
 550
 551     // Test scheduler
 552     auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
 553     auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
 554     const auto br = scheduler.schedule(*graph);
 555     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 556               br->getBackend(mul1_op_idx)->config()->id());
 557     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 558               br->getBackend(fc1_op_idx)->config()->id());
 559     ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
 560               br->getBackend(mul2_op_idx)->config()->id());
 561     ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
 562               br->getBackend(fc2_op_idx)->config()->id());
 563     ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
 564               br->getBackend(sub_op_idx)->config()->id());
 565     ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
 566               br->getBackend(sub_op_idx)->config()->id());
 567   }
 568 }
 569
 570 // TODO: Add tests with unknown execution and permutation time
 571
 572 } // unnamed namespace