runtime/onert/test/core/compiler/HEScheduler.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <compiler/HEScheduler.h>
  18 #include <exec/ExecTime.h>
  19
  20 #include <ir/Shape.h>
  21 #include <ir/InternalType.h>
  22 #include <ir/TypeInfo.h>
  23 #include <ir/DataType.h>
  24
  25 #include <ir/operation/BinaryArithmetic.h>
  26 #include <ir/operation/FullyConnected.h>
  27
  28 #include <gtest/gtest.h>
  29
  30 namespace
  31 {
  32 using namespace onert;
  33 using namespace ir;
  34 using namespace backend;
  35 using namespace operation;
  36 using namespace exec;
  37
  38 //
  39 // Mock backends classes
  40 //
  41
  42 struct MockConfigCPU : public IConfig
  43 {
  44   std::string id() override { return "cpu"; }
  45   bool initialize() override { return true; };
  46   bool supportPermutation() override { return false; }
  47   Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
  48   bool supportDynamicTensor() override { return false; }
  49   bool supportFP16() override { return false; }
  50 };
  51
  52 struct MockBackendCPU : public Backend
  53 {
  54   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
  55   std::unique_ptr<BackendContext>
  56   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
  57   {
  58     return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
  59   }
  60 };
  61
  62 struct MockConfigGPU : public IConfig
  63 {
  64   std::string id() override { return "gpu"; }
  65   bool initialize() override { return true; };
  66   bool supportPermutation() override { return false; }
  67   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  68   {
  69     return ir::Layout::UNKNOWN;
  70   }
  71   bool supportDynamicTensor() override { return false; }
  72   bool supportFP16() override { return false; }
  73 };
  74
  75 struct MockBackendGPU : public Backend
  76 {
  77   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
  78   std::unique_ptr<BackendContext>
  79   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
  80   {
  81     return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
  82   }
  83 };
  84
  85 struct MockConfigNPU : public IConfig
  86 {
  87   std::string id() override { return "npu"; }
  88   bool initialize() override { return true; };
  89   bool supportPermutation() override { return false; }
  90   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  91   {
  92     return ir::Layout::UNKNOWN;
  93   }
  94   bool supportDynamicTensor() override { return false; }
  95   bool supportFP16() override { return false; }
  96 };
  97
  98 struct MockBackendNPU : public Backend
  99 {
 100   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
 101   std::unique_ptr<BackendContext>
 102   newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
 103   {
 104     return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr});
 105   }
 106 };
 107
 108 //
 109 // Constants
 110 //
 111
 // Number of elements in every operand created by the graph builders below
 constexpr int OPERAND_ELEMS = 268203;
 // Size of one operand: four units per element (presumably sizeof(float) for FLOAT32 operands)
 constexpr int OPERAND_SIZE = 4 * OPERAND_ELEMS;
 // Size of one operation: three operands (the test graphs use two inputs and one output per node)
 constexpr int OPERATION_SIZE = 3 * OPERAND_SIZE;
 115
 // Executor names written to the 'EXECUTOR' environment variable by the tests
 const std::string LINEAR = "Linear";
 const std::string DATAFLOW = "Dataflow";
 const std::string PARALLEL = "Parallel";
 119
 120 //
 121 // Helper functions
 122 //
 123
 // Overwrite the 'EXECUTOR' environment variable with the given executor name
 void setExecutor(const std::string &executor)
 {
   const int overwrite_existing = 1;
   setenv("EXECUTOR", executor.c_str(), overwrite_existing);
 }
 126
 // Overwrite the 'PROFILING_MODE' environment variable: "1" when value is true, "0" otherwise
 void setProfilingMode(const bool value)
 {
   const char *flag = "0";
   if (value)
   {
     flag = "1";
   }
   setenv("PROFILING_MODE", flag, 1);
 }
 129
 130 // Calculate operation size by addition sizes of all input and output operands
 131 uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
 132 {
 133   uint32_t size = 0;
 134   const auto &op = graph->operations().at(op_idx);
 135   for (const auto &ind : op.getInputs() + op.getOutputs())
 136     size += graph->operands().at(ind).info().total_size();
 137   return size;
 138 }
 139
 140 // Set execution operation time. This method is needed since ExecutionTime has only
 141 // 'updateOperationExecTime' method.
 142 void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
 143                           bool quant, uint32_t op_size, int64_t time)
 144 {
 145   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 146   assert(time > 0);
 147   int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
 148   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 149   et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
 150   assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
 151 }
 152
 153 // Set same execution time for all given backends/operations
 154 void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
 155                                 const std::vector<std::string> &op_names,
 156                                 const std::vector<uint32_t> &op_sizes, int64_t exec_time)
 157 {
 158   assert(op_names.size() == op_sizes.size());
 159   ExecTime et(backends);
 160   for (int i = 0; i < op_names.size(); ++i)
 161   {
 162     for (auto &backend : backends)
 163       setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
 164   }
 165   et.storeOperationsExecTime();
 166 }
 167
 168 // Set permute time from one backend to another. This method is needed since ExecutionTime has only
 169 // 'updatePermuteTime' method.
 170 void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
 171                         bool quant, uint32_t op_size, int64_t time)
 172 {
 173   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 174   assert(time > 0);
 175   int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
 176   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 177   et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
 178   assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
 179 }
 180
 181 // Set same permutation time between all given backends
 182 void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
 183                                   const int operand_size, const int64_t exec_time)
 184 {
 185   ExecTime et(backends);
 186   for (const auto &backend : backends)
 187   {
 188     for (auto &other_backend : backends)
 189     {
 190       if (backend == other_backend)
 191         continue;
 192       setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
 193     }
 194   }
 195   et.storeOperationsExecTime();
 196 }
 197
 198 //
 199 // Functions for creating graphs
 200 //
 201
 202 using OIS = OperandIndexSequence;
 203
 204 template <typename NodeT, typename... Types>
 205 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
 206 {
 207   auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
 208   auto op_idx = graph->addOperation(std::move(op));
 209   // For now in scheduler test all operations in tested graphs has same size (for simplicity)
 210   assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
 211   return op_idx;
 212 }
 213
 214 // Create straight graph: Add->Sub->Mul
 215 std::shared_ptr<Graph> createStraightGraph()
 216 {
 217   auto graph = std::make_shared<Graph>();
 218   const TypeInfo float_op(DataType::FLOAT32);
 219
 220   // Create add node
 221   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 222   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 223   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 224   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 225   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 226
 227   // Create sub node
 228   auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 229   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 230   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 231   create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
 232
 233   // Create mul node
 234   auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 235   auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 236   BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 237   create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
 238
 239   graph->finishBuilding();
 240   return graph;
 241 }
 242
 243 /* Create branched graph:
 244  *       [Add]
 245  *      //   \\
 246  *   [Mul1]  [FC2]
 247  *     ||     ||
 248  *   [Mul2]  [FC2]
 249  *      \\   //
 250  *       [Sub]
 251  */
 252 std::shared_ptr<Graph> createBranchedGraph()
 253 {
 254   auto graph = std::make_shared<Graph>();
 255   const TypeInfo float_op(DataType::FLOAT32);
 256
 257   // Create add node
 258   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 259   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 260   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 261   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 262   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 263
 264   // Create mul1 node
 265   auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 266   auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 267   BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 268   create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
 269                            mul1_op_params);
 270
 271   // Create mul2 node
 272   auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 273   auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 274   BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 275   create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
 276                            mul2_op_params);
 277
 278   // Create fc1 node
 279   auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 280   auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 281   FullyConnected::Param fc1_op_params{Activation::NONE};
 282   create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
 283
 284   // Create fc2 node
 285   auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 286   auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 287   FullyConnected::Param fc2_op_params{Activation::NONE};
 288   create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
 289
 290   // Create sub node
 291   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 292   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 293   create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
 294
 295   graph->finishBuilding();
 296   return graph;
 297 }
 298
 299 //
 300 // Tests setup/teardown
 301 //
 302
 303 // SetUp/TearDown methods runs before/after each test and performs actions common for each test
 304 class HESchedulerTest : public ::testing::Test
 305 {
 306 protected:
 307   void SetUp() override
 308   {
 309     // Initialize mock backends
 310     _cpu_backend = new MockBackendCPU();
 311     _gpu_backend = new MockBackendGPU();
 312     _npu_backend = new MockBackendNPU();
 313     _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
 314
 315     // Remove previous profile data if it exists
 316     if (!remove("exec_time.json"))
 317     {
 318       // DO NOTHING (no profile data)
 319     }
 320
 321     // Remember original value of 'EXECUTOR' environment variable
 322     char *executor = std::getenv("EXECUTOR");
 323     _original_executor = executor == nullptr ? "" : executor;
 324
 325     // Remember original value of 'PROFILING_MODE' environment variable
 326     char *profiling_mode = std::getenv("PROFILING_MODE");
 327     _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
 328   }
 329
 330   void TearDown() override
 331   {
 332     delete _cpu_backend;
 333     delete _gpu_backend;
 334     delete _npu_backend;
 335     EXPECT_EQ(remove("exec_time.json"), 0);
 336     setenv("EXECUTOR", _original_executor.c_str(), true);
 337     setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
 338   }
 339
 340   backend::BackendContexts buildBackendContexts(const Graph &graph)
 341   {
 342     backend::BackendContexts contexts;
 343     for (auto backend : _mock_backends)
 344     {
 345       contexts.emplace(backend, backend->newContext(graph, nullptr, false));
 346     }
 347     return contexts;
 348   }
 349
 350   const MockBackendCPU *_cpu_backend{nullptr};
 351   const MockBackendGPU *_gpu_backend{nullptr};
 352   const MockBackendNPU *_npu_backend{nullptr};
 353   std::vector<const Backend *> _mock_backends;
 354
 355   std::string _original_executor;
 356   std::string _original_profiling_mode;
 357 };
 358
 359 class HESchedulerTestWithExecutorParam : public HESchedulerTest,
 360                                          public testing::WithParamInterface<std::string>
 361 {
 362 };
 363
 364 //
 365 // HEScheduler tests
 366 //
 367
 368 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
 369 TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
 370 {
 371   setExecutor(GetParam());
 372
 373   // Prepare graph
 374   ir::Subgraphs subgs;
 375   auto graph(createStraightGraph());
 376   subgs.push(ir::SubgraphIndex{0}, graph);
 377   OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
 378
 379   // Set default execution and transfer time
 380   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
 381   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
 382                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 383
 384   // Test 1
 385   // Expected behaviour: scheduler assigns different backend to each node
 386   {
 387     // For each backend reduce execution time of one node
 388     ExecTime et(_mock_backends);
 389     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
 390     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
 391     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
 392     et.storeOperationsExecTime();
 393
 394     // Test scheduler
 395     auto backend_contexts = buildBackendContexts(*graph);
 396     auto scheduler = compiler::HEScheduler(backend_contexts,
 397                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 398     const auto br = scheduler.schedule(*graph);
 399     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 400     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
 401     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
 402   }
 403
 404   // Test 2
 405   // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
 406   {
 407     // Increase transfer time
 408     setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
 409
 410     // Test scheduler
 411     auto backend_contexts = buildBackendContexts(*graph);
 412     auto scheduler = compiler::HEScheduler(backend_contexts,
 413                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 414     const auto br = scheduler.schedule(*graph);
 415     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 416     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 417     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
 418   }
 419 }
 420
 421 // Test scheduler behavior for branched graph with known execution time of all nodes and permutes
 422 TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
 423 {
 424   const int64_t NPU_ET = 5000;
 425   setExecutor(GetParam());
 426
 427   // Prepare graph
 428   ir::Subgraphs subgs;
 429   auto graph(createBranchedGraph());
 430   subgs.push(ir::SubgraphIndex{0}, graph);
 431   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 432     sub_op_idx(5);
 433
 434   // Set default execution and transfer time
 435   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
 436   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
 437                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 438
 439   // Test 1
 440   // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
 441   // nodes, in case of parallel executor scheduler assigns different backends to branches.
 442   {
 443     // Reduce execution time
 444     ExecTime et(_mock_backends);
 445     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
 446     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
 447     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
 448     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
 449     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
 450     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
 451     et.storeOperationsExecTime();
 452
 453     // Test scheduler
 454     auto backend_contexts = buildBackendContexts(*graph);
 455     auto scheduler = compiler::HEScheduler(backend_contexts,
 456                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 457     const auto br = scheduler.schedule(*graph);
 458
 459     std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
 460     if (GetParam() == PARALLEL)
 461     {
 462       branch1_expected_backend =
 463         br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
 464       branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
 465     }
 466
 467     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 468     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
 469     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
 470     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
 471     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
 472     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 473   }
 474
 475   // Test 2
 476   // Expected behaviour: scheduler assigns single backend to all nodes
 477   {
 478     // Increase execution time for GPU backend
 479     ExecTime et(_mock_backends);
 480     /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
 481      * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
 482      * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
 483      * branching or scheduler assigns another backend to a node*/
 484     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 485     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 486     et.storeOperationsExecTime();
 487
 488     // Test scheduler
 489     auto backend_contexts = buildBackendContexts(*graph);
 490     auto scheduler = compiler::HEScheduler(backend_contexts,
 491                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 492     const auto br = scheduler.schedule(*graph);
 493     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 494     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 495     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 496     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
 497     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
 498     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 499   }
 500 }
 501
 502 // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
 503 // one time for each executor
 504 INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
 505                         testing::Values(LINEAR, DATAFLOW, PARALLEL));
 506
 507 // Test scheduler behavior for branched graph and enabled profiling mode
 508 TEST_F(HESchedulerTest, branched_graph_profiling_mode)
 509 {
 510   const int ET = 1e5;
 511
 512   // Turn on profiling mode
 513   setProfilingMode(true);
 514   setExecutor(DATAFLOW);
 515
 516   // Prepare graph
 517   ir::Subgraphs subgs;
 518   auto graph(createBranchedGraph());
 519   subgs.push(ir::SubgraphIndex{0}, graph);
 520   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 521     sub_op_idx(5);
 522
 523   // Test 1
 524   // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
 525   {
 526     // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
 527     ExecTime et(_mock_backends);
 528     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
 529     setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 530     setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 531     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
 532     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 533     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
 534     setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
 535     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 536     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
 537     et.storeOperationsExecTime();
 538
 539     // Test scheduler
 540     auto backend_contexts = buildBackendContexts(*graph);
 541     auto scheduler = compiler::HEScheduler(backend_contexts,
 542                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 543     const auto br = scheduler.schedule(*graph);
 544     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 545     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 546     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
 547     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
 548     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 549   }
 550
 551   // Test 2
 552   // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
 553   // neighbor nodes
 554   {
 555     // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
 556     ExecTime et(_mock_backends);
 557     setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
 558     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 559     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 560     et.storeOperationsExecTime();
 561
 562     // Test scheduler
 563     auto backend_contexts = buildBackendContexts(*graph);
 564     auto scheduler = compiler::HEScheduler(backend_contexts,
 565                                            compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 566     const auto br = scheduler.schedule(*graph);
 567     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 568               br->getBackend(mul1_op_idx)->config()->id());
 569     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 570               br->getBackend(fc1_op_idx)->config()->id());
 571     ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
 572               br->getBackend(mul2_op_idx)->config()->id());
 573     ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
 574               br->getBackend(fc2_op_idx)->config()->id());
 575     ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
 576               br->getBackend(sub_op_idx)->config()->id());
 577     ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
 578               br->getBackend(sub_op_idx)->config()->id());
 579   }
 580 }
 581
 582 // TODO: Add tests with unknown execution and permutation time
 583
 584 } // unnamed namespace