runtime/onert/test/core/compiler/HEScheduler.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <compiler/HEScheduler.h>
  18 #include <exec/ExecTime.h>
  19
  20 #include <ir/Shape.h>
  21 #include <ir/InternalType.h>
  22 #include <ir/TypeInfo.h>
  23 #include <ir/DataType.h>
  24
  25 #include <ir/operation/BinaryArithmetic.h>
  26 #include <ir/operation/FullyConnected.h>
  27
  28 #include <gtest/gtest.h>
  29
  30 namespace
  31 {
  32 using namespace onert;
  33 using namespace ir;
  34 using namespace backend;
  35 using namespace operation;
  36 using namespace exec;
  37
  38 //
  39 // Mock backends classes
  40 //
  41
  42 struct MockConfigCPU : public IConfig
  43 {
  44   std::string id() override { return "cpu"; }
  45   bool initialize() override { return true; };
  46   bool supportPermutation() override { return false; }
  47   Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
  48   bool supportDynamicTensor() override { return false; }
  49   bool supportFP16() override { return false; }
  50 };
  51
  52 class MockBackendContext : public BackendContext
  53 {
  54 public:
  55   using BackendContext::BackendContext;
  56   ITensorRegistry *genTensors() override { return nullptr; }
  57   FunctionMap genKernels() override { return {}; }
  58 };
  59
  60 struct MockBackendCPU : public Backend
  61 {
  62   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
  63   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
  64   {
  65     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
  66   }
  67 };
  68
  69 struct MockConfigGPU : public IConfig
  70 {
  71   std::string id() override { return "gpu"; }
  72   bool initialize() override { return true; };
  73   bool supportPermutation() override { return false; }
  74   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  75   {
  76     return ir::Layout::UNKNOWN;
  77   }
  78   bool supportDynamicTensor() override { return false; }
  79   bool supportFP16() override { return false; }
  80 };
  81
  82 struct MockBackendGPU : public Backend
  83 {
  84   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
  85   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
  86   {
  87     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
  88   }
  89 };
  90
  91 struct MockConfigNPU : public IConfig
  92 {
  93   std::string id() override { return "npu"; }
  94   bool initialize() override { return true; };
  95   bool supportPermutation() override { return false; }
  96   ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
  97   {
  98     return ir::Layout::UNKNOWN;
  99   }
 100   bool supportDynamicTensor() override { return false; }
 101   bool supportFP16() override { return false; }
 102 };
 103
 104 struct MockBackendNPU : public Backend
 105 {
 106   std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
 107   std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
 108   {
 109     return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
 110   }
 111 };
 112
 113 //
 114 // Constants
 115 //
 116
 117 const int OPERAND_ELEMS = 268203;
 118 const int OPERAND_SIZE = OPERAND_ELEMS * 4;
 119 const int OPERATION_SIZE = OPERAND_SIZE * 3;
 120
 121 const std::string LINEAR("Linear");
 122 const std::string DATAFLOW("Dataflow");
 123 const std::string PARALLEL("Parallel");
 124
 125 //
 126 // Helper functions
 127 //
 128
 129 // Set executor through environment variable
 130 void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
 131
 132 // Set profiling mode through environment variable
 133 void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
 134
 135 // Calculate operation size by addition sizes of all input and output operands
 136 uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
 137 {
 138   uint32_t size = 0;
 139   const auto &op = graph->operations().at(op_idx);
 140   for (const auto &ind : op.getInputs() + op.getOutputs())
 141     size += graph->operands().at(ind).info().total_size();
 142   return size;
 143 }
 144
 145 // Set execution operation time. This method is needed since ExecutionTime has only
 146 // 'updateOperationExecTime' method.
 147 void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
 148                           bool quant, uint32_t op_size, int64_t time)
 149 {
 150   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 151   assert(time > 0);
 152   int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
 153   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 154   et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
 155   assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
 156 }
 157
 158 // Set same execution time for all given backends/operations
 159 void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
 160                                 const std::vector<std::string> &op_names,
 161                                 const std::vector<uint32_t> &op_sizes, int64_t exec_time)
 162 {
 163   assert(op_names.size() == op_sizes.size());
 164   ExecTime et(backends);
 165   for (int i = 0; i < op_names.size(); ++i)
 166   {
 167     for (auto &backend : backends)
 168       setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
 169   }
 170   et.storeOperationsExecTime();
 171 }
 172
 173 // Set permute time from one backend to another. This method is needed since ExecutionTime has only
 174 // 'updatePermuteTime' method.
 175 void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
 176                         bool quant, uint32_t op_size, int64_t time)
 177 {
 178   // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
 179   assert(time > 0);
 180   int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
 181   int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
 182   et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
 183   assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
 184 }
 185
 186 // Set same permutation time between all given backends
 187 void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
 188                                   const int operand_size, const int64_t exec_time)
 189 {
 190   ExecTime et(backends);
 191   for (const auto &backend : backends)
 192   {
 193     for (auto &other_backend : backends)
 194     {
 195       if (backend == other_backend)
 196         continue;
 197       setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
 198     }
 199   }
 200   et.storeOperationsExecTime();
 201 }
 202
 203 //
 204 // Functions for creating graphs
 205 //
 206
 207 using OIS = OperandIndexSequence;
 208
 209 template <typename NodeT, typename... Types>
 210 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
 211 {
 212   auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
 213   auto op_idx = graph->addOperation(std::move(op));
 214   // For now in scheduler test all operations in tested graphs has same size (for simplicity)
 215   assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
 216   return op_idx;
 217 }
 218
 219 // Create straight graph: Add->Sub->Mul
 220 std::shared_ptr<Graph> createStraightGraph()
 221 {
 222   auto graph = std::make_shared<Graph>();
 223   const TypeInfo float_op(DataType::FLOAT32);
 224
 225   // Create add node
 226   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 227   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 228   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 229   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 230   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 231
 232   // Create sub node
 233   auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 234   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 235   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 236   create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
 237
 238   // Create mul node
 239   auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 240   auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 241   BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 242   create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
 243
 244   graph->verify();
 245   return graph;
 246 }
 247
/* Create branched graph:
 *       [Add]
 *      //   \\
 *   [Mul1]  [FC1]
 *     ||     ||
 *   [Mul2]  [FC2]
 *      \\   //
 *       [Sub]
 */
 257 std::shared_ptr<Graph> createBranchedGraph()
 258 {
 259   auto graph = std::make_shared<Graph>();
 260   const TypeInfo float_op(DataType::FLOAT32);
 261
 262   // Create add node
 263   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 264   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 265   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 266   BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
 267   create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 268
 269   // Create mul1 node
 270   auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 271   auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 272   BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 273   create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
 274                            mul1_op_params);
 275
 276   // Create mul2 node
 277   auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 278   auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 279   BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
 280   create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
 281                            mul2_op_params);
 282
 283   // Create fc1 node
 284   auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 285   auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 286   FullyConnected::Param fc1_op_params{Activation::NONE};
 287   create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
 288
 289   // Create fc2 node
 290   auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 291   auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 292   FullyConnected::Param fc2_op_params{Activation::NONE};
 293   create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
 294
 295   // Create sub node
 296   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
 297   BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
 298   create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
 299
 300   graph->verify();
 301   return graph;
 302 }
 303
 304 //
 305 // Tests setup/teardown
 306 //
 307
 308 // SetUp/TearDown methods runs before/after each test and performs actions common for each test
 309 class HESchedulerTest : public ::testing::Test
 310 {
 311 protected:
 312   void SetUp() override
 313   {
 314     // Initialize mock backends
 315     _cpu_backend = new MockBackendCPU();
 316     _gpu_backend = new MockBackendGPU();
 317     _npu_backend = new MockBackendNPU();
 318     _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
 319
 320     // Remove previous profile data if it exists
 321     if (!remove("exec_time.json"))
 322     {
 323       // DO NOTHING (no profile data)
 324     }
 325
 326     // Remember original value of 'EXECUTOR' environment variable
 327     char *executor = std::getenv("EXECUTOR");
 328     _original_executor = executor == nullptr ? "" : executor;
 329
 330     // Remember original value of 'PROFILING_MODE' environment variable
 331     char *profiling_mode = std::getenv("PROFILING_MODE");
 332     _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
 333   }
 334
 335   void TearDown() override
 336   {
 337     delete _cpu_backend;
 338     delete _gpu_backend;
 339     delete _npu_backend;
 340     EXPECT_EQ(remove("exec_time.json"), 0);
 341     setenv("EXECUTOR", _original_executor.c_str(), true);
 342     setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
 343   }
 344
 345   const MockBackendCPU *_cpu_backend{nullptr};
 346   const MockBackendGPU *_gpu_backend{nullptr};
 347   const MockBackendNPU *_npu_backend{nullptr};
 348   std::vector<const Backend *> _mock_backends;
 349
 350   std::string _original_executor;
 351   std::string _original_profiling_mode;
 352 };
 353
 354 //
 355 // HEScheduler tests
 356 //
 357
 358 class HESchedulerTestWithExecutorParam : public HESchedulerTest,
 359                                          public testing::WithParamInterface<std::string>
 360 {
 361 };
 362
 363 // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
 364 // one time for each executor
 365 INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
 366                         testing::Values(LINEAR, DATAFLOW, PARALLEL));
 367
 368 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
 369 TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
 370 {
 371   setExecutor(GetParam());
 372
 373   // Prepare graph
 374   ir::Subgraphs subgs;
 375   auto graph(createStraightGraph());
 376   subgs.push(ir::SubgraphIndex{0}, graph);
 377   OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
 378
 379   // Set default execution and transfer time
 380   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
 381   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
 382                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 383
 384   // Test 1
 385   // Expected behaviour: scheduler assigns different backend to each node
 386   {
 387     // For each backend reduce execution time of one node
 388     ExecTime et(_mock_backends);
 389     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
 390     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
 391     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
 392     et.storeOperationsExecTime();
 393
 394     // Test scheduler
 395     auto scheduler =
 396       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 397     const auto br = scheduler.schedule(*graph);
 398     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 399     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
 400     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
 401   }
 402
 403   // Test 2
 404   // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
 405   {
 406     // Increase transfer time
 407     setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
 408
 409     // Test scheduler
 410     auto scheduler =
 411       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 412     const auto br = scheduler.schedule(*graph);
 413     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
 414     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 415     ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
 416   }
 417 }
 418
 419 // Test scheduler behavior for branched graph with known execution time of all nodes and permutes
 420 TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
 421 {
 422   const int64_t NPU_ET = 5000;
 423   setExecutor(GetParam());
 424
 425   // Prepare graph
 426   ir::Subgraphs subgs;
 427   auto graph(createBranchedGraph());
 428   subgs.push(ir::SubgraphIndex{0}, graph);
 429   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 430     sub_op_idx(5);
 431
 432   // Set default execution and transfer time
 433   setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
 434   setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
 435                              {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
 436
 437   // Test 1
 438   // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
 439   // nodes, in case of parallel executor scheduler assigns different backends to branches.
 440   {
 441     // Reduce execution time
 442     ExecTime et(_mock_backends);
 443     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
 444     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
 445     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
 446     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
 447     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
 448     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
 449     et.storeOperationsExecTime();
 450
 451     // Test scheduler
 452     auto scheduler =
 453       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 454     const auto br = scheduler.schedule(*graph);
 455
 456     std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
 457     if (GetParam() == PARALLEL)
 458     {
 459       branch1_expected_backend =
 460         br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
 461       branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
 462     }
 463
 464     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 465     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
 466     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
 467     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
 468     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
 469     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 470   }
 471
 472   // Test 2
 473   // Expected behaviour: scheduler assigns single backend to all nodes
 474   {
 475     // Increase execution time for GPU backend
 476     ExecTime et(_mock_backends);
 477     /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
 478      * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
 479      * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
 480      * branching or scheduler assigns another backend to a node*/
 481     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 482     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
 483     et.storeOperationsExecTime();
 484
 485     // Test scheduler
 486     auto scheduler =
 487       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 488     const auto br = scheduler.schedule(*graph);
 489     ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
 490     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 491     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 492     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
 493     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
 494     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
 495   }
 496 }
 497
 498 // Test scheduler behavior for branched graph and enabled profiling mode
 499 TEST_F(HESchedulerTest, branched_graph_profiling_mode)
 500 {
 501   const int ET = 1e5;
 502
 503   // Turn on profiling mode
 504   setProfilingMode(true);
 505   setExecutor(DATAFLOW);
 506
 507   // Prepare graph
 508   ir::Subgraphs subgs;
 509   auto graph(createBranchedGraph());
 510   subgs.push(ir::SubgraphIndex{0}, graph);
 511   OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
 512     sub_op_idx(5);
 513
 514   // Test 1
 515   // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
 516   {
 517     // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
 518     ExecTime et(_mock_backends);
 519     setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
 520     setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 521     setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 522     setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
 523     setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 524     setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
 525     setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
 526     setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 527     setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
 528     et.storeOperationsExecTime();
 529
 530     // Test scheduler
 531     auto scheduler =
 532       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 533     const auto br = scheduler.schedule(*graph);
 534     ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
 535     ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
 536     ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
 537     ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
 538     ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
 539   }
 540
 541   // Test 2
 542   // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
 543   // neighbor nodes
 544   {
 545     // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
 546     ExecTime et(_mock_backends);
 547     setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
 548     setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
 549     setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
 550     et.storeOperationsExecTime();
 551
 552     // Test scheduler
 553     auto scheduler =
 554       compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
 555     const auto br = scheduler.schedule(*graph);
 556     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 557               br->getBackend(mul1_op_idx)->config()->id());
 558     ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
 559               br->getBackend(fc1_op_idx)->config()->id());
 560     ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
 561               br->getBackend(mul2_op_idx)->config()->id());
 562     ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
 563               br->getBackend(fc2_op_idx)->config()->id());
 564     ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
 565               br->getBackend(sub_op_idx)->config()->id());
 566     ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
 567               br->getBackend(sub_op_idx)->config()->id());
 568   }
 569 }
 570
 571 // TODO: Add tests with unknown execution and permutation time
 572
 573 } // unnamed namespace