/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <compiler/HEScheduler.h>
#include <exec/ExecTime.h>

#include <ir/DataType.h>
#include <ir/InternalType.h>
#include <ir/TypeInfo.h>
#include <ir/operation/Add.h>
#include <ir/operation/FullyConnected.h>
#include <ir/operation/Mul.h>
#include <ir/operation/Sub.h>

#include <gtest/gtest.h>

#include <cstddef>
34 using namespace onert;
36 using namespace backend;
37 using namespace operation;
41 // Mock backends classes
44 struct MockConfigCPU : public IConfig
46 std::string id() override { return "cpu"; }
47 bool initialize() override { return true; };
48 bool supportPermutation() override { return false; }
49 Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
50 bool supportDynamicTensor() override { return false; }
51 bool supportFP16() override { return false; }
54 struct MockBackendCPU : public Backend
56 std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
57 std::unique_ptr<BackendContext>
58 newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
60 return std::unique_ptr<BackendContext>(
61 new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
65 struct MockConfigGPU : public IConfig
67 std::string id() override { return "gpu"; }
68 bool initialize() override { return true; };
69 bool supportPermutation() override { return false; }
70 ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
72 return ir::Layout::UNKNOWN;
74 bool supportDynamicTensor() override { return false; }
75 bool supportFP16() override { return false; }
78 struct MockBackendGPU : public Backend
80 std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
81 std::unique_ptr<BackendContext>
82 newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
84 return std::unique_ptr<BackendContext>(
85 new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
89 struct MockConfigNPU : public IConfig
91 std::string id() override { return "npu"; }
92 bool initialize() override { return true; };
93 bool supportPermutation() override { return false; }
94 ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
96 return ir::Layout::UNKNOWN;
98 bool supportDynamicTensor() override { return false; }
99 bool supportFP16() override { return false; }
102 struct MockBackendNPU : public Backend
104 std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
105 std::unique_ptr<BackendContext>
106 newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override
108 return std::unique_ptr<BackendContext>(
109 new BackendContext{this, nullptr, nullptr, nullptr, nullptr});
// Shared sizes: every operand in the test graphs has OPERAND_ELEMS float32
// elements; every operation has three such operands (see calcOpSize assert).
const int OPERAND_ELEMS = 268203;
const int OPERAND_SIZE = OPERAND_ELEMS * 4;
const int OPERATION_SIZE = OPERAND_SIZE * 3;

// Executor names as accepted by the EXECUTOR environment variable
const std::string LINEAR = "Linear";
const std::string DATAFLOW = "Dataflow";
const std::string PARALLEL = "Parallel";
// Select the executor under test by exporting the EXECUTOR environment
// variable (always overwriting any previous value).
void setExecutor(const std::string &executor)
{
  setenv("EXECUTOR", executor.c_str(), /*overwrite=*/true);
}
// Toggle profiling mode by exporting PROFILING_MODE as "1"/"0".
void setProfilingMode(const bool value)
{
  const char *flag = value ? "1" : "0";
  setenv("PROFILING_MODE", flag, /*overwrite=*/true);
}
135 // Calculate operation size by addition sizes of all input and output operands
136 uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
139 const auto &op = graph->operations().at(op_idx);
140 for (const auto &ind : op.getInputs() + op.getOutputs())
141 size += graph->operands().at(ind).info().total_size();
145 // Set execution operation time. This method is needed since ExecutionTime has only
146 // 'updateOperationExecTime' method.
147 void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
148 bool quant, uint32_t op_size, int64_t time)
150 // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
152 int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
153 int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
154 et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
155 assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
158 // Set same execution time for all given backends/operations
159 void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
160 const std::vector<std::string> &op_names,
161 const std::vector<uint32_t> &op_sizes, int64_t exec_time)
163 assert(op_names.size() == op_sizes.size());
164 ExecTime et(backends);
165 for (int i = 0; i < op_names.size(); ++i)
167 for (auto &backend : backends)
168 setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
170 et.uploadOperationsExecTime();
173 // Set permute time from one backend to another. This method is needed since ExecutionTime has only
174 // 'updatePermuteTime' method.
175 void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
176 bool quant, uint32_t op_size, int64_t time)
178 // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
180 int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
181 int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
182 et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
183 assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
186 // Set same permutation time between all given backends
187 void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
188 const int operand_size, const int64_t exec_time)
190 ExecTime et(backends);
191 for (const auto &backend : backends)
193 for (auto &other_backend : backends)
195 if (backend == other_backend)
197 setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
200 et.uploadOperationsExecTime();
204 // Functions for creating graphs
207 using OIS = OperandIndexSequence;
209 template <typename NodeT, typename... Types>
210 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
212 typename NodeT::Param op_params{Activation::NONE};
213 auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
214 auto op_idx = graph->addOperation(std::move(op));
215 // For now in scheduler test all operations in tested graphs has same size (for simplicity)
216 assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
220 // Create straight graph: Add->Sub->Mul
221 std::shared_ptr<Graph> createStraightGraph()
223 auto graph = std::make_shared<Graph>();
224 const TypeInfo float_op(DataType::FLOAT32);
227 auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
228 auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
229 auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
230 create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
233 auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
234 auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
235 create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
238 auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
239 auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
240 create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
242 graph->finishBuilding();
/* Create branched graph:
 *       [Add]
 *      /     \
 *  [Mul1]   [FC1]
 *    |        |
 *  [Mul2]   [FC2]
 *      \     /
 *       [Sub]
 */
255 std::shared_ptr<Graph> createBranchedGraph()
257 auto graph = std::make_shared<Graph>();
258 const TypeInfo float_op(DataType::FLOAT32);
261 auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
262 auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
263 auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
264 create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
267 auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
268 auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
269 create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
272 auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
273 auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
274 create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
277 auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
278 auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
279 create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
282 auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
283 auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
284 create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
287 auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
288 create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
290 graph->finishBuilding();
295 // Tests setup/teardown
298 // SetUp/TearDown methods runs before/after each test and performs actions common for each test
299 class SchedulerTest : public ::testing::Test
302 void SetUp() override
304 // Initialize mock backends
305 _cpu_backend = new MockBackendCPU();
306 _gpu_backend = new MockBackendGPU();
307 _npu_backend = new MockBackendNPU();
308 _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
310 // Remove previous profile data if it exists
311 if (!remove("exec_time.json"))
313 // DO NOTHING (no profile data)
316 // Remember original value of 'EXECUTOR' environment variable
317 char *executor = std::getenv("EXECUTOR");
318 _original_executor = executor == nullptr ? "" : executor;
320 // Remember original value of 'PROFILING_MODE' environment variable
321 char *profiling_mode = std::getenv("PROFILING_MODE");
322 _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
325 void TearDown() override
330 EXPECT_EQ(remove("exec_time.json"), 0);
331 setenv("EXECUTOR", _original_executor.c_str(), true);
332 setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
335 backend::BackendContexts buildBackendContexts(const Graph &graph)
337 backend::BackendContexts contexts;
338 for (auto backend : _mock_backends)
340 contexts.emplace(backend, backend->newContext(graph, nullptr, false));
345 const MockBackendCPU *_cpu_backend{nullptr};
346 const MockBackendGPU *_gpu_backend{nullptr};
347 const MockBackendNPU *_npu_backend{nullptr};
348 std::vector<const Backend *> _mock_backends;
350 std::string _original_executor;
351 std::string _original_profiling_mode;
354 class SchedulerTestWithExecutorParam : public SchedulerTest,
355 public testing::WithParamInterface<std::string>
363 // Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
364 TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time)
366 setExecutor(GetParam());
370 auto graph(createStraightGraph());
371 subgs.push(ir::SubgraphIndex{0}, graph);
372 OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
374 // Set default execution and transfer time
375 setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
376 setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
377 {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
380 // Expected behaviour: scheduler assigns different backend to each node
382 // For each backend reduce execution time of one node
383 ExecTime et(_mock_backends);
384 setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
385 setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
386 setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
387 et.uploadOperationsExecTime();
390 auto backend_contexts = buildBackendContexts(*graph);
391 auto scheduler = compiler::HEScheduler(backend_contexts,
392 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
393 const auto br = scheduler.schedule(*graph);
394 ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
395 ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
396 ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
400 // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
402 // Increase transfer time
403 setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
406 auto backend_contexts = buildBackendContexts(*graph);
407 auto scheduler = compiler::HEScheduler(backend_contexts,
408 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
409 const auto br = scheduler.schedule(*graph);
410 ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
411 ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
412 ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
416 // Test scheduler behavior for branched graph with known execution time of all nodes and permutes
417 TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time)
419 const int64_t NPU_ET = 5000;
420 setExecutor(GetParam());
424 auto graph(createBranchedGraph());
425 subgs.push(ir::SubgraphIndex{0}, graph);
426 OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
429 // Set default execution and transfer time
430 setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
431 setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
432 {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
435 // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
436 // nodes, in case of parallel executor scheduler assigns different backends to branches.
438 // Reduce execution time
439 ExecTime et(_mock_backends);
440 setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
441 setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
442 setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
443 setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
444 setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
445 setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
446 et.uploadOperationsExecTime();
449 auto backend_contexts = buildBackendContexts(*graph);
450 auto scheduler = compiler::HEScheduler(backend_contexts,
451 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
452 const auto br = scheduler.schedule(*graph);
454 std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
455 if (GetParam() == PARALLEL)
457 branch1_expected_backend =
458 br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
459 branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
462 ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
463 ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
464 ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
465 ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
466 ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
467 ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
471 // Expected behaviour: scheduler assigns single backend to all nodes
473 // Increase execution time for GPU backend
474 ExecTime et(_mock_backends);
475 /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
476 * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
477 * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
478 * branching or scheduler assigns another backend to a node*/
479 setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
480 setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
481 et.uploadOperationsExecTime();
484 auto backend_contexts = buildBackendContexts(*graph);
485 auto scheduler = compiler::HEScheduler(backend_contexts,
486 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
487 const auto br = scheduler.schedule(*graph);
488 ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
489 ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
490 ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
491 ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
492 ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
493 ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
497 // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
498 // one time for each executor
499 INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam,
500 testing::Values(LINEAR, DATAFLOW, PARALLEL));
502 // Test scheduler behavior for branched graph and enabled profiling mode
503 TEST_F(SchedulerTest, branched_graph_profiling_mode)
507 // Turn on profiling mode
508 setProfilingMode(true);
509 setExecutor(DATAFLOW);
513 auto graph(createBranchedGraph());
514 subgs.push(ir::SubgraphIndex{0}, graph);
515 OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
519 // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
521 // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
522 ExecTime et(_mock_backends);
523 setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
524 setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
525 setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
526 setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
527 setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
528 setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
529 setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
530 setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
531 setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
532 et.uploadOperationsExecTime();
535 auto backend_contexts = buildBackendContexts(*graph);
536 auto scheduler = compiler::HEScheduler(backend_contexts,
537 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
538 const auto br = scheduler.schedule(*graph);
539 ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
540 ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
541 ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
542 ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
543 ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
547 // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
550 // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
551 ExecTime et(_mock_backends);
552 setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
553 setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
554 setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
555 et.uploadOperationsExecTime();
558 auto backend_contexts = buildBackendContexts(*graph);
559 auto scheduler = compiler::HEScheduler(backend_contexts,
560 compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
561 const auto br = scheduler.schedule(*graph);
562 ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
563 br->getBackend(mul1_op_idx)->config()->id());
564 ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
565 br->getBackend(fc1_op_idx)->config()->id());
566 ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
567 br->getBackend(mul2_op_idx)->config()->id());
568 ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
569 br->getBackend(fc2_op_idx)->config()->id());
570 ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
571 br->getBackend(sub_op_idx)->config()->id());
572 ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
573 br->getBackend(sub_op_idx)->config()->id());
577 // TODO: Add tests with unknown execution and permutation time
579 } // unnamed namespace