/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "TrainingCompiler.h"

#include "StaticDerivativeShapeInferer.h"
#include "TrainableOperationConverter.h"
#include "pass/LossInsertionPass.h"
#include "../CompilerHelpers.h"
#include "../ExecutorFactory.h"
#include "../pass/ConstantOutputPass.h"
#include "../pass/OddOutputPass.h"
#include "../pass/PassRunner.h"
#include "../pass/UnusedOperandEliminationPass.h"
#include "../ShapeValidator.h"
#include "../../dumper/dot/DotDumper.h"
#include "../../exec/train/TrainableExecutors.h"
#include "../../ir/OperationDumper.h"
#include "../../ir/verifier/Verifier.h"

#include <compiler/StaticShapeInferer.h>
#include <compiler/train/LoweredTrainableGraph.h>
#include <ir/train/TrainableGraph.h>
#include <exec/train/optimizer/SGD.h>

#include <misc/polymorphic_downcast.h>
#include <misc/string_helpers.h>

namespace onert
{
namespace compiler
{
namespace train
{

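// Illustrative usage sketch (an assumption about the calling code, not taken
// from this repository):
//
//   // nnpkg, copts, and training_info are prepared by the caller
//   TrainingCompiler compiler{nnpkg, copts, training_info};
//   auto artifact = compiler.compile(); // returns a CompilerArtifact with executors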
TrainingCompiler::TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
                                   std::vector<std::unique_ptr<CompilerOptions>> &copts,
                                   const TrainingInfo &training_info)
  : _model{nnpkg->primary_model()}, _options{copts[0].get()}, _training_info{training_info}
{
  if (nnpkg->model_count() > 1)
    throw std::runtime_error("TrainingCompiler does not support multiple models yet");

  if (nnpkg->primary_model()->subgraphs_count() > 1)
    throw std::runtime_error("TrainingCompiler does not support multiple subgraphs yet");
}

std::shared_ptr<CompilerArtifact> TrainingCompiler::compile(void)
{
  /***************************************************
   * Prepare compilation phase
   ***************************************************/
  if (!_options)
    throw std::runtime_error{"Empty compile option"};

  // Mode check
  // TODO handle option for each model
  if (_options->he_profiling_mode)
  {
    if (!_options->he_scheduler)
      throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");

    if (_options->executor != "Dataflow")
      throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
  }

  if (!_options->minmax_filepath.empty())
  {
    if (_options->executor != "Linear")
      throw std::runtime_error("Recording minmax works only with 'Linear' executor");
  }

  _options->forceInternalOptions();
  _options->verboseOptions();

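  // NOTE The custom kernel builder is captured before _model is released below;
  //      it is later handed to the ExecutorFactory through ExecutorFactoryArgs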
  auto custom_kernel_builder = _model->getKernelBuilder();

  _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
    auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
    // Mandatory passes
    compiler::pass::PassRunner{}
      .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg))
      .append(std::make_unique<compiler::pass::OddOutputPass>(subg))
      .run();

    // Optimizations
    compiler::pass::PassRunner{}
      .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg))
      .run();
  });

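  // Training reuses the inference IR: each ir::Graph is copied into an
  // ir::train::TrainableGraph, and every operation in it is replaced in place
  // with its trainable counterpart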
  std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>>
    trainable_subgraphs;

  if (_model->hasOnly<ir::Graph>())
  {
    // Create trainable subgraphs by copying and converting the inference model
    _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) {
      const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph);
      // Create TrainableGraph by copying Graph
      auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg);

      // Convert operations to trainable operations
      auto converter = TrainableOperationConverter{*trainable_subg, &_training_info};
      subg.operations().iterate(
        [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &op) {
          auto trainable_op = converter(op);
          auto gen_index = trainable_subg->replaceOperation(op_index, std::move(trainable_op));
          UNUSED_RELEASE(gen_index);
          assert(gen_index == op_index);
        });

      trainable_subgraphs[subg_index] = std::move(trainable_subg);
    });
  }
  else
  {
    // TODO Support models that have TrainableGraphs
    throw std::runtime_error("TrainingCompiler: Invalid model");
  }

  // The original model is no longer needed once trainable subgraphs exist; release it
  _model.reset();

  // Apply pass for trainable subgraphs
  for (auto &&pair : trainable_subgraphs)
  {
    auto trainable_subg = pair.second;
    auto subg_index = pair.first;

    compiler::pass::PassRunner{}
      .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info,
                                                               subg_index))
      .run();
  }

  // Change input shape according to batch_size
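  // (illustrative: with batch_size = 32, an input of shape [1, 28, 28] becomes [32, 28, 28])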
  for (auto &&pair : trainable_subgraphs)
  {
    auto trainable_subg = pair.second;

    for (const auto &ind : trainable_subg->getInputs())
    {
      auto &input = trainable_subg->operands().at(ind);
      auto new_shape = input.info().shape();
      // TODO Consider batch size index
      if (new_shape.dim(0) != 1)
        throw std::runtime_error("The first dimension of the input is not 1. It is not supported yet.");
      new_shape.dim(0) = _training_info.batchSize();
      input.info().shape(new_shape);
    }
  }

  /***************************************************
   * Backend independent analysis & optimization phase
   ***************************************************/
  // TODO Handle dump level for each model
  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
  onert::dumper::dot::DotDumper dot_dumper(dump_level);

  // Tracing context
  auto tracing_ctx = std::make_unique<util::TracingCtx>();

  // Lower: Assign backend
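  // Lowering wraps each trainable subgraph in a LoweredTrainableGraph, binding
  // every operation to a backend selected from the compiler options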
  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>>
    lowered_subgs;
  {
    for (auto &&pair : trainable_subgraphs)
    {
      auto &subg_index = pair.first;
      auto trainable_subg = pair.second;

      // Lower: Assign backend
      lowered_subgs[subg_index] =
        std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options);
      // Set tracing_ctx for copied graph
      if (tracing_ctx != nullptr)
        tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
    }
  }

  for (const auto &pair : lowered_subgs)
  {
    const auto &subg_index = pair.first;
    const auto &lowered_subg = pair.second;
    dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
  }

  // Register a derivative for each non-constant operand, using the operand's
  // own tensor info as the default
  for (const auto &pair : lowered_subgs)
  {
    auto lowered_subg = pair.second.get();
    auto &tgraph = lowered_subg->trainable_graph();
    tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
      if (!obj.isConstant())
      {
        auto deriv = std::make_unique<ir::Operand>(obj);
        const auto gen_index = tgraph.addDerivative(index, std::move(deriv));
        assert(gen_index == index);
        UNUSED_RELEASE(gen_index);
      }
    });
  }

  // Shape inference.
  {
    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers
    // are called recursively
    std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
      createStaticShapeInferers(lowered_subgs);

    const auto primary_subg_idx = ir::SubgraphIndex{0};
    inferers.at(primary_subg_idx)->infer();

    for (const auto &pair_inferer : inferers)
    {
      const auto inferer = pair_inferer.second.get();
      inferer->dump();
    }

    // NOTE StaticDerivativeShapeInferer is allocated for each subgraph,
    //      so it does not support models that have controlflow operations yet.
    for (auto &&pair : lowered_subgs)
    {
      auto &lowered_subg = pair.second;
      auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get());
      inferer->infer();
      inferer->dump();
    }
  }

  // Shape validation
  for (const auto &pair : lowered_subgs)
  {
    auto &lowered_subg = pair.second;
    compiler::ShapeValidator{lowered_subg->graph()}();
  }

  // TODO Validate shapes of derivative tensors

  // Create optimizer
  // TODO Set properties of optimizer
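  // NOTE Only SGD is supported at this point; any other optimizer code is
  //      rejected with the error below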
  std::shared_ptr<exec::train::optimizer::Optimizer> optimizer;
  const auto &optim_info = _training_info.optimizerInfo();
  if (optim_info.optim_code == exec::train::optimizer::OptimizerCode::SGD)
    optimizer = std::make_shared<exec::train::optimizer::SGD>(optim_info.learning_rate);
  else
    throw std::runtime_error("Invalid optimizer type, " +
                             exec::train::optimizer::toString(optim_info.optim_code));

  /*************************************************************
   *  Backend independent analysis & optimization phase finished
   *************************************************************/
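  // Generate one trainable executor per lowered subgraph; the single optimizer
  // instance is shared by all executors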
  auto executors = std::make_shared<exec::train::TrainableExecutors>();
  for (auto &&pair : lowered_subgs)
  {
    auto const model_index = ir::ModelIndex{0};
    auto const subg_index = pair.first;
    auto &lowered_subg = pair.second;
    auto const indexed_ranks = lowered_subg->indexed_ranks();

    ir::OperationDumper dumper("Executor generation of Subgraph " +
                               std::to_string(subg_index.value()));
    lowered_subg->graph().operations().iterate(
      [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });

    ExecutorFactoryArgs args;
    args.tracing_ctx = tracing_ctx.get();
    args.options = _options;
    args.model_index = model_index;
    args.custom_kernel_builder = custom_kernel_builder;
    auto executor = std::unique_ptr<exec::IExecutor>{
      ExecutorFactory::get().create(std::move(lowered_subg), executors, args, optimizer)};
    executor->setIndexedRanks(indexed_ranks);
    executors->emplace(model_index, subg_index, std::move(executor));
  }

  /********************************
   * Code generation phase finished
   ********************************/
  return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}

} // namespace train
} // namespace compiler
} // namespace onert