tests/tools/onert_run/src/onert_run.cc (imported upstream version 1.25.0)
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "allocation.h"
#include "args.h"
#include "benchmark.h"
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
#include "h5formatter.h"
#endif
#include "nnfw.h"
#include "nnfw_util.h"
#include "nnfw_internal.h"
#include "nnfw_experimental.h"
#include "randomgen.h"
#include "rawformatter.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
#endif

#include <boost/program_options.hpp>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <libgen.h>
#include <limits.h> // for PATH_MAX, used by the realpath() buffer below
#include <stdexcept>
#include <unordered_map>
#include <vector>

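// onert_run: command-line driver for the ONE runtime (onert). It loads an
// nnpackage or a single model file through the nnfw C API, optionally
// quantizes it, feeds inputs (HDF5, raw file, or random data), runs inference
// in warmup/measured phases, and prints or exports benchmark results.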
static const char *default_backend_cand = "cpu";

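// Overwrite entries of shape_map positionally: shapes[i] replaces the shape
// recorded for input index i. For example, passing {{1, 3, 224, 224}}
// overrides the shape of input 0.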
void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
                       std::vector<onert_run::TensorShape> shapes)
{
  for (uint32_t i = 0; i < shapes.size(); i++)
    shape_map[i] = shapes[i];
}

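// Derive the output path for a quantized model from a .circle model path,
// e.g. "mobilenet.circle" -> "mobilenet_quantized_q8.circle"
// (or "..._quantized_q16.circle" when is_q16 is true).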
std::string genQuantizedModelPathFromModelPath(const std::string &model_path, bool is_q16)
{
  auto const extension_pos = model_path.find(".circle");
  if (extension_pos == std::string::npos)
  {
    std::cerr << "Input model isn't .circle." << std::endl;
    exit(-1);
  }
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return model_path.substr(0, extension_pos) + qstring + ".circle";
}

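// Derive the quantized model path for an nnpackage: the file is placed inside
// the package directory and named after it,
// e.g. "/opt/pkgs/mobilenet/" -> "/opt/pkgs/mobilenet/mobilenet_quantized_q8.circle".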
std::string genQuantizedModelPathFromPackagePath(const std::string &package_path, bool is_q16)
{
  auto package_path_without_slash = package_path;
  if (package_path_without_slash.back() == '/')
    package_path_without_slash.pop_back();
  auto package_name_pos = package_path_without_slash.find_last_of('/');
  if (package_name_pos == std::string::npos)
    package_name_pos = 0;
  else
    package_name_pos++;
  auto package_name = package_path_without_slash.substr(package_name_pos);
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return package_path_without_slash + "/" + package_name + qstring + ".circle";
}

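// A typical invocation looks like the following (illustrative only; the exact
// option spellings and positional arguments are defined in args.cc):
//   $ BACKENDS=cpu onert_run path/to/nnpackage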
int main(const int argc, char **argv)
{
  using namespace onert_run;

  try
  {
    Args args(argc, argv);
    if (args.printVersion())
    {
      uint32_t version;
      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
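      // The packed version word stores major in bits 31..24, minor in bits
      // 15..8, and patch in bits 7..0; e.g. 0x01001900 decodes to v1.25.0.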
      std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
      exit(0);
    }

#ifdef RUY_PROFILER
    ruy::profiler::ScopeProfile ruy_profile;
#endif

    // TODO Apply verbose level to phases
    const int verbose = args.getVerboseLevel();
    benchmark::Phases phases(
      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});

    nnfw_session *session = nullptr;
    NNPR_ENSURE_STATUS(nnfw_create_session(&session));

    // ModelLoad
    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
      if (args.useSingleModel())
        NNPR_ENSURE_STATUS(
          nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
      else
        NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
    });

    // Quantize model
    auto quantize = args.getQuantize();
    if (!quantize.empty())
    {
      // Note: the CLI value "int8" selects uint8 asymmetric quantization
      // (NNFW_QUANTIZE_TYPE_U8_ASYM), matching the upstream option naming.
      NNFW_QUANTIZE_TYPE quantize_type = NNFW_QUANTIZE_TYPE_NOT_SET;
      if (quantize == "int8")
        quantize_type = NNFW_QUANTIZE_TYPE_U8_ASYM;
      else if (quantize == "int16")
        quantize_type = NNFW_QUANTIZE_TYPE_I16_SYM;
      NNPR_ENSURE_STATUS(nnfw_set_quantization_type(session, quantize_type));

      if (args.getQuantizedModelPath() != "")
        NNPR_ENSURE_STATUS(
          nnfw_set_quantized_model_path(session, args.getQuantizedModelPath().c_str()));
      else
      {
        if (args.useSingleModel())
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromModelPath(args.getModelFilename(), quantize == "int16")
              .c_str()));
        else
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromPackagePath(args.getPackageFilename(), quantize == "int16")
              .c_str()));
      }

      NNPR_ENSURE_STATUS(nnfw_quantize(session));
    }

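    // Allow overriding the backend list via the BACKENDS environment variable;
    // nnfw_set_available_backends takes a semicolon-separated list,
    // e.g. BACKENDS="cpu;acl_cl".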
    char *available_backends = std::getenv("BACKENDS");
    if (available_backends)
      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));

    uint32_t num_inputs;
    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));

    // verify input and output

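    // Both checks below accept any dtype in the contiguous NNFW_TYPE_TENSOR_*
    // range from FLOAT32 through QUANT16_SYMM_SIGNED and abort otherwise.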
    auto verifyInputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));

        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: unsupported input type" << std::endl;
          exit(-1);
        }
      }
    };

    auto verifyOutputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));

      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));

        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: unsupported output type" << std::endl;
          exit(-1);
        }
      }
    };

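    // Push requested shapes to the session, but only for inputs whose shape
    // actually differs from the model's current tensorinfo, so redundant
    // nnfw_set_input_tensorinfo calls are skipped.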
    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
      for (const auto &tensor_shape : tensor_shape_map)
      {
        auto ind = tensor_shape.first;
        auto &shape = tensor_shape.second;
        nnfw_tensorinfo ti;
        // Query the current tensorinfo; this fills dtype (which must be kept)
        // and the current shape used for the comparison below.
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));

        bool set_input = false;
        if (ti.rank != static_cast<int32_t>(shape.size()))
        {
          set_input = true;
        }
        else
        {
          for (int i = 0; i < ti.rank; i++)
          {
            if (ti.dims[i] != shape.at(i))
            {
              set_input = true;
              break;
            }
          }
        }
        if (!set_input)
          continue;

        ti.rank = static_cast<int32_t>(shape.size());
        for (int i = 0; i < ti.rank; i++)
          ti.dims[i] = shape.at(i);
        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
      }
    };

    verifyInputTypes();
    verifyOutputTypes();

// set input shape before compilation
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1

    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
      assert(!h5_file.empty());
      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
      overwriteShapeMap(shape_map, shapes);
    };

    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
#endif
    setTensorInfo(args.getShapeMapForPrepare());

    // prepare execution

    // TODO When nnfw_{prepare|run} fails, the elapsed time cannot be captured
    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
      NNPR_ENSURE_STATUS(nnfw_prepare(session));
    });

// set input shape after compilation and before execution
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
#endif
    setTensorInfo(args.getShapeMapForRun());

    // prepare input
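    // Input source precedence: an HDF5 file if given (and HDF5 support is
    // built in), otherwise a raw binary file, otherwise random data.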
    std::vector<Allocation> inputs(num_inputs);
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (!args.getLoadFilename().empty())
      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
    else if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#else
    if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#endif

    // prepare output
    uint32_t num_outputs = 0;
    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
    std::vector<Allocation> outputs(num_outputs);
    auto output_sizes = args.getOutputSizes();
    for (uint32_t i = 0; i < num_outputs; i++)
    {
      nnfw_tensorinfo ti;
      // Always query tensorinfo first so ti.dtype is initialized before it is
      // passed to nnfw_set_output (previously ti was left uninitialized when a
      // user-specified output size was found).
      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));

      uint64_t output_size_in_bytes = 0;
      {
        auto found = output_sizes.find(i);
        if (found == output_sizes.end())
          output_size_in_bytes = bufsize_for(&ti);
        else
          output_size_in_bytes = found->second;
      }
      outputs[i].alloc(output_size_in_bytes);
      NNPR_ENSURE_STATUS(
        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
    }

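    // With verbose == 0 the warmup and measured runs execute silently;
    // otherwise per-iteration latency is printed for both phases.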
    // NOTE: Measuring memory inevitably adds overhead, so memory is polled
    // only during the warmup phase.
    if (verbose == 0)
    {
      phases.run(
        "WARMUP",
        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
        args.getWarmupRuns());
      phases.run(
        "EXECUTE",
        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
        args.getNumRuns(), true);
    }
    else
    {
      phases.run(
        "WARMUP",
        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
        [&](const benchmark::Phase &phase, uint32_t nth) {
          std::cout << "... "
                    << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                    << std::endl;
        },
        args.getWarmupRuns());
      phases.run(
        "EXECUTE",
        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
        [&](const benchmark::Phase &phase, uint32_t nth) {
          std::cout << "... "
                    << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                    << std::endl;
        },
        args.getNumRuns(), true);
    }

#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    // dump output tensors
    if (!args.getDumpFilename().empty())
      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
#endif
    if (!args.getDumpRawFilename().empty())
      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);

    NNPR_ENSURE_STATUS(nnfw_close_session(session));

    // TODO Apply verbose level to result

    // prepare result
    benchmark::Result result(phases);

    // to stdout
    benchmark::printResult(result);

    // to csv
    if (!args.getWriteReport())
      return 0;

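    // Derive the names that go into the CSV report: the executable's basename,
    // the nnpackage/model basename, and the backend list (the BACKENDS
    // environment value, or "cpu" by default).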
    // prepare csv task
    std::string exec_basename;
    std::string nnpkg_basename;
    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
    {
      char buf[PATH_MAX];
      char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
                                        : realpath(args.getPackageFilename().c_str(), buf);
      if (res)
      {
        nnpkg_basename = basename(buf);
      }
      else
      {
        std::cerr << "E: failed to get realpath of nnpackage or model path." << std::endl;
        exit(-1);
      }
      exec_basename = basename(argv[0]);
    }

    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);

    return 0;
  }
  catch (boost::program_options::error &e)
  {
    std::cerr << "E: " << e.what() << std::endl;
    exit(-1);
  }
  catch (std::runtime_error &e)
  {
    std::cerr << "E: failed to run due to runtime error: " << e.what() << std::endl;
    exit(-1);
  }
}