Imported Upstream version 1.9.0
[platform/core/ml/nnfw.git] / tests / tools / nnpackage_run / src / nnpackage_run.cc
1 /*
2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "allocation.h"
18 #include "args.h"
19 #include "benchmark.h"
20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
21 #include "h5formatter.h"
22 #endif
23 #include "nnfw.h"
24 #include "nnfw_util.h"
25 #include "nnfw_internal.h"
26 #include "randomgen.h"
27 #ifdef RUY_PROFILER
28 #include "ruy/profiler/profiler.h"
29 #endif
30
31 #include <cassert>
32 #include <chrono>
33 #include <cstdlib>
34 #include <iostream>
35 #include <libgen.h>
36 #include <stdexcept>
37 #include <unordered_map>
38 #include <vector>
39
// Backend name reported in the benchmark result when the BACKENDS environment
// variable is not set. Both the pointer and the pointee are const so the
// fallback cannot be re-pointed by accident.
static const char *const default_backend_cand = "cpu";
41
42 void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
43                        std::vector<nnpkg_run::TensorShape> shapes)
44 {
45   for (uint32_t i = 0; i < shapes.size(); i++)
46     shape_map[i] = shapes[i];
47 }
48
49 int main(const int argc, char **argv)
50 {
51   using namespace nnpkg_run;
52
53   try
54   {
55     Args args(argc, argv);
56     auto nnpackage_path = args.getPackageFilename();
57     if (args.printVersion())
58     {
59       uint32_t version;
60       NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
61       std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
62                 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
63       exit(0);
64     }
65
66 #ifdef RUY_PROFILER
67     ruy::profiler::ScopeProfile ruy_profile;
68 #endif
69
70     // TODO Apply verbose level to phases
71     const int verbose = args.getVerboseLevel();
72     benchmark::Phases phases(
73         benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
74
75     nnfw_session *session = nullptr;
76     NNPR_ENSURE_STATUS(nnfw_create_session(&session));
77
78     // ModelLoad
79     phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
80       NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
81     });
82
83     char *available_backends = std::getenv("BACKENDS");
84     if (available_backends)
85       NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
86
87     uint32_t num_inputs;
88     NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
89
90     // verify input and output
91
92     auto verifyInputTypes = [session]() {
93       uint32_t sz;
94       NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
95       for (uint32_t i = 0; i < sz; ++i)
96       {
97         nnfw_tensorinfo ti;
98         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
99
100         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
101         {
102           std::cerr << "E: not supported input type" << std::endl;
103           exit(-1);
104         }
105       }
106     };
107
108     auto verifyOutputTypes = [session]() {
109       uint32_t sz;
110       NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
111
112       for (uint32_t i = 0; i < sz; ++i)
113       {
114         nnfw_tensorinfo ti;
115         NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
116
117         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
118         {
119           std::cerr << "E: not supported output type" << std::endl;
120           exit(-1);
121         }
122       }
123     };
124
125     auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
126       for (auto tensor_shape : tensor_shape_map)
127       {
128         auto ind = tensor_shape.first;
129         auto &shape = tensor_shape.second;
130         nnfw_tensorinfo ti;
131         // to fill dtype
132         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
133
134         ti.rank = shape.size();
135         for (int i = 0; i < ti.rank; i++)
136           ti.dims[i] = shape.at(i);
137         NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
138       }
139     };
140
141     verifyInputTypes();
142     verifyOutputTypes();
143
144 // set input shape before compilation
145 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
146     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
147     {
148       auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
149       overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
150     }
151 #endif
152     setTensorInfo(args.getShapeMapForPrepare());
153
154     // prepare execution
155
156     // TODO When nnfw_{prepare|run} are failed, can't catch the time
157     phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
158       NNPR_ENSURE_STATUS(nnfw_prepare(session));
159     });
160
161 // set input shape after compilation and before execution
162 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
163     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
164     {
165       auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
166       overwriteShapeMap(args.getShapeMapForRun(), shapes);
167     }
168 #endif
169     setTensorInfo(args.getShapeMapForRun());
170
171     // prepare input
172     std::vector<Allocation> inputs(num_inputs);
173 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
174     if (!args.getLoadFilename().empty())
175       H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
176     else
177       RandomGenerator(session).generate(inputs);
178 #else
179     RandomGenerator(session).generate(inputs);
180 #endif
181
182     // prepare output
183     uint32_t num_outputs = 0;
184     NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
185     std::vector<Allocation> outputs(num_outputs);
186     auto output_sizes = args.getOutputSizes();
187     for (uint32_t i = 0; i < num_outputs; i++)
188     {
189       nnfw_tensorinfo ti;
190       uint64_t output_size_in_bytes = 0;
191       {
192         auto found = output_sizes.find(i);
193         if (found == output_sizes.end())
194         {
195           NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
196           output_size_in_bytes = bufsize_for(&ti);
197         }
198         else
199         {
200           output_size_in_bytes = found->second;
201         }
202       }
203       outputs[i].alloc(output_size_in_bytes);
204       NNPR_ENSURE_STATUS(
205           nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
206       NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
207     }
208
209     // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
210     // only warmup.
211     if (verbose == 0)
212     {
213       phases.run("WARMUP",
214                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
215                  args.getWarmupRuns());
216       phases.run("EXECUTE",
217                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
218                  args.getNumRuns(), true);
219     }
220     else
221     {
222       phases.run("WARMUP",
223                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
224                  [&](const benchmark::Phase &phase, uint32_t nth) {
225                    std::cout << "... "
226                              << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
227                              << std::endl;
228                  },
229                  args.getWarmupRuns());
230       phases.run("EXECUTE",
231                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
232                  [&](const benchmark::Phase &phase, uint32_t nth) {
233                    std::cout << "... "
234                              << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
235                              << std::endl;
236                  },
237                  args.getNumRuns(), true);
238     }
239
240 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
241     // dump output tensors
242     if (!args.getDumpFilename().empty())
243       H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
244 #endif
245
246     NNPR_ENSURE_STATUS(nnfw_close_session(session));
247
248     // TODO Apply verbose level to result
249
250     // prepare result
251     benchmark::Result result(phases);
252
253     // to stdout
254     benchmark::printResult(result);
255
256     // to csv
257     if (args.getWriteReport() == false)
258       return 0;
259
260     // prepare csv task
261     std::string exec_basename;
262     std::string nnpkg_basename;
263     std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
264     {
265       char buf[PATH_MAX];
266       char *res = realpath(nnpackage_path.c_str(), buf);
267       if (res)
268       {
269         nnpkg_basename = basename(buf);
270       }
271       else
272       {
273         std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
274         exit(-1);
275       }
276       exec_basename = basename(argv[0]);
277     }
278
279     benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
280
281     return 0;
282   }
283   catch (std::runtime_error &e)
284   {
285     std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
286     exit(-1);
287   }
288 }