/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "allocation.h"
19 #include "benchmark.h"
20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
21 #include "h5formatter.h"
24 #include "nnfw_util.h"
25 #include "nnfw_internal.h"
26 #include "randomgen.h"
28 #include "ruy/profiler/profiler.h"
37 #include <unordered_map>
40 static const char *default_backend_cand = "cpu";
42 void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
43 std::vector<nnpkg_run::TensorShape> shapes)
45 for (uint32_t i = 0; i < shapes.size(); i++)
46 shape_map[i] = shapes[i];
49 int main(const int argc, char **argv)
51 using namespace nnpkg_run;
55 Args args(argc, argv);
56 auto nnpackage_path = args.getPackageFilename();
57 if (args.printVersion())
60 NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
61 std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
62 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
67 ruy::profiler::ScopeProfile ruy_profile;
70 // TODO Apply verbose level to phases
71 const int verbose = args.getVerboseLevel();
72 benchmark::Phases phases(
73 benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
75 nnfw_session *session = nullptr;
76 NNPR_ENSURE_STATUS(nnfw_create_session(&session));
79 phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
80 NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
83 char *available_backends = std::getenv("BACKENDS");
84 if (available_backends)
85 NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
88 NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
90 // verify input and output
92 auto verifyInputTypes = [session]() {
94 NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
95 for (uint32_t i = 0; i < sz; ++i)
98 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
100 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
102 std::cerr << "E: not supported input type" << std::endl;
108 auto verifyOutputTypes = [session]() {
110 NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
112 for (uint32_t i = 0; i < sz; ++i)
115 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
117 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
119 std::cerr << "E: not supported output type" << std::endl;
125 auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
126 for (auto tensor_shape : tensor_shape_map)
128 auto ind = tensor_shape.first;
129 auto &shape = tensor_shape.second;
132 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
134 bool set_input = false;
135 if (ti.rank != shape.size())
141 for (int i = 0; i < ti.rank; i++)
143 if (ti.dims[i] != shape.at(i))
153 ti.rank = shape.size();
154 for (int i = 0; i < ti.rank; i++)
155 ti.dims[i] = shape.at(i);
156 NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
163 // set input shape before compilation
164 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
166 auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
167 assert(!h5_file.empty());
168 auto shapes = H5Formatter(session).readTensorShapes(h5_file);
169 overwriteShapeMap(shape_map, shapes);
172 if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
173 fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
175 setTensorInfo(args.getShapeMapForPrepare());
179 // TODO When nnfw_{prepare|run} are failed, can't catch the time
180 phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
181 NNPR_ENSURE_STATUS(nnfw_prepare(session));
184 // set input shape after compilation and before execution
185 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
186 if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
187 (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
188 fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
190 setTensorInfo(args.getShapeMapForRun());
193 std::vector<Allocation> inputs(num_inputs);
194 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
195 if (!args.getLoadFilename().empty())
196 H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
198 RandomGenerator(session).generate(inputs);
200 RandomGenerator(session).generate(inputs);
204 uint32_t num_outputs = 0;
205 NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
206 std::vector<Allocation> outputs(num_outputs);
207 auto output_sizes = args.getOutputSizes();
208 for (uint32_t i = 0; i < num_outputs; i++)
211 uint64_t output_size_in_bytes = 0;
213 auto found = output_sizes.find(i);
214 if (found == output_sizes.end())
216 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
217 output_size_in_bytes = bufsize_for(&ti);
221 output_size_in_bytes = found->second;
224 outputs[i].alloc(output_size_in_bytes);
226 nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
227 NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
230 // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
236 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
237 args.getWarmupRuns());
240 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
241 args.getNumRuns(), true);
247 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
248 [&](const benchmark::Phase &phase, uint32_t nth) {
250 << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
253 args.getWarmupRuns());
256 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
257 [&](const benchmark::Phase &phase, uint32_t nth) {
259 << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
262 args.getNumRuns(), true);
265 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
266 // dump output tensors
267 if (!args.getDumpFilename().empty())
268 H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
271 NNPR_ENSURE_STATUS(nnfw_close_session(session));
273 // TODO Apply verbose level to result
276 benchmark::Result result(phases);
279 benchmark::printResult(result);
282 if (args.getWriteReport() == false)
286 std::string exec_basename;
287 std::string nnpkg_basename;
288 std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
291 char *res = realpath(nnpackage_path.c_str(), buf);
294 nnpkg_basename = basename(buf);
298 std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
301 exec_basename = basename(argv[0]);
304 benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
308 catch (std::runtime_error &e)
310 std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;