/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "allocation.h"
#include "args.h"
#include "benchmark.h"
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
#include "h5formatter.h"
#endif
#include "nnfw_util.h"
#include "nnfw_internal.h"
#include "nnfw_experimental.h"
#include "randomgen.h"
#include "rawformatter.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
#endif

#include <boost/program_options.hpp>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <iostream>
#include <libgen.h>
#include <string>
#include <unordered_map>
#include <vector>
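
// NNPR_ENSURE_STATUS comes from nnfw_util.h; conceptually it wraps an nnfw_* call and bails
// out on failure, roughly:
//   if ((call) != NNFW_STATUS_NO_ERROR) exit(-1);
// (sketch only; see nnfw_util.h for the actual definition)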
static const char *default_backend_cand = "cpu";
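
// TensorShapeMap (from the onert_run headers) maps an input index to the shape that should
// override it, presumably something like std::unordered_map<uint32_t, TensorShape>. The helper
// below fills it positionally from a list of shapes read elsewhere.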
void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
                       std::vector<onert_run::TensorShape> shapes)
{
  for (uint32_t i = 0; i < shapes.size(); i++)
    shape_map[i] = shapes[i];
}
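
// The next helper derives the output path for an on-the-fly quantized model: for example,
// "model.circle" becomes "model_quantized_q8.circle" ("model_quantized_q16.circle" when
// is_q16 is true).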
std::string genQuantizedModelPathFromModelPath(const std::string &model_path, bool is_q16)
{
  auto const extension_pos = model_path.find(".circle");
  if (extension_pos == std::string::npos)
  {
    std::cerr << "Input model isn't .circle." << std::endl;
    exit(-1);
  }
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return model_path.substr(0, extension_pos) + qstring + ".circle";
}
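
// The nnpackage variant places the quantized model inside the package directory: for example,
// "/path/to/mobilenet" (with or without a trailing '/') maps to
// "/path/to/mobilenet/mobilenet_quantized_q8.circle".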
std::string genQuantizedModelPathFromPackagePath(const std::string &package_path, bool is_q16)
{
  auto package_path_without_slash = package_path;
  if (package_path_without_slash.back() == '/')
    package_path_without_slash.pop_back();
  auto package_name_pos = package_path_without_slash.find_last_of('/');
  if (package_name_pos == std::string::npos)
    package_name_pos = 0; // relative path without a parent directory
  else
    package_name_pos += 1; // skip the '/' itself
  auto package_name = package_path_without_slash.substr(package_name_pos);
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return package_path_without_slash + "/" + package_name + qstring + ".circle";
}

int main(const int argc, char **argv)
{
  using namespace onert_run;
  try
  {
    Args args(argc, argv);
    if (args.printVersion())
    {
      uint32_t version;
      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
      std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
      exit(0);
    }

#ifdef RUY_PROFILER
    ruy::profiler::ScopeProfile ruy_profile;
#endif

    // TODO Apply verbose level to phases
    const int verbose = args.getVerboseLevel();
    benchmark::Phases phases(
      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});

    nnfw_session *session = nullptr;
    NNPR_ENSURE_STATUS(nnfw_create_session(&session));

    // Load the model: either a single model file or an nnpackage
    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
      if (args.useSingleModel())
        NNPR_ENSURE_STATUS(
          nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
      else
        NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
    });

    // Quantize the model on the fly if requested
    auto quantize = args.getQuantize();
    if (!quantize.empty())
    {
      NNFW_QUANTIZE_TYPE quantize_type = NNFW_QUANTIZE_TYPE_NOT_SET;
      if (quantize == "int8")
        quantize_type = NNFW_QUANTIZE_TYPE_U8_ASYM;
      if (quantize == "int16")
        quantize_type = NNFW_QUANTIZE_TYPE_I16_SYM;
      NNPR_ENSURE_STATUS(nnfw_set_quantization_type(session, quantize_type));

      if (args.getQuantizedModelPath() != "")
        NNPR_ENSURE_STATUS(
          nnfw_set_quantized_model_path(session, args.getQuantizedModelPath().c_str()));
      else
      {
        if (args.useSingleModel())
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromModelPath(args.getModelFilename(), quantize == "int16")
              .c_str()));
        else
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromPackagePath(args.getPackageFilename(), quantize == "int16")
              .c_str()));
      }

      NNPR_ENSURE_STATUS(nnfw_quantize(session));
    }
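
    // The BACKENDS environment variable, when set, is forwarded verbatim to
    // nnfw_set_available_backends as the backend priority string (e.g. "cpu");
    // see the nnfw API documentation for the accepted format.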
    char *available_backends = std::getenv("BACKENDS");
    if (available_backends)
      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));

    uint32_t num_inputs;
    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));

    // verify input and output
    auto verifyInputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: not supported input type" << std::endl;
          exit(-1);
        }
      }
    };

    auto verifyOutputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: not supported output type" << std::endl;
          exit(-1);
        }
      }
    };
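
    // Both the verification above and the shape overriding below go through nnfw_tensorinfo
    // (from nnfw.h), which carries dtype, rank, and a fixed-size dims array, so rank/dims can
    // be rewritten in place before nnfw_set_input_tensorinfo is called.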
    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
      for (auto tensor_shape : tensor_shape_map)
      {
        auto ind = tensor_shape.first;
        auto &shape = tensor_shape.second;
        nnfw_tensorinfo ti;
        // read the current info first so that dtype is preserved
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
        bool set_input = false;
        if (ti.rank != shape.size())
          set_input = true;
        else
          for (int i = 0; i < ti.rank; i++)
            if (ti.dims[i] != shape.at(i))
            {
              set_input = true;
              break;
            }
        if (!set_input)
          continue;
        ti.rank = shape.size();
        for (int i = 0; i < ti.rank; i++)
          ti.dims[i] = shape.at(i);
        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
      }
    };

    verifyInputTypes();
    verifyOutputTypes();

    // set input shape before compilation
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
      assert(!h5_file.empty());
      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
      overwriteShapeMap(shape_map, shapes);
    };

    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
#endif
    setTensorInfo(args.getShapeMapForPrepare());

    // TODO When nnfw_{prepare|run} fails, its elapsed time cannot be captured
    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
      NNPR_ENSURE_STATUS(nnfw_prepare(session));
    });

    // set input shape after compilation and before execution
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
#endif
    setTensorInfo(args.getShapeMapForRun());

    // prepare input buffers
    std::vector<Allocation> inputs(num_inputs);
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (!args.getLoadFilename().empty())
      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
    else if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#else
    if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#endif

    // prepare output buffers
    uint32_t num_outputs = 0;
    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
    std::vector<Allocation> outputs(num_outputs);
    auto output_sizes = args.getOutputSizes();
    for (uint32_t i = 0; i < num_outputs; i++)
    {
      nnfw_tensorinfo ti;
      uint64_t output_size_in_bytes = 0;
      {
        auto found = output_sizes.find(i);
        if (found == output_sizes.end())
        {
          NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
          output_size_in_bytes = bufsize_for(&ti);
        }
        else
        {
          output_size_in_bytes = found->second;
        }
      }
      outputs[i].alloc(output_size_in_bytes);
      NNPR_ENSURE_STATUS(
        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
    }
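
    // bufsize_for (from nnfw_util.h) is expected to return the tensor's byte size, i.e. the
    // element count implied by ti.dims times the element size of ti.dtype; an explicitly
    // requested size from args.getOutputSizes() takes precedence in the loop above.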

    // NOTE: Measuring memory can't avoid taking overhead; therefore, memory is measured
    // during the warmup phase only.
    if (verbose == 0)
    {
      phases.run("WARMUP",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 args.getWarmupRuns());
      phases.run("EXECUTE",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 args.getNumRuns(), true);
    }
    else
    {
      phases.run("WARMUP",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 [&](const benchmark::Phase &phase, uint32_t nth) {
                   std::cout << "... "
                             << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                             << std::endl;
                 },
                 args.getWarmupRuns());
      phases.run("EXECUTE",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 [&](const benchmark::Phase &phase, uint32_t nth) {
                   std::cout << "... "
                             << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                             << std::endl;
                 },
                 args.getNumRuns(), true);
    }

#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    // dump output tensors
    if (!args.getDumpFilename().empty())
      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
#endif
    if (!args.getDumpRawFilename().empty())
      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);

    NNPR_ENSURE_STATUS(nnfw_close_session(session));

    // TODO Apply verbose level to result
    benchmark::Result result(phases);
    benchmark::printResult(result);

    if (args.getWriteReport() == false)
      return 0;

    std::string exec_basename;
    std::string nnpkg_basename;
    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
    {
      char buf[PATH_MAX];
      char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
                                        : realpath(args.getPackageFilename().c_str(), buf);
      if (res)
        nnpkg_basename = basename(buf);
      else
      {
        std::cerr << "E: failed to get realpath of the nnpackage or model path." << std::endl;
        exit(-1);
      }
      exec_basename = basename(argv[0]);
    }
    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);

    return 0;
  }
  catch (boost::program_options::error &e)
  {
    std::cerr << "E: " << e.what() << std::endl;
    exit(-1);
  }
  catch (std::runtime_error &e)
  {
    std::cerr << "E: Failed to run due to runtime error: " << e.what() << std::endl;
    exit(-1);
  }
}