2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "allocation.h"
19 #include "benchmark.h"
20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
21 #include "h5formatter.h"
24 #include "nnfw_util.h"
25 #include "nnfw_internal.h"
26 #include "randomgen.h"
28 #include "ruy/profiler/profiler.h"
37 #include <unordered_map>
// Backend name used for the benchmark report when the BACKENDS environment
// variable is not set (see the backend_name selection near the end of main()).
40 static const char *default_backend_cand = "acl_cl";
// Applies per-operation backend overrides taken from environment variables.
//
// For every (operation name, environment variable name) entry of the table below,
// the variable is read with std::getenv() and its value is handed to
// nnfw_set_op_backend() for that operation on `session`.
//
// Returns NNFW_STATUS_NO_ERROR once the loop has finished.
//
// NOTE(review): some lines of this function are not visible in this view. In
// particular, whether an unset variable (nullptr from getenv) is skipped, and
// what exactly happens when nnfw_set_op_backend() reports NNFW_STATUS_ERROR,
// cannot be confirmed from here - check against the full file.
42 NNFW_STATUS resolve_op_backend(nnfw_session *session)
// Operation name -> name of the environment variable that selects its backend.
// Function-local static, so the table is built once and reused across calls.
44 static std::unordered_map<std::string, std::string> operation_map = {
45 {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"}, {"CONV_2D", "OP_BACKEND_Conv2D"},
46 {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
47 {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"}, {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
48 {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"}, {"ADD", "OP_BACKEND_Add"}};
// NOTE(review): `auto i` copies each pair of strings per iteration;
// `const auto &` would iterate without copying.
50 for (auto i : operation_map)
// i.second is the environment variable name; the result is nullptr when unset.
52 char *default_backend = std::getenv(i.second.c_str());
// i.first is the operation name the backend is applied to.
55 NNFW_STATUS return_result = nnfw_set_op_backend(session, i.first.c_str(), default_backend);
56 if (return_result == NNFW_STATUS_ERROR)
61 return NNFW_STATUS_NO_ERROR;
// Entry point of the nnpkg_run tool.
//
// Visible flow: parse arguments, create an nnfw session, load the nnpackage
// ("MODEL_LOAD" phase), configure backends, optionally override input shapes,
// prepare the model ("PREPARE" phase), allocate input/output buffers, run the
// model for warm-up and measurement ("EXECUTE" phase), optionally dump outputs
// to HDF5, close the session, then print and optionally write a benchmark
// report. A std::runtime_error handler reports failures on stderr.
//
// NOTE(review): many lines of this function (braces, some declarations, branch
// conditions and the end of the function) are not visible in this view; the
// comments below describe only the statements that are shown.
64 int main(const int argc, char **argv)
66 using namespace nnpkg_run;
// Parse the command line and fetch the path of the nnpackage to run.
70 Args args(argc, argv);
71 auto nnpackage_path = args.getPackageFilename();
// Version request: query the runtime version word and print it.
72 if (args.printVersion())
75 NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
// The version word is printed as three dot-separated fields: bits 24 and up,
// bits 8-15, and bits 0-7 (presumably major.minor.patch - confirm).
// Bits 16-23 are not printed.
76 std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
77 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
// ruy profiler scope object; any preprocessor guard around it is not visible here.
82 ruy::profiler::ScopeProfile ruy_profile;
85 // TODO Apply verbose level to phases
// NOTE(review): the statement that consumes `verbose` is not visible in this view.
86 const int verbose = args.getVerboseLevel();
// Phase runner configured from the memory-poll, GPU-memory-poll and run-delay options.
87 benchmark::Phases phases(
88 benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
// Create the runtime session used by every nnfw_* call below.
90 nnfw_session *session = nullptr;
91 NNPR_ENSURE_STATUS(nnfw_create_session(&session));
// Load the model from the nnpackage path as the "MODEL_LOAD" phase.
94 phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
95 NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
// Backend selection: the BACKENDS environment variable, when set, is passed to
// nnfw_set_available_backends(); per-operation overrides are then applied by
// resolve_op_backend().
98 char *available_backends = std::getenv("BACKENDS");
99 if (available_backends)
100 NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
101 NNPR_ENSURE_STATUS(resolve_op_backend(session));
// Number of model inputs (the declaration of num_inputs is not visible here).
104 NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
106 // verify input and output
// Checks every input tensor: a dtype outside the range
// [NNFW_TYPE_TENSOR_FLOAT32, NNFW_TYPE_TENSOR_INT64] is reported on stderr.
// NOTE(review): what follows the error message, and where this lambda is
// invoked, are not visible in this view.
108 auto verifyInputTypes = [session]() {
110 NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
111 for (uint32_t i = 0; i < sz; ++i)
114 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
116 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
118 std::cerr << "E: not supported input type" << std::endl;
// Same dtype range check as above, applied to every output tensor.
124 auto verifyOutputTypes = [session]() {
126 NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
128 for (uint32_t i = 0; i < sz; ++i)
131 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
133 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
135 std::cerr << "E: not supported output type" << std::endl;
// Overrides input shapes: for each (input index, shape) entry, the current
// tensor info is fetched, its rank and dims are replaced by the given shape,
// and the result is written back with nnfw_set_input_tensorinfo().
141 auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
// NOTE(review): `auto tensor_shape` copies each map entry per iteration.
142 for (auto tensor_shape : tensor_shape_map)
144 auto ind = tensor_shape.first;
145 auto &shape = tensor_shape.second;
148 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
// Only rank and dims are changed; the other fields of ti keep the values
// returned by nnfw_input_tensorinfo().
150 ti.rank = shape.size();
151 for (int i = 0; i < ti.rank; i++)
152 ti.dims[i] = shape.at(i);
153 NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
160 // set input shape before compilation
161 setTensorInfo(args.getShapeMapForPrepare());
165 // TODO When nnfw_{prepare|run} are failed, can't catch the time
// Prepare the loaded model as the "PREPARE" phase.
166 phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
167 NNPR_ENSURE_STATUS(nnfw_prepare(session));
170 // set input shape after compilation and before execution
171 setTensorInfo(args.getShapeMapForRun());
// One Allocation per model input. With HDF5 support and a non-empty load
// filename the inputs are read from that file; RandomGenerator fills them
// otherwise.
// NOTE(review): the else/preprocessor lines that choose between the two
// identical generate() calls below are not visible in this view.
174 std::vector<Allocation> inputs(num_inputs);
175 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
176 if (!args.getLoadFilename().empty())
177 H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
179 RandomGenerator(session).generate(inputs);
181 RandomGenerator(session).generate(inputs);
// One Allocation per model output. The buffer size for output i is the
// override from args.getOutputSizes() when one exists for index i, otherwise
// bufsize_for() of the output's tensor info.
185 uint32_t num_outputs = 0;
186 NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
187 std::vector<Allocation> outputs(num_outputs);
188 auto output_sizes = args.getOutputSizes();
189 for (uint32_t i = 0; i < num_outputs; i++)
192 uint64_t output_size_in_bytes = 0;
194 auto found = output_sizes.find(i);
// No override for this output: derive the size from the tensor info.
195 if (found == output_sizes.end())
197 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
198 output_size_in_bytes = bufsize_for(&ti);
// Override present: use the caller-provided size.
202 output_size_in_bytes = found->second;
205 outputs[i].alloc(output_size_in_bytes);
// Register the buffer with the session.
// NOTE(review): in the visible code ti is filled by nnfw_output_tensorinfo()
// only on the no-override path, yet ti.dtype is read here on both paths -
// verify ti is populated when a size override is supplied.
207 nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
208 NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
211 // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
// Two variants of the run phases follow; the condition selecting between them
// is not visible in this view (presumably the verbose level - confirm).
// Variant 1: run nnfw_run() args.getWarmupRuns() times in a phase whose name
// argument is on a line not shown here, then args.getNumRuns() times as "EXECUTE".
216 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
217 args.getWarmupRuns());
218 phases.run("EXECUTE",
219 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
220 args.getNumRuns(), true);
// Variant 2: same runs, plus a second callback that prints the time of each
// iteration as phase.time[nth] / 1e3 labelled "ms" (assumes phase.time is in
// microseconds - TODO confirm).
225 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
226 [&](const benchmark::Phase &phase, uint32_t nth) {
228 << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
231 args.getWarmupRuns());
232 phases.run("EXECUTE",
233 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
234 [&](const benchmark::Phase &phase, uint32_t nth) {
236 << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
239 args.getNumRuns(), true);
242 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
243 // dump output tensors
244 if (!args.getDumpFilename().empty())
245 H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
// The session is closed before the results are reported.
248 NNPR_ENSURE_STATUS(nnfw_close_session(session));
250 // TODO Apply verbose level to result
// Collect the measurements of all phases and print them.
253 benchmark::Result result(phases);
256 benchmark::printResult(result);
// Report writing is optional.
// NOTE(review): the action taken when it is disabled is not visible in this view.
259 if (args.getWriteReport() == false)
// Values for the written report: executable basename, nnpackage basename and
// backend name (the BACKENDS value when set, default_backend_cand otherwise).
263 std::string exec_basename;
264 std::string nnpkg_basename;
265 std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
// Resolve the package path with realpath() into buf (declared on a line not
// shown here) and keep its basename.
268 char *res = realpath(nnpackage_path.c_str(), buf);
271 nnpkg_basename = basename(buf);
275 std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
278 exec_basename = basename(argv[0]);
281 benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
// Runtime errors are reported on stderr.
// NOTE(review): the rest of this handler is not visible in this view.
285 catch (std::runtime_error &e)
287 std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;