/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "tflite/ext/kernels/register.h"
#include "tensorflow/lite/model.h"

#include "args.h" // TFLiteRun::Args command-line parsing (assumed local header name)
#include "tensor_dumper.h"
#include "tensor_loader.h"
#include "misc/benchmark.h"
#include "misc/EnvVar.h"
#include "misc/fp32.h"
#include "tflite/Diff.h"
#include "tflite/Assert.h"
#include "tflite/Session.h"
#include "tflite/InterpreterSession.h"
#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
#include "misc/tensor/Object.h"
#include "benchmark.h"

#include <libgen.h> // basename()

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>
using namespace tflite;
using namespace nnfw::tflite;
using namespace std::placeholders; // for _1, _2 ...
namespace
{

void print_max_idx(float *f, int size)
{
  float *p = std::max_element(f, f + size);
  std::cout << "max:" << p - f;
}

static const char *default_backend_cand = "tflite_cpu";
// Verifies whether the model is a flatbuffer file.
class BMFlatBufferVerifier : public tflite::TfLiteVerifier
{
public:
  bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
  {
    flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
    if (!tflite::VerifyModelBuffer(verifier))
    {
      reporter->Report("The model is not a valid Flatbuffer file");
      return false;
    }
    return true;
  }
};

} // namespace anonymous
int main(const int argc, char **argv)
{
  const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);

  StderrReporter error_reporter;

  TFLiteRun::Args args(argc, argv);

  std::chrono::milliseconds t_model_load(0), t_prepare(0);

  // TODO Apply verbose level to phases
  const int verbose = args.getVerboseLevel();
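  // Phases drives the named benchmark phases below; PhaseOption enables host/GPU memory polling
  // and an optional delay between runs, all taken from the command-line options.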
  benchmark::Phases phases(
      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
  std::unique_ptr<FlatBufferModel> model;
  std::unique_ptr<Interpreter> interpreter;
  std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
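  // MODEL_LOAD phase: read the .tflite file (optionally verifying the flatbuffer) and build the
  // interpreter from it.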
  try
  {
    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
      if (args.getModelValidate())
      {
        model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
                                                        verifier.get(), &error_reporter);
      }
      else
      {
        model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
      }
      if (model == nullptr)
      {
        throw std::runtime_error{"Cannot create model"};
      }

      BuiltinOpResolver resolver;
      InterpreterBuilder builder(*model, resolver);
      TFLITE_ENSURE(builder(&interpreter))
      interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
    });
  }
  catch (const std::exception &e)
  {
    std::cerr << e.what() << '\n';
    return 1;
  }
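  // Wrap the interpreter in a session. With USE_NNAPI=1 the NNAPI-backed session is used;
  // otherwise the model runs on the plain TFLite interpreter.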
  std::shared_ptr<nnfw::tflite::Session> sess;

  if (use_nnapi)
  {
    sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
  }
  else
  {
    sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
  }
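  // PREPARE phase: let the session get ready to run (e.g. tensor allocation).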
  try
  {
    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); });
  }
  catch (const std::exception &e)
  {
    std::cerr << e.what() << '\n';
    return 1;
  }
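  // Optionally override the input shapes from the command line. The flattened dimension list is
  // consumed across the model inputs in order, falling back to each tensor's original dimensions
  // once the list runs out.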
  if (args.getInputShapes().size() != 0)
  {
    const int dim_values = args.getInputShapes().size();
    int offset = 0;

    for (const auto &id : interpreter->inputs())
    {
      TfLiteTensor *tensor = interpreter->tensor(id);
      std::vector<int32_t> new_dim;
      new_dim.resize(tensor->dims->size);

      for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
      {
        new_dim[axis] =
            ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
      }

      interpreter->ResizeInputTensor(id, new_dim);

      if (offset >= dim_values)
        break;
    }
    interpreter->AllocateTensors();
  }
  TFLiteRun::TensorLoader tensor_loader(*interpreter);

  // Load input from a raw tensor file or a dumped tensor file.
  // The two options are mutually exclusive; this is checked in Args.
  if (!args.getInputFilename().empty() || !args.getCompareFilename().empty())
  {
    if (!args.getInputFilename().empty())
    {
      tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs());
    }
    else
    {
      tensor_loader.loadDumpedTensors(args.getCompareFilename());
    }

    for (const auto &o : interpreter->inputs())
    {
      const auto &tensor_view = tensor_loader.get(o);
      TfLiteTensor *tensor = interpreter->tensor(o);

      memcpy(reinterpret_cast<void *>(tensor->data.f),
             reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
    }
  }
  else
  {
    const int seed = 1; /* TODO Add an option for seed value */
    nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};

    // No input specified. So we fill the input tensors with random values.
    for (const auto &o : interpreter->inputs())
    {
      TfLiteTensor *tensor = interpreter->tensor(o);
      if (tensor->type == kTfLiteInt32)
      {
        // Generate signed 32-bit integer (s32) input
        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);

        int32_t value = 0;

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 // TODO Generate random values
                 // Gather operation: index should be within input coverage.
                 tensor_view.at(ind) = value;
                 value++;
               };
      }
      else if (tensor->type == kTfLiteUInt8)
      {
        // Generate unsigned 8-bit integer input
        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);

        auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
            const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
            &nnfw::misc::RandomGenerator::generate<uint8_t>);
        const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
                                                       std::bind(fp, randgen, _1, _2));

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 const auto value = data.at(ind);
                 tensor_view.at(ind) = value;
               };
      }
      else if (tensor->type == kTfLiteBool)
      {
        // Generate bool input
        auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);

        auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
            const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
            &nnfw::misc::RandomGenerator::generate<bool>);
        const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
                                                    std::bind(fp, randgen, _1, _2));

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 const auto value = data.at(ind);
                 tensor_view.at(ind) = value;
               };
      }
      else
      {
        assert(tensor->type == kTfLiteFloat32);

        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
        for (float *ptr = tensor->data.f; ptr < end; ptr++)
        {
          *ptr = randgen.generate<float>();
        }
      }
    }
  }
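  // TensorDumper records tensor snapshots so they can be written to a dump file later.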
  TFLiteRun::TensorDumper tensor_dumper;
  // Must be called before `interpreter->Invoke()`
  tensor_dumper.addTensors(*interpreter, interpreter->inputs());

  std::cout << "input tensor indices = [";
  for (const auto &o : interpreter->inputs())
  {
    std::cout << o << ",";
  }
  std::cout << "]" << std::endl;
  // NOTE Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
  // warmup.
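  // With the default verbose level only aggregated statistics are reported; at higher verbosity
  // each warmup/execution run also prints its own latency.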
  if (verbose == 0)
  {
    phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               args.getWarmupRuns());
    phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               args.getNumRuns(), true);
  }
  else
  {
    phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               [&](const benchmark::Phase &phase, uint32_t nth) {
                 std::cout << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                           << std::endl;
               },
               args.getWarmupRuns());
    phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               [&](const benchmark::Phase &phase, uint32_t nth) {
                 std::cout << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                           << std::endl;
               },
               args.getNumRuns(), true);
  }
  // Must be called after `interpreter->Invoke()`
  tensor_dumper.addTensors(*interpreter, interpreter->outputs());

  std::cout << "output tensor indices = [";
  for (const auto &o : interpreter->outputs())
  {
    std::cout << o << "(";

    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));

    std::cout << "),";
  }
  std::cout << "]" << std::endl;
  // TODO Apply verbose level to result
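  // Aggregate the per-phase time/memory figures collected above and print them.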
  benchmark::Result result(phases);

  benchmark::printResult(result);
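  // Optionally append the result to a report file, keyed by executable, model, and backend names.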
  if (args.getWriteReport())
  {
    std::string exec_basename;
    std::string model_basename;
    std::string backend_name = default_backend_cand;

    // basename() may modify its argument, so hand it a mutable, null-terminated copy of the path
    std::string model_path = args.getTFLiteFilename();
    std::vector<char> vpath(model_path.begin(), model_path.end());
    vpath.push_back('\0');

    model_basename = basename(vpath.data());
    size_t lastindex = model_basename.find_last_of(".");
    model_basename = model_basename.substr(0, lastindex);
    exec_basename = basename(argv[0]);

    benchmark::writeResult(result, exec_basename, model_basename, backend_name);
  }
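  // Optionally dump the recorded input/output tensors so a later run can compare against them.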
  if (!args.getDumpFilename().empty())
  {
    const std::string &dump_filename = args.getDumpFilename();
    tensor_dumper.dump(dump_filename);
    std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
              << std::endl;
  }
  if (!args.getCompareFilename().empty())
  {
    const std::string &compare_filename = args.getCompareFilename();
    std::cout << "========================================" << std::endl;
    std::cout << "Comparing the results with \"" << compare_filename << "\"." << std::endl;
    std::cout << "========================================" << std::endl;

    // TODO Code duplication (copied from RandomTestRunner)

    int tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
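    // A pair of values is accepted if it passes either the absolute-epsilon check or the
    // tolerance-scaled relative check below.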
    auto equals = [tolerance](float lhs, float rhs) {
      // NOTE Hybrid approach
      // TODO Allow users to set tolerance for absolute_epsilon_equal
      if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
      {
        return true;
      }

      return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
    };
    nnfw::misc::tensor::Comparator comparator(equals);
    TfLiteInterpMatchApp app(comparator);
    bool res = true;
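    // Compare every output tensor view against the expected values loaded from the dump file.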
    for (const auto &o : interpreter->outputs())
    {
      auto expected = tensor_loader.get(o);
      auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);

      res = res && app.compareSingleTensorView(expected, obtained, o);