/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "tflite/ext/kernels/register.h"
#include "tensorflow/lite/model.h"

#include "args.h" // TFLiteRun::Args command-line parsing (assumed local header name)
#include "tensor_dumper.h"
#include "tensor_loader.h"
#include "misc/benchmark.h"
#include "misc/EnvVar.h"
#include "misc/fp32.h"
#include "tflite/Diff.h"
#include "tflite/Assert.h"
#include "tflite/Session.h"
#include "tflite/InterpreterSession.h"
#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
#include "misc/tensor/Object.h"
#include "benchmark.h"

#include <libgen.h> // basename()

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>
using namespace tflite;
using namespace nnfw::tflite;
using namespace std::placeholders; // for _1, _2 ...
namespace
{

void print_max_idx(float *f, int size)
{
  float *p = std::max_element(f, f + size);
  std::cout << "max:" << p - f;
}

static const char *default_backend_cand = "tflite_cpu";
// Verifies whether the model is a flatbuffer file.
class BMFlatBufferVerifier : public tflite::TfLiteVerifier
{
public:
  bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
  {
    flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
    if (!tflite::VerifyModelBuffer(verifier))
    {
      reporter->Report("The model is not a valid Flatbuffer file");
      return false;
    }
    return true;
  }
};

} // namespace anonymous
int main(const int argc, char **argv)
{
  const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);

  StderrReporter error_reporter;

  TFLiteRun::Args args(argc, argv);

  std::chrono::milliseconds t_model_load(0), t_prepare(0);

  // TODO Apply verbose level to phases
  const int verbose = args.getVerboseLevel();
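  // Phases drives the named benchmark phases below; PhaseOption enables host/GPU memory polling
  // and an optional delay between runs, all taken from the command-line options.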
  benchmark::Phases phases(
      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
  std::unique_ptr<FlatBufferModel> model;
  std::unique_ptr<Interpreter> interpreter;
  std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
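  // MODEL_LOAD phase: read the .tflite file (optionally verifying the flatbuffer) and build the
  // interpreter from it.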
  try
  {
    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
      if (args.getModelValidate())
      {
        model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
                                                        verifier.get(), &error_reporter);
      }
      else
      {
        model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
      }
      if (model == nullptr)
      {
        throw std::runtime_error{"Cannot create model"};
      }

      BuiltinOpResolver resolver;
      InterpreterBuilder builder(*model, resolver);
      TFLITE_ENSURE(builder(&interpreter))
      interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
    });
  }
  catch (const std::exception &e)
  {
    std::cerr << e.what() << '\n';
    return 1;
  }
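  // Wrap the interpreter in a session. With USE_NNAPI=1 the NNAPI-backed session is used;
  // otherwise the model runs on the plain TFLite interpreter.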
  std::shared_ptr<nnfw::tflite::Session> sess;

  if (use_nnapi)
  {
    sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
  }
  else
  {
    sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
  }
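  // PREPARE phase: let the session get ready to run (e.g. tensor allocation).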
  try
  {
    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); });
  }
  catch (const std::exception &e)
  {
    std::cerr << e.what() << '\n';
    return 1;
  }
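  // Optionally override the input shapes from the command line. The flattened dimension list is
  // consumed across the model inputs in order, falling back to each tensor's original dimensions
  // once the list runs out.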
  if (args.getInputShapes().size() != 0)
  {
    const int dim_values = args.getInputShapes().size();
    int offset = 0;

    for (const auto &id : interpreter->inputs())
    {
      TfLiteTensor *tensor = interpreter->tensor(id);
      std::vector<int32_t> new_dim;
      new_dim.resize(tensor->dims->size);

      for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
      {
        new_dim[axis] =
            ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
      }

      interpreter->ResizeInputTensor(id, new_dim);

      if (offset >= dim_values)
        break;
    }
    interpreter->AllocateTensors();
  }
  TFLiteRun::TensorLoader tensor_loader(*interpreter);

  // Load input from a raw tensor file or a dumped tensor file.
  // The two options are mutually exclusive; this is checked in Args.
  if (!args.getInputFilename().empty() || !args.getCompareFilename().empty())
  {
    if (!args.getInputFilename().empty())
    {
      tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs());
    }
    else
    {
      tensor_loader.loadDumpedTensors(args.getCompareFilename());
    }

    for (const auto &o : interpreter->inputs())
    {
      const auto &tensor_view = tensor_loader.get(o);
      TfLiteTensor *tensor = interpreter->tensor(o);

      memcpy(reinterpret_cast<void *>(tensor->data.f),
             reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
    }
  }
  else
  {
    const int seed = 1; /* TODO Add an option for seed value */
    nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};

    // No input specified. So we fill the input tensors with random values.
    for (const auto &o : interpreter->inputs())
    {
      TfLiteTensor *tensor = interpreter->tensor(o);
      if (tensor->type == kTfLiteInt32)
      {
        // Generate signed 32-bit integer (s32) input
        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);

        int32_t value = 0;

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 // TODO Generate random values
                 // Gather operation: index should be within input coverage.
                 tensor_view.at(ind) = value;
                 value++;
               };
      }
      else if (tensor->type == kTfLiteUInt8)
      {
        // Generate unsigned 8-bit integer input
        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);

        auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
            const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
            &nnfw::misc::RandomGenerator::generate<uint8_t>);
        const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
                                                       std::bind(fp, randgen, _1, _2));

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 const auto value = data.at(ind);
                 tensor_view.at(ind) = value;
               };
      }
      else if (tensor->type == kTfLiteBool)
      {
        // Generate bool input
        auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);

        auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
            const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
            &nnfw::misc::RandomGenerator::generate<bool>);
        const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
                                                    std::bind(fp, randgen, _1, _2));

        nnfw::misc::tensor::iterate(tensor_view.shape())
            << [&](const nnfw::misc::tensor::Index &ind) {
                 const auto value = data.at(ind);
                 tensor_view.at(ind) = value;
               };
      }
      else
      {
        assert(tensor->type == kTfLiteFloat32);

        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
        for (float *ptr = tensor->data.f; ptr < end; ptr++)
        {
          *ptr = randgen.generate<float>();
        }
      }
    }
  }
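  // TensorDumper records tensor snapshots so they can be written to a dump file later.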
  TFLiteRun::TensorDumper tensor_dumper;
  // Must be called before `interpreter->Invoke()`
  tensor_dumper.addTensors(*interpreter, interpreter->inputs());

  std::cout << "input tensor indices = [";
  for (const auto &o : interpreter->inputs())
  {
    std::cout << o << ",";
  }
  std::cout << "]" << std::endl;
  // NOTE Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
  // warmup.
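  // With the default verbose level only aggregated statistics are reported; at higher verbosity
  // each warmup/execution run also prints its own latency.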
  if (verbose == 0)
  {
    phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               args.getWarmupRuns());
    phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               args.getNumRuns(), true);
  }
  else
  {
    phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               [&](const benchmark::Phase &phase, uint32_t nth) {
                 std::cout << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                           << std::endl;
               },
               args.getWarmupRuns());
    phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); },
               [&](const benchmark::Phase &phase, uint32_t nth) {
                 std::cout << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                           << std::endl;
               },
               args.getNumRuns(), true);
  }
  // Must be called after `interpreter->Invoke()`
  tensor_dumper.addTensors(*interpreter, interpreter->outputs());

  std::cout << "output tensor indices = [";
  for (const auto &o : interpreter->outputs())
  {
    std::cout << o << "(";

    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));

    std::cout << "),";
  }
  std::cout << "]" << std::endl;
  // TODO Apply verbose level to result
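  // Aggregate the per-phase time/memory figures collected above and print them.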
  benchmark::Result result(phases);

  benchmark::printResult(result);
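  // Optionally append the result to a report file, keyed by executable, model, and backend names.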
  if (args.getWriteReport())
  {
    std::string exec_basename;
    std::string model_basename;
    std::string backend_name = default_backend_cand;

    // basename() may modify its argument, so hand it a mutable, null-terminated copy of the path
    std::string model_path = args.getTFLiteFilename();
    std::vector<char> vpath(model_path.begin(), model_path.end());
    vpath.push_back('\0');

    model_basename = basename(vpath.data());
    size_t lastindex = model_basename.find_last_of(".");
    model_basename = model_basename.substr(0, lastindex);
    exec_basename = basename(argv[0]);

    benchmark::writeResult(result, exec_basename, model_basename, backend_name);
  }
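  // Optionally dump the recorded input/output tensors so a later run can compare against them.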
  if (!args.getDumpFilename().empty())
  {
    const std::string &dump_filename = args.getDumpFilename();
    tensor_dumper.dump(dump_filename);
    std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
              << std::endl;
  }
  if (!args.getCompareFilename().empty())
  {
    const std::string &compare_filename = args.getCompareFilename();
    std::cout << "========================================" << std::endl;
    std::cout << "Comparing the results with \"" << compare_filename << "\"." << std::endl;
    std::cout << "========================================" << std::endl;

    // TODO Code duplication (copied from RandomTestRunner)

    int tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
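    // A pair of values is accepted if it passes either the absolute-epsilon check or the
    // tolerance-scaled relative check below.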
    auto equals = [tolerance](float lhs, float rhs) {
      // NOTE Hybrid approach
      // TODO Allow users to set tolerance for absolute_epsilon_equal
      if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
      {
        return true;
      }

      return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
    };
    nnfw::misc::tensor::Comparator comparator(equals);
    TfLiteInterpMatchApp app(comparator);
    bool res = true;
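    // Compare every output tensor view against the expected values loaded from the dump file.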
    for (const auto &o : interpreter->outputs())
    {
      auto expected = tensor_loader.get(o);
      auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);

      res = res && app.compareSingleTensorView(expected, obtained, o);