/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "allocation.h"
#include "args.h"
#include "benchmark.h"
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
#include "h5formatter.h"
#endif
#include "nnfw_util.h"
#include "nnfw_internal.h"
#include "nnfw_experimental.h"
#include "randomgen.h"
#include "rawformatter.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
#endif

#include <boost/program_options.hpp>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <iostream>
#include <libgen.h>
#include <string>
#include <unordered_map>
#include <vector>
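
// NNPR_ENSURE_STATUS comes from nnfw_util.h; conceptually it wraps an nnfw_* call and bails
// out on failure, roughly:
//   if ((call) != NNFW_STATUS_NO_ERROR) exit(-1);
// (sketch only; see nnfw_util.h for the actual definition)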
static const char *default_backend_cand = "cpu";
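
// TensorShapeMap (from the onert_run headers) maps an input index to the shape that should
// override it, presumably something like std::unordered_map<uint32_t, TensorShape>. The helper
// below fills it positionally from a list of shapes read elsewhere.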
void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
                       std::vector<onert_run::TensorShape> shapes)
{
  for (uint32_t i = 0; i < shapes.size(); i++)
    shape_map[i] = shapes[i];
}
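
// The next helper derives the output path for an on-the-fly quantized model: for example,
// "model.circle" becomes "model_quantized_q8.circle" ("model_quantized_q16.circle" when
// is_q16 is true).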
std::string genQuantizedModelPathFromModelPath(const std::string &model_path, bool is_q16)
{
  auto const extension_pos = model_path.find(".circle");
  if (extension_pos == std::string::npos)
  {
    std::cerr << "Input model isn't .circle." << std::endl;
    exit(-1);
  }
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return model_path.substr(0, extension_pos) + qstring + ".circle";
}
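
// The nnpackage variant places the quantized model inside the package directory: for example,
// "/path/to/mobilenet" (with or without a trailing '/') maps to
// "/path/to/mobilenet/mobilenet_quantized_q8.circle".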
std::string genQuantizedModelPathFromPackagePath(const std::string &package_path, bool is_q16)
{
  auto package_path_without_slash = package_path;
  if (package_path_without_slash.back() == '/')
    package_path_without_slash.pop_back();
  auto package_name_pos = package_path_without_slash.find_last_of('/');
  if (package_name_pos == std::string::npos)
    package_name_pos = 0; // relative path without a parent directory
  else
    package_name_pos += 1; // skip the '/' itself
  auto package_name = package_path_without_slash.substr(package_name_pos);
  auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8");
  return package_path_without_slash + "/" + package_name + qstring + ".circle";
}

int main(const int argc, char **argv)
{
  using namespace onert_run;
  try
  {
    Args args(argc, argv);
    if (args.printVersion())
    {
      uint32_t version;
      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
      std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
      exit(0);
    }

#ifdef RUY_PROFILER
    ruy::profiler::ScopeProfile ruy_profile;
#endif

    // TODO Apply verbose level to phases
    const int verbose = args.getVerboseLevel();
    benchmark::Phases phases(
      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});

    nnfw_session *session = nullptr;
    NNPR_ENSURE_STATUS(nnfw_create_session(&session));

    // Load the model: either a single model file or an nnpackage
    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
      if (args.useSingleModel())
        NNPR_ENSURE_STATUS(
          nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
      else
        NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
    });

    // Quantize the model on the fly if requested
    auto quantize = args.getQuantize();
    if (!quantize.empty())
    {
      NNFW_QUANTIZE_TYPE quantize_type = NNFW_QUANTIZE_TYPE_NOT_SET;
      if (quantize == "int8")
        quantize_type = NNFW_QUANTIZE_TYPE_U8_ASYM;
      if (quantize == "int16")
        quantize_type = NNFW_QUANTIZE_TYPE_I16_SYM;
      NNPR_ENSURE_STATUS(nnfw_set_quantization_type(session, quantize_type));

      if (args.getQuantizedModelPath() != "")
        NNPR_ENSURE_STATUS(
          nnfw_set_quantized_model_path(session, args.getQuantizedModelPath().c_str()));
      else
      {
        if (args.useSingleModel())
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromModelPath(args.getModelFilename(), quantize == "int16")
              .c_str()));
        else
          NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path(
            session,
            genQuantizedModelPathFromPackagePath(args.getPackageFilename(), quantize == "int16")
              .c_str()));
      }

      NNPR_ENSURE_STATUS(nnfw_quantize(session));
    }
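
    // The BACKENDS environment variable, when set, is forwarded verbatim to
    // nnfw_set_available_backends as the backend priority string (e.g. "cpu");
    // see the nnfw API documentation for the accepted format.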
    char *available_backends = std::getenv("BACKENDS");
    if (available_backends)
      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));

    uint32_t num_inputs;
    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));

    // verify input and output
    auto verifyInputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: not supported input type" << std::endl;
          exit(-1);
        }
      }
    };

    auto verifyOutputTypes = [session]() {
      uint32_t sz;
      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
      for (uint32_t i = 0; i < sz; ++i)
      {
        nnfw_tensorinfo ti;
        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
        {
          std::cerr << "E: not supported output type" << std::endl;
          exit(-1);
        }
      }
    };
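
    // Both the verification above and the shape overriding below go through nnfw_tensorinfo
    // (from nnfw.h), which carries dtype, rank, and a fixed-size dims array, so rank/dims can
    // be rewritten in place before nnfw_set_input_tensorinfo is called.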
    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
      for (auto tensor_shape : tensor_shape_map)
      {
        auto ind = tensor_shape.first;
        auto &shape = tensor_shape.second;
        nnfw_tensorinfo ti;
        // read the current info first so that dtype is preserved
        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
        bool set_input = false;
        if (ti.rank != shape.size())
          set_input = true;
        else
          for (int i = 0; i < ti.rank; i++)
            if (ti.dims[i] != shape.at(i))
            {
              set_input = true;
              break;
            }
        if (!set_input)
          continue;
        ti.rank = shape.size();
        for (int i = 0; i < ti.rank; i++)
          ti.dims[i] = shape.at(i);
        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
      }
    };

    verifyInputTypes();
    verifyOutputTypes();

    // set input shape before compilation
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
      assert(!h5_file.empty());
      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
      overwriteShapeMap(shape_map, shapes);
    };

    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
#endif
    setTensorInfo(args.getShapeMapForPrepare());

    // TODO When nnfw_{prepare|run} fails, its elapsed time cannot be captured
    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
      NNPR_ENSURE_STATUS(nnfw_prepare(session));
    });

    // set input shape after compilation and before execution
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
#endif
    setTensorInfo(args.getShapeMapForRun());

    // prepare input buffers
    std::vector<Allocation> inputs(num_inputs);
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    if (!args.getLoadFilename().empty())
      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
    else if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#else
    if (!args.getLoadRawFilename().empty())
      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
    else
      RandomGenerator(session).generate(inputs);
#endif

    // prepare output buffers
    uint32_t num_outputs = 0;
    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
    std::vector<Allocation> outputs(num_outputs);
    auto output_sizes = args.getOutputSizes();
    for (uint32_t i = 0; i < num_outputs; i++)
    {
      nnfw_tensorinfo ti;
      uint64_t output_size_in_bytes = 0;
      {
        auto found = output_sizes.find(i);
        if (found == output_sizes.end())
        {
          NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
          output_size_in_bytes = bufsize_for(&ti);
        }
        else
        {
          output_size_in_bytes = found->second;
        }
      }
      outputs[i].alloc(output_size_in_bytes);
      NNPR_ENSURE_STATUS(
        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
    }
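
    // bufsize_for (from nnfw_util.h) is expected to return the tensor's byte size, i.e. the
    // element count implied by ti.dims times the element size of ti.dtype; an explicitly
    // requested size from args.getOutputSizes() takes precedence in the loop above.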

    // NOTE: Measuring memory can't avoid taking overhead; therefore, memory is measured
    // during the warmup phase only.
    if (verbose == 0)
    {
      phases.run("WARMUP",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 args.getWarmupRuns());
      phases.run("EXECUTE",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 args.getNumRuns(), true);
    }
    else
    {
      phases.run("WARMUP",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 [&](const benchmark::Phase &phase, uint32_t nth) {
                   std::cout << "... "
                             << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                             << std::endl;
                 },
                 args.getWarmupRuns());
      phases.run("EXECUTE",
                 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
                 [&](const benchmark::Phase &phase, uint32_t nth) {
                   std::cout << "... "
                             << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
                             << std::endl;
                 },
                 args.getNumRuns(), true);
    }

#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
    // dump output tensors
    if (!args.getDumpFilename().empty())
      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
#endif
    if (!args.getDumpRawFilename().empty())
      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);

    NNPR_ENSURE_STATUS(nnfw_close_session(session));

    // TODO Apply verbose level to result
    benchmark::Result result(phases);
    benchmark::printResult(result);

    if (args.getWriteReport() == false)
      return 0;

    std::string exec_basename;
    std::string nnpkg_basename;
    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
    {
      char buf[PATH_MAX];
      char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
                                        : realpath(args.getPackageFilename().c_str(), buf);
      if (res)
        nnpkg_basename = basename(buf);
      else
      {
        std::cerr << "E: failed to get realpath of the nnpackage or model path." << std::endl;
        exit(-1);
      }
      exec_basename = basename(argv[0]);
    }
    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);

    return 0;
  }
  catch (boost::program_options::error &e)
  {
    std::cerr << "E: " << e.what() << std::endl;
    exit(-1);
  }
  catch (std::runtime_error &e)
  {
    std::cerr << "E: Failed to run due to runtime error: " << e.what() << std::endl;
    exit(-1);
  }
}