64623a89d88d1adfcb478b24753910ce64885990
[platform/core/ml/nnfw.git] / tests / tools / nnpackage_run / src / nnpackage_run.cc
1 /*
2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "allocation.h"
18 #include "args.h"
19 #include "benchmark.h"
20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
21 #include "h5formatter.h"
22 #endif
23 #include "nnfw.h"
24 #include "nnfw_util.h"
25 #include "nnfw_internal.h"
26 #include "randomgen.h"
27 #ifdef RUY_PROFILER
28 #include "ruy/profiler/profiler.h"
29 #endif
30
31 #include <cassert>
32 #include <chrono>
33 #include <cstdlib>
34 #include <iostream>
35 #include <libgen.h>
36 #include <stdexcept>
37 #include <unordered_map>
38 #include <vector>
39
// Backend name written to the CSV report when the BACKENDS env var is unset.
static const char *default_backend_cand = "cpu";
42 void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
43                        std::vector<nnpkg_run::TensorShape> shapes)
44 {
45   for (uint32_t i = 0; i < shapes.size(); i++)
46     shape_map[i] = shapes[i];
47 }
48
49 int main(const int argc, char **argv)
50 {
51   using namespace nnpkg_run;
52
53   try
54   {
55     Args args(argc, argv);
56     auto nnpackage_path = args.getPackageFilename();
57     if (args.printVersion())
58     {
59       uint32_t version;
60       NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
61       std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
62                 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
63       exit(0);
64     }
65
66 #ifdef RUY_PROFILER
67     ruy::profiler::ScopeProfile ruy_profile;
68 #endif
69
70     // TODO Apply verbose level to phases
71     const int verbose = args.getVerboseLevel();
72     benchmark::Phases phases(
73         benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
74
75     nnfw_session *session = nullptr;
76     NNPR_ENSURE_STATUS(nnfw_create_session(&session));
77
78     // ModelLoad
79     phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
80       NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
81     });
82
83     char *available_backends = std::getenv("BACKENDS");
84     if (available_backends)
85       NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
86
87     uint32_t num_inputs;
88     NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
89
90     // verify input and output
91
92     auto verifyInputTypes = [session]() {
93       uint32_t sz;
94       NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
95       for (uint32_t i = 0; i < sz; ++i)
96       {
97         nnfw_tensorinfo ti;
98         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
99
100         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
101         {
102           std::cerr << "E: not supported input type" << std::endl;
103           exit(-1);
104         }
105       }
106     };
107
108     auto verifyOutputTypes = [session]() {
109       uint32_t sz;
110       NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
111
112       for (uint32_t i = 0; i < sz; ++i)
113       {
114         nnfw_tensorinfo ti;
115         NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
116
117         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
118         {
119           std::cerr << "E: not supported output type" << std::endl;
120           exit(-1);
121         }
122       }
123     };
124
125     auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
126       for (auto tensor_shape : tensor_shape_map)
127       {
128         auto ind = tensor_shape.first;
129         auto &shape = tensor_shape.second;
130         nnfw_tensorinfo ti;
131         // to fill dtype
132         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
133
134         bool set_input = false;
135         if (ti.rank != shape.size())
136         {
137           set_input = true;
138         }
139         else
140         {
141           for (int i = 0; i < ti.rank; i++)
142           {
143             if (ti.dims[i] != shape.at(i))
144             {
145               set_input = true;
146               break;
147             }
148           }
149         }
150         if (!set_input)
151           continue;
152
153         ti.rank = shape.size();
154         for (int i = 0; i < ti.rank; i++)
155           ti.dims[i] = shape.at(i);
156         NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
157       }
158     };
159
160     verifyInputTypes();
161     verifyOutputTypes();
162
163 // set input shape before compilation
164 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
165
166     auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
167       assert(!h5_file.empty());
168       auto shapes = H5Formatter(session).readTensorShapes(h5_file);
169       overwriteShapeMap(shape_map, shapes);
170     };
171
172     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
173       fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
174 #endif
175     setTensorInfo(args.getShapeMapForPrepare());
176
177     // prepare execution
178
179     // TODO When nnfw_{prepare|run} are failed, can't catch the time
180     phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
181       NNPR_ENSURE_STATUS(nnfw_prepare(session));
182     });
183
184 // set input shape after compilation and before execution
185 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
186     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
187         (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
188       fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
189 #endif
190     setTensorInfo(args.getShapeMapForRun());
191
192     // prepare input
193     std::vector<Allocation> inputs(num_inputs);
194 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
195     if (!args.getLoadFilename().empty())
196       H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
197     else
198       RandomGenerator(session).generate(inputs);
199 #else
200     RandomGenerator(session).generate(inputs);
201 #endif
202
203     // prepare output
204     uint32_t num_outputs = 0;
205     NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
206     std::vector<Allocation> outputs(num_outputs);
207     auto output_sizes = args.getOutputSizes();
208     for (uint32_t i = 0; i < num_outputs; i++)
209     {
210       nnfw_tensorinfo ti;
211       uint64_t output_size_in_bytes = 0;
212       {
213         auto found = output_sizes.find(i);
214         if (found == output_sizes.end())
215         {
216           NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
217           output_size_in_bytes = bufsize_for(&ti);
218         }
219         else
220         {
221           output_size_in_bytes = found->second;
222         }
223       }
224       outputs[i].alloc(output_size_in_bytes);
225       NNPR_ENSURE_STATUS(
226           nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
227       NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
228     }
229
230     // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
231     // only warmup.
232     if (verbose == 0)
233     {
234       phases.run("WARMUP",
235                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
236                  args.getWarmupRuns());
237       phases.run("EXECUTE",
238                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
239                  args.getNumRuns(), true);
240     }
241     else
242     {
243       phases.run("WARMUP",
244                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
245                  [&](const benchmark::Phase &phase, uint32_t nth) {
246                    std::cout << "... "
247                              << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
248                              << std::endl;
249                  },
250                  args.getWarmupRuns());
251       phases.run("EXECUTE",
252                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
253                  [&](const benchmark::Phase &phase, uint32_t nth) {
254                    std::cout << "... "
255                              << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
256                              << std::endl;
257                  },
258                  args.getNumRuns(), true);
259     }
260
261 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
262     // dump output tensors
263     if (!args.getDumpFilename().empty())
264       H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
265 #endif
266
267     NNPR_ENSURE_STATUS(nnfw_close_session(session));
268
269     // TODO Apply verbose level to result
270
271     // prepare result
272     benchmark::Result result(phases);
273
274     // to stdout
275     benchmark::printResult(result);
276
277     // to csv
278     if (args.getWriteReport() == false)
279       return 0;
280
281     // prepare csv task
282     std::string exec_basename;
283     std::string nnpkg_basename;
284     std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
285     {
286       char buf[PATH_MAX];
287       char *res = realpath(nnpackage_path.c_str(), buf);
288       if (res)
289       {
290         nnpkg_basename = basename(buf);
291       }
292       else
293       {
294         std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
295         exit(-1);
296       }
297       exec_basename = basename(argv[0]);
298     }
299
300     benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
301
302     return 0;
303   }
304   catch (std::runtime_error &e)
305   {
306     std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
307     exit(-1);
308   }
309 }