/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "allocation.h"
19 #include "benchmark.h"
20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
21 #include "h5formatter.h"
24 #include "nnfw_util.h"
25 #include "nnfw_internal.h"
26 #include "randomgen.h"
28 #include "ruy/profiler/profiler.h"
37 #include <unordered_map>
40 static const char *default_backend_cand = "cpu";
42 void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
43 std::vector<nnpkg_run::TensorShape> shapes)
45 for (uint32_t i = 0; i < shapes.size(); i++)
46 shape_map[i] = shapes[i];
49 int main(const int argc, char **argv)
51 using namespace nnpkg_run;
55 Args args(argc, argv);
56 auto nnpackage_path = args.getPackageFilename();
57 if (args.printVersion())
60 NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
61 std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
62 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
67 ruy::profiler::ScopeProfile ruy_profile;
70 // TODO Apply verbose level to phases
71 const int verbose = args.getVerboseLevel();
72 benchmark::Phases phases(
73 benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
75 nnfw_session *session = nullptr;
76 NNPR_ENSURE_STATUS(nnfw_create_session(&session));
79 phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
80 NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
83 char *available_backends = std::getenv("BACKENDS");
84 if (available_backends)
85 NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
88 NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
90 // verify input and output
92 auto verifyInputTypes = [session]() {
94 NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
95 for (uint32_t i = 0; i < sz; ++i)
98 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
100 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
102 std::cerr << "E: not supported input type" << std::endl;
108 auto verifyOutputTypes = [session]() {
110 NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
112 for (uint32_t i = 0; i < sz; ++i)
115 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
117 if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
119 std::cerr << "E: not supported output type" << std::endl;
125 auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
126 for (auto tensor_shape : tensor_shape_map)
128 auto ind = tensor_shape.first;
129 auto &shape = tensor_shape.second;
132 NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
134 bool set_input = false;
135 if (ti.rank != shape.size())
141 for (int i = 0; i < ti.rank; i++)
143 if (ti.dims[i] != shape.at(i))
153 ti.rank = shape.size();
154 for (int i = 0; i < ti.rank; i++)
155 ti.dims[i] = shape.at(i);
156 NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
163 // set input shape before compilation
164 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
166 auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
167 assert(!h5_file.empty());
168 auto shapes = H5Formatter(session).readTensorShapes(h5_file);
169 overwriteShapeMap(shape_map, shapes);
172 if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
173 fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
175 setTensorInfo(args.getShapeMapForPrepare());
179 // TODO When nnfw_{prepare|run} are failed, can't catch the time
180 phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
181 NNPR_ENSURE_STATUS(nnfw_prepare(session));
184 // set input shape after compilation and before execution
185 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
186 if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
187 (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
188 fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
190 setTensorInfo(args.getShapeMapForRun());
193 std::vector<Allocation> inputs(num_inputs);
194 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
195 if (!args.getLoadFilename().empty())
196 H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
198 RandomGenerator(session).generate(inputs);
200 RandomGenerator(session).generate(inputs);
204 uint32_t num_outputs = 0;
205 NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
206 std::vector<Allocation> outputs(num_outputs);
207 auto output_sizes = args.getOutputSizes();
208 for (uint32_t i = 0; i < num_outputs; i++)
211 uint64_t output_size_in_bytes = 0;
213 auto found = output_sizes.find(i);
214 if (found == output_sizes.end())
216 NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
217 output_size_in_bytes = bufsize_for(&ti);
221 output_size_in_bytes = found->second;
224 outputs[i].alloc(output_size_in_bytes);
226 nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
227 NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
230 // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
236 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
237 args.getWarmupRuns());
240 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
241 args.getNumRuns(), true);
247 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
248 [&](const benchmark::Phase &phase, uint32_t nth) {
250 << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
253 args.getWarmupRuns());
256 [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
257 [&](const benchmark::Phase &phase, uint32_t nth) {
259 << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
262 args.getNumRuns(), true);
265 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
266 // dump output tensors
267 if (!args.getDumpFilename().empty())
268 H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
271 NNPR_ENSURE_STATUS(nnfw_close_session(session));
273 // TODO Apply verbose level to result
276 benchmark::Result result(phases);
279 benchmark::printResult(result);
282 if (args.getWriteReport() == false)
286 std::string exec_basename;
287 std::string nnpkg_basename;
288 std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
291 char *res = realpath(nnpackage_path.c_str(), buf);
294 nnpkg_basename = basename(buf);
298 std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
301 exec_basename = basename(argv[0]);
304 benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
308 catch (std::runtime_error &e)
310 std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;