tests/tools/nnpackage_run/src/nnpackage_run.cc

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "allocation.h"
  18 #include "args.h"
  19 #include "benchmark.h"
  20 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
  21 #include "h5formatter.h"
  22 #endif
  23 #include "nnfw.h"
  24 #include "nnfw_util.h"
  25 #include "nnfw_internal.h"
  26 #include "randomgen.h"
  27 #ifdef RUY_PROFILER
  28 #include "ruy/profiler/profiler.h"
  29 #endif
  30
  31 #include <cassert>
  32 #include <chrono>
  33 #include <cstdlib>
  34 #include <iostream>
  35 #include <libgen.h>
  36 #include <stdexcept>
  37 #include <unordered_map>
  38 #include <vector>
  39
  40 static const char *default_backend_cand = "cpu";
  41
  42 void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
  43                        std::vector<nnpkg_run::TensorShape> shapes)
  44 {
  45   for (uint32_t i = 0; i < shapes.size(); i++)
  46     shape_map[i] = shapes[i];
  47 }
  48
  49 int main(const int argc, char **argv)
  50 {
  51   using namespace nnpkg_run;
  52
  53   try
  54   {
  55     Args args(argc, argv);
  56     auto nnpackage_path = args.getPackageFilename();
  57     if (args.printVersion())
  58     {
  59       uint32_t version;
  60       NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
  61       std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
  62                 << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
  63       exit(0);
  64     }
  65
  66 #ifdef RUY_PROFILER
  67     ruy::profiler::ScopeProfile ruy_profile;
  68 #endif
  69
  70     // TODO Apply verbose level to phases
  71     const int verbose = args.getVerboseLevel();
  72     benchmark::Phases phases(
  73         benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
  74
  75     nnfw_session *session = nullptr;
  76     NNPR_ENSURE_STATUS(nnfw_create_session(&session));
  77
  78     // ModelLoad
  79     phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
  80       NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
  81     });
  82
  83     char *available_backends = std::getenv("BACKENDS");
  84     if (available_backends)
  85       NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
  86
  87     uint32_t num_inputs;
  88     NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
  89
  90     // verify input and output
  91
  92     auto verifyInputTypes = [session]() {
  93       uint32_t sz;
  94       NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
  95       for (uint32_t i = 0; i < sz; ++i)
  96       {
  97         nnfw_tensorinfo ti;
  98         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
  99
 100         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
 101         {
 102           std::cerr << "E: not supported input type" << std::endl;
 103           exit(-1);
 104         }
 105       }
 106     };
 107
 108     auto verifyOutputTypes = [session]() {
 109       uint32_t sz;
 110       NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
 111
 112       for (uint32_t i = 0; i < sz; ++i)
 113       {
 114         nnfw_tensorinfo ti;
 115         NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
 116
 117         if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64)
 118         {
 119           std::cerr << "E: not supported output type" << std::endl;
 120           exit(-1);
 121         }
 122       }
 123     };
 124
 125     auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
 126       for (auto tensor_shape : tensor_shape_map)
 127       {
 128         auto ind = tensor_shape.first;
 129         auto &shape = tensor_shape.second;
 130         nnfw_tensorinfo ti;
 131         // to fill dtype
 132         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
 133
 134         ti.rank = shape.size();
 135         for (int i = 0; i < ti.rank; i++)
 136           ti.dims[i] = shape.at(i);
 137         NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
 138       }
 139     };
 140
 141     verifyInputTypes();
 142     verifyOutputTypes();
 143
 144 // set input shape before compilation
 145 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
 146     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
 147     {
 148       auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
 149       overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
 150     }
 151 #endif
 152     setTensorInfo(args.getShapeMapForPrepare());
 153
 154     // prepare execution
 155
 156     // TODO When nnfw_{prepare|run} are failed, can't catch the time
 157     phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
 158       NNPR_ENSURE_STATUS(nnfw_prepare(session));
 159     });
 160
 161 // set input shape after compilation and before execution
 162 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
 163     if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
 164     {
 165       auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
 166       overwriteShapeMap(args.getShapeMapForRun(), shapes);
 167     }
 168 #endif
 169     setTensorInfo(args.getShapeMapForRun());
 170
 171     // prepare input
 172     std::vector<Allocation> inputs(num_inputs);
 173 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
 174     if (!args.getLoadFilename().empty())
 175       H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
 176     else
 177       RandomGenerator(session).generate(inputs);
 178 #else
 179     RandomGenerator(session).generate(inputs);
 180 #endif
 181
 182     // prepare output
 183     uint32_t num_outputs = 0;
 184     NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
 185     std::vector<Allocation> outputs(num_outputs);
 186     auto output_sizes = args.getOutputSizes();
 187     for (uint32_t i = 0; i < num_outputs; i++)
 188     {
 189       nnfw_tensorinfo ti;
 190       uint64_t output_size_in_bytes = 0;
 191       {
 192         auto found = output_sizes.find(i);
 193         if (found == output_sizes.end())
 194         {
 195           NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
 196           output_size_in_bytes = bufsize_for(&ti);
 197         }
 198         else
 199         {
 200           output_size_in_bytes = found->second;
 201         }
 202       }
 203       outputs[i].alloc(output_size_in_bytes);
 204       NNPR_ENSURE_STATUS(
 205           nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
 206       NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
 207     }
 208
 209     // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
 210     // only warmup.
 211     if (verbose == 0)
 212     {
 213       phases.run("WARMUP",
 214                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
 215                  args.getWarmupRuns());
 216       phases.run("EXECUTE",
 217                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
 218                  args.getNumRuns(), true);
 219     }
 220     else
 221     {
 222       phases.run("WARMUP",
 223                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
 224                  [&](const benchmark::Phase &phase, uint32_t nth) {
 225                    std::cout << "... "
 226                              << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
 227                              << std::endl;
 228                  },
 229                  args.getWarmupRuns());
 230       phases.run("EXECUTE",
 231                  [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
 232                  [&](const benchmark::Phase &phase, uint32_t nth) {
 233                    std::cout << "... "
 234                              << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
 235                              << std::endl;
 236                  },
 237                  args.getNumRuns(), true);
 238     }
 239
 240 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
 241     // dump output tensors
 242     if (!args.getDumpFilename().empty())
 243       H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
 244 #endif
 245
 246     NNPR_ENSURE_STATUS(nnfw_close_session(session));
 247
 248     // TODO Apply verbose level to result
 249
 250     // prepare result
 251     benchmark::Result result(phases);
 252
 253     // to stdout
 254     benchmark::printResult(result);
 255
 256     // to csv
 257     if (args.getWriteReport() == false)
 258       return 0;
 259
 260     // prepare csv task
 261     std::string exec_basename;
 262     std::string nnpkg_basename;
 263     std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
 264     {
 265       char buf[PATH_MAX];
 266       char *res = realpath(nnpackage_path.c_str(), buf);
 267       if (res)
 268       {
 269         nnpkg_basename = basename(buf);
 270       }
 271       else
 272       {
 273         std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
 274         exit(-1);
 275       }
 276       exec_basename = basename(argv[0]);
 277     }
 278
 279     benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
 280
 281     return 0;
 282   }
 283   catch (std::runtime_error &e)
 284   {
 285     std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
 286     exit(-1);
 287   }
 288 }