Fix various uses of non-standard C++:
[platform/upstream/armnn.git] / tests / InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/ArmNN.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/algorithm/string/join.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/log/trivial.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/variant.hpp>

#include <algorithm>
#include <chrono>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
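
// A minimal sketch (hypothetical caller code, not part of this header) of how the helper above is
// typically used: pass an optional string so the unknown backend IDs are collected for an error
// message, as the InferenceModel constructor further down does.
//
//     std::string invalid;
//     if (!CheckRequestedBackendsAreValid({"CpuAcc", "GpuAcc"}, armnn::Optional<std::string&>(invalid)))
//     {
//         throw armnn::Exception("Some backend IDs are invalid: " + invalid);
//     }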

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    size_t                          m_SubgraphId;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;

    Params()
        : m_ComputeDevices{"CpuRef"}
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(boost::str(boost::format(
                    "Not every input has its tensor shape specified: expected=%1%, got=%2%")
                    % numInputBindings % numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser          = armnnDeserializer::IDeserializer;
    using Params           = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        BOOST_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            boost::system::error_code errorCode;
            boost::filesystem::path pathToFile(params.m_ModelPath);
            if (!boost::filesystem::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(boost::str(
                                                   boost::format("Cannot find the file (%1%) errorCode: %2% %3%") %
                                                   params.m_ModelPath %
                                                   errorCode %
                                                   CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = boost::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif



template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;
    using TContainer         = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<std::string>>(&options.m_ComputeDevices)->
                default_value(defaultComputes, boost::algorithm::join(defaultComputes, ", "))->
                multitoken(), backendsMessage.c_str())
            ("labels,l", po::value<std::string>(&options.m_Labels),
                "Text file containing one image filename - correct label pair per line, "
                "used to test the accuracy of the network.")
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }
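
    // A minimal sketch (hypothetical code for a test executable's main(), not part of this header)
    // of how AddCommandLineOptions is expected to be wired into Boost.Program_options; the parser
    // and data type used to instantiate the template are placeholders.
    //
    //     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //     namespace po = boost::program_options;
    //
    //     po::options_description desc("Options");
    //     Model::CommandLineOptions options;
    //     Model::AddCommandLineOptions(desc, options);
    //
    //     po::variables_map vm;
    //     po::store(po::parse_command_line(argc, argv, desc), vm);
    //     po::notify(vm); // fills options.m_ModelDir, options.m_ComputeDevices, etc.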

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork *){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Input index out of range: %1%") % inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(boost::str(boost::format("Output index out of range: %1%") % outputIndex));
        }
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
            const std::vector<TContainer>& inputContainers,
            std::vector<TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            boost::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize   = boost::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = boost::numeric_cast<unsigned int>(i);
                    throw armnn::Exception(
                            boost::str(boost::format("Not enough data for output #%1%: expected "
                            "%2% elements, got %3%") % outputIndex % expectedOutputDataSize % actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = GetCurrentTime();

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));

        const auto end_time = GetCurrentTime();

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return std::chrono::duration<double, std::milli>(end_time - start_time);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }

    std::chrono::high_resolution_clock::time_point GetCurrentTime()
    {
        return std::chrono::high_resolution_clock::now();
    }

    std::chrono::duration<double, std::milli> GetTimeDuration(
            std::chrono::high_resolution_clock::time_point& start_time,
            std::chrono::high_resolution_clock::time_point& end_time)
    {
        return std::chrono::duration<double, std::milli>(end_time - start_time);
    }

};
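
// A minimal usage sketch (hypothetical caller code, not part of this header). It assumes a build
// with ARMNN_TF_LITE_PARSER defined; the model path and binding names are placeholders.
//
//     using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//
//     Model::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };
//
//     Model model(params, /*enableProfiling=*/false);
//
//     // One container per binding; sizes taken from the bound tensor infos.
//     std::vector<Model::TContainer> inputs  = { std::vector<float>(model.GetInputBindingInfo().second.GetNumElements()) };
//     std::vector<Model::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//
//     auto inferenceTimeMs = model.Run(inputs, outputs); // std::chrono duration in milliseconds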