Adding more performance metrics
author alered01 <Alex.Redshaw@arm.com>
Thu, 7 May 2020 13:58:29 +0000 (14:58 +0100)
committer Alex Redshaw <Alex.Redshaw@arm.com>
Fri, 22 May 2020 11:05:07 +0000 (11:05 +0000)
* Implemented CLTuning flow for ExecuteNetwork tests
  * Added --tuning-path to specify the tuning file to use/create
  * Added --tuning-level to specify the tuning level to use and to enable an extra tuning run that generates/updates the tuning file
* Fixed issue where TuningLevel was being parsed incorrectly
* Added measurements for initialization, network parsing, network optimization, tuning, and shutdown
* Added --iterations flag to control the number of times inference is run

Signed-off-by: alered01 <Alex.Redshaw@arm.com>
Change-Id: Ic739ff26e136e32aff9f0995217c1c3207008ca4
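
With the new options in place, a tuning run followed by the tuned, measured run can be requested from a single ExecuteNetwork invocation, for example (paths are placeholders and the remaining input/output options are omitted): ExecuteNetwork --model-format tflite-binary --model-path model.tflite --compute GpuAcc --tuning-path tuned_params.csv --tuning-level 2 --iterations 10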

include/armnn/utility/Timer.hpp [new file with mode: 0644]
src/armnn/Runtime.cpp
src/backends/cl/ClBackendContext.cpp
tests/ExecuteNetwork/ExecuteNetwork.cpp
tests/InferenceModel.hpp
tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp

diff --git a/include/armnn/utility/Timer.hpp b/include/armnn/utility/Timer.hpp
new file mode 100644
index 0000000..daf689e
--- /dev/null
@@ -0,0 +1,25 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <chrono>
+#include <iomanip>
+
+namespace armnn
+{
+
+inline std::chrono::high_resolution_clock::time_point GetTimeNow()
+{
+    return std::chrono::high_resolution_clock::now();
+}
+
+inline std::chrono::duration<double, std::milli> GetTimeDuration(
+        std::chrono::high_resolution_clock::time_point start_time)
+{
+    return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
+}
+
+}
\ No newline at end of file
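
The two helpers above centralise the timing pattern used throughout this change (and replace the private GetCurrentTime/GetTimeDuration members removed from InferenceModel.hpp below). A minimal usage sketch, assuming only that the header is on the include path (the function name is illustrative):

    #include <armnn/utility/Timer.hpp>
    #include <iostream>

    void TimedWork()
    {
        const auto start_time = armnn::GetTimeNow();   // high_resolution_clock time point

        // ... work to be measured ...

        // GetTimeDuration returns std::chrono::duration<double, std::milli>
        std::cout << "Elapsed: " << armnn::GetTimeDuration(start_time).count() << " ms\n";
    }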
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index dbdd409..b1b7d51 100644
@@ -7,6 +7,7 @@
 #include <armnn/Version.hpp>
 #include <armnn/BackendRegistry.hpp>
 #include <armnn/Logging.hpp>
+#include <armnn/utility/Timer.hpp>
 
 #include <armnn/backends/IBackendContext.hpp>
 #include <backendsCommon/DynamicBackendUtils.hpp>
@@ -171,6 +172,7 @@ Runtime::Runtime(const CreationOptions& options)
     : m_NetworkIdCounter(0),
       m_ProfilingService(*this)
 {
+    const auto start_time = armnn::GetTimeNow();
     ARMNN_LOG(info) << "ArmNN v" << ARMNN_VERSION << "\n";
 
     if ( options.m_ProfilingOptions.m_TimelineEnabled && !options.m_ProfilingOptions.m_EnableProfiling )
@@ -225,10 +227,14 @@ Runtime::Runtime(const CreationOptions& options)
     m_ProfilingService.ConfigureProfilingService(options.m_ProfilingOptions);
 
     m_DeviceSpec.AddSupportedBackends(supportedBackends);
+
+    ARMNN_LOG(info) << "Initialization time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
 }
 
 Runtime::~Runtime()
 {
+    const auto start_time = armnn::GetTimeNow();
     std::vector<int> networkIDs;
     try
     {
@@ -272,6 +278,8 @@ Runtime::~Runtime()
     m_BackendContexts.clear();
 
     BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional());
+    ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
 }
 
 LoadedNetwork* Runtime::GetLoadedNetworkPtr(NetworkId networkId) const
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index bfe93bd..42f42b3 100644
@@ -79,7 +79,7 @@ TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defau
 {
     if (value.IsInt())
     {
-        int v = value.IsInt();
+        int v = value.AsInt();
         if (v > static_cast<int>(TuningLevel::Exhaustive) ||
             v < static_cast<int>(TuningLevel::None))
         {
@@ -218,18 +218,18 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
 
         ConfigureTuner(*(m_Tuner.get()), tuningLevel);
 
-        if (!m_TuningFile.empty())
+        if (!m_TuningFile.empty() && tuningLevel == TuningLevel::None)
         {
             try
             {
                 m_Tuner->load_from_file(m_TuningFile.c_str());
-            } catch (const std::exception& e)
+            }
+            catch (const std::exception& e)
             {
                 ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file.";
             }
-
-            tuner = m_Tuner.get();
         }
+        tuner = m_Tuner.get();
     }
 
     m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>(
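
For reference, the backend options that drive this path are built the same way ExecuteNetwork does further down; a minimal sketch with placeholder values (a non-zero TuningLevel triggers a tuning run that creates/updates the file, while TuningLevel 0 makes the context load it instead):

    #include <armnn/ArmNN.hpp>

    armnn::IRuntime::CreationOptions options;
    options.m_BackendOptions.emplace_back(
        armnn::BackendOptions
        {
            "GpuAcc",
            {
                {"TuningLevel", 2},                  // 0 = None, 1 = Rapid, 2 = Normal, 3 = Exhaustive
                {"TuningFile", "tuned_params.csv"},  // placeholder path
                {"KernelProfilingEnabled", false}
            }
        });
    std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));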
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 57b8692..66d8e13 100644
@@ -35,6 +35,10 @@ int main(int argc, const char* argv[])
     uint32_t counterCapturePeriod;
     std::string fileFormat;
 
+    size_t iterations = 1;
+    int tuningLevel = 0;
+    std::string tuningPath;
+
     double thresholdTime = 0.0;
 
     size_t subgraphId = 0;
@@ -121,6 +125,14 @@ int main(int argc, const char* argv[])
              "If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test")
             ("file-format", po::value(&fileFormat)->default_value("binary"),
              "If profiling is enabled specifies the output file format")
+            ("iterations", po::value<size_t>(&iterations)->default_value(1),
+             "Number of iterations to run the network for, default is set to 1")
+            ("tuning-path", po::value(&tuningPath),
+            "Path to tuning file. Enables use of CL tuning")
+            ("tuning-level", po::value<int>(&tuningLevel)->default_value(0),
+             "Sets the tuning level which enables a tuning run which will update/create a tuning file. "
+             "Available options are: 1 (Rapid), 2 (Normal), 3 (Exhaustive). "
+             "Requires tuning-path to be set, default is set to 0 (No tuning run)")
             ("parse-unsupported", po::bool_switch()->default_value(false),
                 "Add unsupported operators as stand-in layers (where supported by parser)");
     }
@@ -275,6 +287,33 @@ int main(int argc, const char* argv[])
         // Remove duplicates from the list of compute devices.
         RemoveDuplicateDevices(computeDevices);
 
+#if defined(ARMCOMPUTECL_ENABLED)
+        std::shared_ptr<armnn::IGpuAccTunedParameters> tuned_params;
+
+        if (tuningPath != "")
+        {
+            if (tuningLevel != 0)
+            {
+                RunCLTuning(tuningPath, tuningLevel, modelFormat, inputTensorShapes, computeDevices,
+                    dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput,
+                    outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling,
+                    enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId,
+                    enableLayerDetails, parseUnsupported);
+            }
+            ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n";
+            options.m_BackendOptions.emplace_back(
+                armnn::BackendOptions
+                {
+                    "GpuAcc",
+                    {
+                        {"TuningLevel", 0},
+                        {"TuningFile", tuningPath.c_str()},
+                        {"KernelProfilingEnabled", enableProfiling}
+                    }
+                }
+            );
+        }
+#endif
         try
         {
             CheckOptionDependencies(vm);
@@ -288,9 +327,9 @@ int main(int argc, const char* argv[])
         // Create runtime
         std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
 
-        return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
-                       inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
-                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
-                       thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
+        return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath,
+            inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
+            outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+            thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, iterations, runtime);
     }
 }
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 410bc7c..781cef4 100644
@@ -6,6 +6,8 @@
 #pragma once
 
 #include <armnn/ArmNN.hpp>
+#include <armnn/Logging.hpp>
+#include <armnn/utility/Timer.hpp>
 #include <armnn/BackendRegistry.hpp>
 #include <armnn/utility/Assert.hpp>
 
@@ -31,7 +33,6 @@
 #include <boost/variant.hpp>
 
 #include <algorithm>
-#include <chrono>
 #include <iterator>
 #include <fstream>
 #include <map>
@@ -399,8 +400,12 @@ public:
             throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
         }
 
+        const auto parsing_start_time = armnn::GetTimeNow();
         armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);
 
+        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
+                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
+
         armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
         {
             ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
@@ -410,7 +415,12 @@ public:
             options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
             options.m_Debug = params.m_PrintIntermediateLayers;
 
+            const auto optimization_start_time = armnn::GetTimeNow();
             optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
+
+            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
+                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
+
             if (!optNet)
             {
                 throw armnn::Exception("Optimize returned nullptr");
@@ -494,13 +504,13 @@ public:
         }
 
         // Start timer to record inference time in EnqueueWorkload (in milliseconds)
-        const auto start_time = GetCurrentTime();
+        const auto start_time = armnn::GetTimeNow();
 
         armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                        MakeInputTensors(inputContainers),
                                                        MakeOutputTensors(outputContainers));
 
-        const auto end_time = GetCurrentTime();
+        const auto duration = armnn::GetTimeDuration(start_time);
 
         // if profiling is enabled print out the results
         if (profiler && profiler->IsProfilingEnabled())
@@ -514,7 +524,7 @@ public:
         }
         else
         {
-            return std::chrono::duration<double, std::milli>(end_time - start_time);
+            return duration;
         }
     }
 
@@ -584,17 +594,4 @@ private:
     {
         return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
     }
-
-    std::chrono::high_resolution_clock::time_point GetCurrentTime()
-    {
-        return std::chrono::high_resolution_clock::now();
-    }
-
-    std::chrono::duration<double, std::milli> GetTimeDuration(
-            std::chrono::high_resolution_clock::time_point& start_time,
-            std::chrono::high_resolution_clock::time_point& end_time)
-    {
-        return std::chrono::duration<double, std::milli>(end_time - start_time);
-    }
-
 };
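
With the private timing helpers removed, callers now measure an inference through the duration that Run returns; a minimal sketch of the calling pattern used in MainImpl below, assuming the input/output containers are already populated:

    // Run returns the EnqueueWorkload time as std::chrono::duration<double, std::milli>
    auto inference_duration = model.Run(inputDataContainers, outputDataContainers);

    ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
                    << std::fixed << inference_duration.count() << " ms";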
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index a922228..ec0eaf9 100644
@@ -4,6 +4,7 @@
 //
 #include <armnn/ArmNN.hpp>
 #include <armnn/TypesUtils.hpp>
+#include <armnn/utility/Timer.hpp>
 
 #if defined(ARMNN_SERIALIZER)
 #include "armnnDeserializer/IDeserializer.hpp"
@@ -378,7 +379,8 @@ struct ExecuteNetworkParams
 
 template<typename TParser, typename TDataType>
 int MainImpl(const ExecuteNetworkParams& params,
-             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
+             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr,
+             size_t iterations = 1)
 {
     using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
 
@@ -473,44 +475,47 @@ int MainImpl(const ExecuteNetworkParams& params,
             }
         }
 
-        // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
-        auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
-
-        if (params.m_GenerateTensorData)
+        for (size_t x = 0; x < iterations; x++)
         {
-            ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
-        }
+            // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
+            auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
 
-        // Print output tensors
-        const auto& infosOut = model.GetOutputBindingInfos();
-        for (size_t i = 0; i < numOutputs; i++)
-        {
-            const armnn::TensorInfo& infoOut = infosOut[i].second;
-            auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
-
-            TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
-                                  infoOut,
-                                  outputTensorFile,
-                                  params.m_DequantizeOutput);
-            boost::apply_visitor(printer, outputDataContainers[i]);
-        }
+            if (params.m_GenerateTensorData)
+            {
+                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+            }
 
-        ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
-                                << std::fixed << inference_duration.count() << " ms";
+            // Print output tensors
+            const auto& infosOut = model.GetOutputBindingInfos();
+            for (size_t i = 0; i < numOutputs; i++)
+            {
+                const armnn::TensorInfo& infoOut = infosOut[i].second;
+                auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
+
+                TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
+                                    infoOut,
+                                    outputTensorFile,
+                                    params.m_DequantizeOutput);
+                boost::apply_visitor(printer, outputDataContainers[i]);
+            }
 
-        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
-        if (params.m_ThresholdTime != 0.0)
-        {
-            ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
-                                    << std::fixed << params.m_ThresholdTime << " ms";
-            auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
-            ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
-                                    << std::fixed << thresholdMinusInference << " ms" << "\n";
+            ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+                                    << std::fixed << inference_duration.count() << " ms\n";
 
-            if (thresholdMinusInference < 0)
+            // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+            if (params.m_ThresholdTime != 0.0)
             {
-                std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
-                ARMNN_LOG(fatal) << errorMessage;
+                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
+                                        << std::fixed << params.m_ThresholdTime << " ms";
+                auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
+                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
+                                        << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+                if (thresholdMinusInference < 0)
+                {
+                    std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
+                    ARMNN_LOG(fatal) << errorMessage;
+                }
             }
         }
     }
@@ -545,6 +550,7 @@ int RunTest(const std::string& format,
             const size_t subgraphId,
             bool enableLayerDetails = false,
             bool parseUnsupported = false,
+            const size_t iterations = 1,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
     std::string modelFormat = armnn::stringUtils::StringTrimCopy(format);
@@ -682,34 +688,34 @@ int RunTest(const std::string& format,
     if (modelFormat.find("armnn") != std::string::npos)
     {
 #if defined(ARMNN_SERIALIZER)
-    return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime);
+        return MainImpl<armnnDeserializer::IDeserializer, float>(params, runtime, iterations);
 #else
         ARMNN_LOG(fatal) << "Not built with serialization support.";
-    return EXIT_FAILURE;
+        return EXIT_FAILURE;
 #endif
     }
     else if (modelFormat.find("caffe") != std::string::npos)
     {
 #if defined(ARMNN_CAFFE_PARSER)
-        return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime);
+        return MainImpl<armnnCaffeParser::ICaffeParser, float>(params, runtime, iterations);
 #else
         ARMNN_LOG(fatal) << "Not built with Caffe parser support.";
         return EXIT_FAILURE;
 #endif
     }
     else if (modelFormat.find("onnx") != std::string::npos)
-{
+    {
 #if defined(ARMNN_ONNX_PARSER)
-    return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime);
+        return MainImpl<armnnOnnxParser::IOnnxParser, float>(params, runtime, iterations);
 #else
         ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
-    return EXIT_FAILURE;
+        return EXIT_FAILURE;
 #endif
     }
     else if (modelFormat.find("tensorflow") != std::string::npos)
     {
 #if defined(ARMNN_TF_PARSER)
-        return MainImpl<armnnTfParser::ITfParser, float>(params, runtime);
+        return MainImpl<armnnTfParser::ITfParser, float>(params, runtime, iterations);
 #else
         ARMNN_LOG(fatal) << "Not built with Tensorflow parser support.";
         return EXIT_FAILURE;
@@ -720,21 +726,21 @@ int RunTest(const std::string& format,
 #if defined(ARMNN_TF_LITE_PARSER)
         if (! isModelBinary)
         {
-            ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat << "'. Only 'binary' format supported \
-              for tflite files";
+            ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+                << "'. Only 'binary' format supported for tflite files";
             return EXIT_FAILURE;
         }
-        return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime);
+        return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime, iterations);
 #else
-        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat <<
-            "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
+        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+            << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
         return EXIT_FAILURE;
 #endif
     }
     else
     {
-        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat <<
-                                 "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
+        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
+            << "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
         return EXIT_FAILURE;
     }
 }
@@ -864,3 +870,57 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR
                    dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
                    thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported);
 }
+
+#if defined(ARMCOMPUTECL_ENABLED)
+int RunCLTuning(const std::string& tuningPath,
+            const int tuningLevel,
+            const std::string& modelFormat,
+            const std::string& inputTensorShapes,
+            const vector<armnn::BackendId>& computeDevices,
+            const std::string& dynamicBackendsPath,
+            const std::string& modelPath,
+            const std::string& inputNames,
+            const std::string& inputTensorDataFilePaths,
+            const std::string& inputTypes,
+            bool quantizeInput,
+            const std::string& outputTypes,
+            const std::string& outputNames,
+            const std::string& outputTensorFiles,
+            bool dequantizeOutput,
+            bool enableProfiling,
+            bool enableFp16TurboMode,
+            bool enableBf16TurboMode,
+            const double& thresholdTime,
+            bool printIntermediate,
+            const size_t subgraphId,
+            bool enableLayerDetails = false,
+            bool parseUnsupported = false)
+{
+    armnn::IRuntime::CreationOptions options;
+    options.m_BackendOptions.emplace_back(
+        armnn::BackendOptions
+        {
+            "GpuAcc",
+            {
+                {"TuningLevel", tuningLevel},
+                {"TuningFile", tuningPath.c_str()},
+                {"KernelProfilingEnabled", enableProfiling}
+            }
+        }
+    );
+
+    std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
+    const auto start_time = armnn::GetTimeNow();
+
+    ARMNN_LOG(info) << "Tuning run...\n";
+    int state = RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
+                        inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
+                        outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+                        thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, 1, runtime);
+
+    ARMNN_LOG(info) << "Tuning time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n";
+
+    return state;
+}
+#endif
\ No newline at end of file