IVGCVSW-5686 Add GpuAcc MLGO tuning file configuration argument

author Finn Williams <Finn.Williams@arm.com>

Thu, 11 Feb 2021 16:16:42 +0000 (16:16 +0000)

committer KeithARM <keith.davis@arm.com>

Mon, 15 Feb 2021 15:04:31 +0000 (15:04 +0000)
author Finn Williams <Finn.Williams@arm.com>
Thu, 11 Feb 2021 16:16:42 +0000 (16:16 +0000)
committer KeithARM <keith.davis@arm.com>
Mon, 15 Feb 2021 15:04:31 +0000 (15:04 +0000)
diff --git a/delegate/src/armnn_external_delegate.cpp b/delegate/src/armnn_external_delegate.cpp

index 4dba07d..edf46ef 100644 (file)
--- a/delegate/src/armnn_external_delegate.cpp
+++ b/delegate/src/armnn_external_delegate.cpp
@@ -125,6 +125,11 @@ TfLiteDelegate* tflite_plugin_create_delegate(char** options_keys,
                  armnn::BackendOptions option("GpuAcc", {{"TuningLevel", atoi(options_values[i])}});
                  options.AddBackendOption(option);
              }
+            else if (std::string(options_keys[i]) == std::string("gpu-mlgo-tuning-file"))
+            {
+                armnn::BackendOptions option("GpuAcc", {{"MLGOTuningFilePath", std::string(options_values[i])}});
+                options.AddBackendOption(option);
+            }
              else if (std::string(options_keys[i]) == std::string("gpu-tuning-file"))
              {
                  armnn::BackendOptions option("GpuAcc", {{"TuningFile", std::string(options_values[i])}});
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp

index 125f01b..9c5cca9 100644 (file)
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -21,8 +21,9 @@ namespace armnn
  struct ClBackendContext::ClContextControlWrapper
  {
      ClContextControlWrapper(arm_compute::CLTuner* tuner,
+                            arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
                              bool profilingEnabled)
-        : m_ClContextControl(tuner, profilingEnabled)
+        : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled)
      {}
  
      bool Sync()
@@ -143,6 +144,7 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
      bool kernelProfiling = options.m_EnableGpuProfiling;
  
      arm_compute::CLTuner* tuner = nullptr;
+    arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
      bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
      if (useLegacyTunerAPI)
      {
@@ -197,6 +199,10 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
                  {
                      tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
                  }
+                else if (name == "MLGOTuningFilePath")
+                {
+                    m_MLGOTuningFile = ParseFile(value, "");
+                }
              });
  
          // Create the tuner, in tuning mode initially.
@@ -216,13 +222,31 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options)
                  ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file.";
              }
          }
+
+        if (!m_MLGOTuningFile.empty())  // Only attempt a load when an MLGO tuning file path was supplied.
+        {
+            try
+            {
+                ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_MLGOTuningFile;
+                if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str()))
+                {
+                    mlgoTuner = &m_MLGOTuner;  // Pass the heuristics handle on only after a successful load.
+                }
+            }
+            catch (const std::exception& e)
+            {
+                ARMNN_LOG(warning) << "Could not load GpuAcc MLGO tuner data file.";
+            }
+        }
+
          tuner = m_Tuner.get();
      }
  
      m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>(
              tuner,
+            mlgoTuner,
              kernelProfiling
-        );
+    );
  }
  
  bool ClBackendContext::BeforeLoadNetwork(NetworkId)
diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp

index bcac0d2..af988a9 100644 (file)
--- a/src/backends/cl/ClBackendContext.hpp
+++ b/src/backends/cl/ClBackendContext.hpp
@@ -9,6 +9,7 @@
  #include <mutex>
  
  #include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
  
  namespace armnn
  {
@@ -35,6 +36,10 @@ private:
  
      std::unique_ptr<arm_compute::CLTuner> m_Tuner;
      std::string m_TuningFile;
+
+protected:
+    arm_compute::CLGEMMHeuristicsHandle m_MLGOTuner;
+    std::string m_MLGOTuningFile;
  };
  
  } // namespace armnn
 \ No newline at end of file
diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp

index 7ab825f..fd2d0f5 100644 (file)
--- a/src/backends/cl/ClContextControl.cpp
+++ b/src/backends/cl/ClContextControl.cpp
@@ -28,8 +28,10 @@ namespace armnn
  {
  
  ClContextControl::ClContextControl(arm_compute::CLTuner *tuner,
+                                   arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
                                     bool profilingEnabled)
      : m_Tuner(tuner)
+    , m_HeuristicsHandle(heuristicsHandle)
      , m_ProfilingEnabled(profilingEnabled)
  {
      // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled.
@@ -156,7 +158,7 @@ void ClContextControl::DoLoadOpenClRuntime(bool updateTunedParameters)
  
      // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute.
      arm_compute::CLKernelLibrary::get().init(".", context, device);
-    arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner);
+    arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle);
  }
  
  void ClContextControl::ClearClCache()
diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp

index 2ed43bc..4a640cd 100644 (file)
--- a/src/backends/cl/ClContextControl.hpp
+++ b/src/backends/cl/ClContextControl.hpp
@@ -7,6 +7,7 @@
  #include "armnn/IRuntime.hpp"
  
  #include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
  
  namespace armnn
  {
@@ -17,6 +18,7 @@ class ClContextControl
  public:
  
      ClContextControl(arm_compute::CLTuner* = nullptr,
+                     arm_compute::CLGEMMHeuristicsHandle* = nullptr,
                       bool profilingEnabled = false);
  
      virtual ~ClContextControl();
@@ -35,6 +37,7 @@ private:
      void DoLoadOpenClRuntime(bool updateTunedParameters);
  
      arm_compute::CLTuner* m_Tuner;
+    arm_compute::CLGEMMHeuristicsHandle* m_HeuristicsHandle;
  
      bool m_ProfilingEnabled;
  };
@@ -51,6 +54,7 @@ public:
      TuningLevel m_TuningLevel;
  
      arm_compute::CLTuner m_Tuner;
+    arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle;
  };
  
  } // namespace armnn
diff --git a/src/backends/cl/test/ClContextControlFixture.hpp b/src/backends/cl/test/ClContextControlFixture.hpp

index 0371c69..14970be 100644 (file)
--- a/src/backends/cl/test/ClContextControlFixture.hpp
+++ b/src/backends/cl/test/ClContextControlFixture.hpp
@@ -13,7 +13,7 @@ struct ClContextControlFixtureBase
  {
      // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case
      ClContextControlFixtureBase()
-        : m_ClContextControl(nullptr, ProfilingEnabled) {}
+        : m_ClContextControl(nullptr, nullptr, ProfilingEnabled) {}
  
      armnn::ClContextControl m_ClContextControl;
  };
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp

index 2797080..dddc5aa 100644 (file)
--- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -10,6 +10,10 @@
  #include <test/GraphUtils.hpp>
  
  #include <cl/ClWorkloadFactory.hpp>
+#include <cl/ClBackendContext.hpp>
+
+#include <Filesystem.hpp>
+
  
  #include <boost/test/unit_test.hpp>
  
@@ -130,4 +134,113 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnGpuAcc)
      BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
  }
  
+BOOST_AUTO_TEST_CASE(CheckMLGOTuningFile)  // Checks reload_from_file() for a valid file, a non-MLGO file and a missing path.
+{
+    class ClBackendContextTestClass : public armnn::ClBackendContext  // Subclass used to reach the protected MLGO members.
+    {
+    public:
+        ClBackendContextTestClass(const armnn::IRuntime::CreationOptions &options) : ClBackendContext(options)
+        {}
+
+        bool call_reload_from_file()  // Returns the result of reloading the MLGO file path held by the context.
+        {
+            return m_MLGOTuner.reload_from_file(m_MLGOTuningFile);
+        }
+    };
+
+    const std::string validText{
+            "<header>\n"
+            "gemm-version, [1,2,1]\n"
+            "ip-type,gpu\n"
+            "</header>\n"
+            "<heuristics-table>\n"
+            "0, g71 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]\n"
+            "1, g71 , 8, f32, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n"
+            "2, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]\n"
+            "3, g71 , 8, qasymm8, best-performance, static, gemm-type, [m,n,k,n]\n"
+            "4, g71 , 8, qasymm8, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n"
+            "5, g71 , 8, qasymm8, best-performance, static, gemm-config-native, [m,n,k,n]\n"
+            "</heuristics-table>\n"
+            "<heuristic, 0>\n"
+            "b , 0, var, r_mn, >=, num, 2., 1, 2\n"
+            "l , 1, gemm-type, reshaped\n"
+            "l , 2, gemm-type, reshaped-only-rhs\n"
+            "</heuristic>\n"
+            "<heuristic, 1>\n"
+            "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n"
+            "</heuristic>\n"
+            "<heuristic, 2>\n"
+            "l ,0,gemm-config-reshaped,[4,2,8,16,16,1,0,1,0]\n"
+            "</heuristic>\n"
+            "<heuristic, 3>\n"
+            "l , 0, gemm-type, native\n"
+            "</heuristic>\n"
+            "<heuristic, 4>\n"
+            "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n"
+            "</heuristic>\n"
+            "<heuristic, 5>\n"
+            "l ,0,gemm-config-native,[4,2,8]\n"
+            "</heuristic>\n"};
+
+    const std::string invalidText{"ʕノ•ᴥ•ʔノ ︵ ┻━┻"};
+
+    fs::path validFile = armnnUtils::Filesystem::NamedTempFile("validFile.mlgo");
+    fs::path invalidFile = armnnUtils::Filesystem::NamedTempFile("invalidFile.mlgo");
+
+    try  // NOTE(review): std::ofstream does not throw on open/write failure unless exceptions() is enabled - confirm this catch is reachable.
+    {
+        std::ofstream ofs1{validFile};
+        ofs1 << validText << std::endl;
+        ofs1.close();
+
+        std::ofstream ofs2{invalidFile};
+        ofs2 << invalidText << std::endl;
+        ofs2.close();
+    }
+    catch (std::exception &e)
+    {
+        std::cerr << "Unable to write to file at location [" << validFile.c_str() << "] : " << e.what() << std::endl;  // NOTE(review): names validFile only, even if invalidFile was the one that failed.
+        BOOST_TEST(false);
+    }
+
+    armnn::IRuntime::CreationOptions creationOptions1;
+    armnn::BackendOptions validOptions
+            {
+                    "GpuAcc",
+                    {
+                            {"MLGOTuningFilePath", validFile.c_str()}
+                    }
+            };
+
+    creationOptions1.m_BackendOptions.emplace_back(validOptions);
+    ClBackendContextTestClass clBackendContext1(creationOptions1);
+    BOOST_TEST(clBackendContext1.call_reload_from_file());  // Case 1: the well-formed file is expected to load.
+
+    armnn::BackendOptions invalidOptions
+            {
+                    "GpuAcc",
+                    {
+                            {"MLGOTuningFilePath", invalidFile.c_str()}
+                    }
+            };
+
+    armnn::IRuntime::CreationOptions creationOptions2;
+    creationOptions2.m_BackendOptions.emplace_back(invalidOptions);
+    ClBackendContextTestClass clBackendContext2(creationOptions2);
+    BOOST_TEST(clBackendContext2.call_reload_from_file() == false);  // Case 2: the file with non-MLGO content is expected to be rejected.
+
+    armnn::BackendOptions invalidPathOptions
+            {
+                    "GpuAcc",
+                    {
+                            {"MLGOTuningFilePath", "not_a_real_file_path"}
+                    }
+            };
+
+    armnn::IRuntime::CreationOptions creationOptions3;
+    creationOptions3.m_BackendOptions.emplace_back(invalidPathOptions);
+    ClBackendContextTestClass clBackendContext3(creationOptions3);
+    BOOST_TEST(clBackendContext3.call_reload_from_file() == false);  // Case 3: the "not_a_real_file_path" path is expected to fail.
+}
+
  BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp

index 68a356a..0e1f28e 100644 (file)
--- a/src/backends/cl/test/OpenClTimerTest.cpp
+++ b/src/backends/cl/test/OpenClTimerTest.cpp
@@ -32,7 +32,7 @@ struct OpenClFixture
      // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
      // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
      // times from OpenClTimer.
-    OpenClFixture() : m_ClContextControl(nullptr, true) {}
+    OpenClFixture() : m_ClContextControl(nullptr, nullptr, true) {}
      ~OpenClFixture() {}
  
      ClContextControl m_ClContextControl;
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp

index c19f519..e3ca22e 100644 (file)
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -309,6 +309,7 @@ int MainImpl(const ExecuteNetworkParams& params,
          inferenceModelParams.m_SaveCachedNetwork              = params.m_SaveCachedNetwork;
          inferenceModelParams.m_CachedNetworkFilePath          = params.m_CachedNetworkFilePath;
          inferenceModelParams.m_NumberOfThreads                = params.m_NumberOfThreads;
+        inferenceModelParams.m_MLGOTuningFilePath             = params.m_MLGOTuningFilePath;
  
          for(const std::string& inputName: params.m_InputNames)
          {
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp

index 830270a..a30ce57 100644 (file)
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -46,6 +46,7 @@ struct ExecuteNetworkParams
      double                        m_ThresholdTime;
      int                           m_TuningLevel;
      std::string                   m_TuningPath;
+    std::string                   m_MLGOTuningFilePath;
  
      // Ensures that the parameters for ExecuteNetwork fit together
      void ValidateParams();
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp

index 73da1f1..0eaf8da 100644 (file)
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -310,7 +310,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
  
                  ("tuning-path",
                   "Path to tuning file. Enables use of CL tuning",
-                 cxxopts::value<std::string>(m_ExNetParams.m_TuningPath));
+                 cxxopts::value<std::string>(m_ExNetParams.m_TuningPath))
+
+                ("MLGOTuningFilePath",
+                "Path to tuning file. Enables use of CL MLGO tuning",
+                cxxopts::value<std::string>(m_ExNetParams.m_MLGOTuningFilePath));
  
          m_CxxOptions.add_options("d) Profiling")
                  ("a,enable-external-profiling",
@@ -427,7 +431,8 @@ void ProgramOptions::ParseOptions(int ac, const char* av[])
                  {
                      {"TuningLevel", m_ExNetParams.m_TuningLevel},
                      {"TuningFile", m_ExNetParams.m_TuningPath.c_str()},
-                    {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling}
+                    {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling},
+                    {"MLGOTuningFilePath", m_ExNetParams.m_MLGOTuningFilePath}
                  }
              }
          );
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp

index d20bb22..7996262 100644 (file)
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -100,6 +100,8 @@ struct Params
      bool                            m_SaveCachedNetwork;
      std::string                     m_CachedNetworkFilePath;
      unsigned int                    m_NumberOfThreads;
+    std::string                     m_MLGOTuningFilePath;
+
  
      Params()
          : m_ComputeDevices{}
@@ -115,6 +117,7 @@ struct Params
          , m_SaveCachedNetwork(false)
          , m_CachedNetworkFilePath("")
          , m_NumberOfThreads(0)
+        , m_MLGOTuningFilePath("")
      {}
  };
  
@@ -434,8 +437,10 @@ public:
              {
                  { "FastMathEnabled", params.m_EnableFastMath },
                  { "SaveCachedNetwork", params.m_SaveCachedNetwork },
-                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath }
+                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
+                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
              });
+
              armnn::BackendOptions cpuAcc("CpuAcc",
              {
                  { "FastMathEnabled", params.m_EnableFastMath },
author	Finn Williams <Finn.Williams@arm.com>
	Thu, 11 Feb 2021 16:16:42 +0000 (16:16 +0000)
committer	KeithARM <keith.davis@arm.com>
	Mon, 15 Feb 2021 15:04:31 +0000 (15:04 +0000)
delegate/src/armnn_external_delegate.cpp		patch \| blob \| history
src/backends/cl/ClBackendContext.cpp		patch \| blob \| history
src/backends/cl/ClBackendContext.hpp		patch \| blob \| history
src/backends/cl/ClContextControl.cpp		patch \| blob \| history
src/backends/cl/ClContextControl.hpp		patch \| blob \| history
src/backends/cl/test/ClContextControlFixture.hpp		patch \| blob \| history
src/backends/cl/test/ClOptimizedNetworkTests.cpp		patch \| blob \| history
src/backends/cl/test/OpenClTimerTest.cpp		patch \| blob \| history
tests/ExecuteNetwork/ExecuteNetwork.cpp		patch \| blob \| history
tests/ExecuteNetwork/ExecuteNetworkParams.hpp		patch \| blob \| history
tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp		patch \| blob \| history
tests/InferenceModel.hpp		patch \| blob \| history