IVGCVSW-4521 Add bf16-turbo-mode option to ExecuteNetwork
author     Narumol Prangnawarat <narumol.prangnawarat@arm.com>
           Tue, 24 Mar 2020 13:54:05 +0000 (13:54 +0000)
committer  Jim Flynn <jim.flynn@arm.com>
           Tue, 24 Mar 2020 16:45:36 +0000 (16:45 +0000)
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I57ec47adf98680254fa481fb91d5a98dea8f032e

tests/ExecuteNetwork/ExecuteNetwork.cpp
tests/InferenceModel.hpp
tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index e9811d5..a59f580 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -97,6 +97,8 @@ int main(int argc, const char* argv[])
              "Enables built in profiler. If unset, defaults to off.")
             ("visualize-optimized-model,v", po::bool_switch()->default_value(false),
              "Enables built optimized model visualizer. If unset, defaults to off.")
+            ("bf16-turbo-mode", po::bool_switch()->default_value(false), "If this option is enabled, FP32 layers, "
+             "weights and biases will be converted to BFloat16 where the backend supports it")
             ("fp16-turbo-mode,h", po::bool_switch()->default_value(false), "If this option is enabled, FP32 layers, "
              "weights and biases will be converted to FP16 where the backend supports it")
             ("threshold-time,r", po::value<double>(&thresholdTime)->default_value(0.0),
@@ -158,6 +160,7 @@ int main(int argc, const char* argv[])
     bool concurrent = vm["concurrent"].as<bool>();
     bool enableProfiling = vm["event-based-profiling"].as<bool>();
     bool enableLayerDetails = vm["visualize-optimized-model"].as<bool>();
+    bool enableBf16TurboMode = vm["bf16-turbo-mode"].as<bool>();
     bool enableFp16TurboMode = vm["fp16-turbo-mode"].as<bool>();
     bool quantizeInput = vm["quantize-input"].as<bool>();
     bool dequantizeOutput = vm["dequantize-output"].as<bool>();
@@ -166,6 +169,12 @@ int main(int argc, const char* argv[])
     bool fileOnlyExternalProfiling = vm["file-only-external-profiling"].as<bool>();
     bool parseUnsupported = vm["parse-unsupported"].as<bool>();
 
+    if (enableBf16TurboMode && enableFp16TurboMode)
+    {
+        ARMNN_LOG(fatal) << "BFloat16 and Float16 turbo mode cannot be enabled at the same time.";
+        return EXIT_FAILURE;
+    }
+
 
     // Check whether we have to load test cases from a file.
     if (CheckOption(vm, "test-cases"))
@@ -213,8 +222,8 @@ int main(int argc, const char* argv[])
             {
                 testCase.values.insert(testCase.values.begin(), executableName);
                 results.push_back(std::async(std::launch::async, RunCsvTest, std::cref(testCase), std::cref(runtime),
-                                             enableProfiling, enableFp16TurboMode, thresholdTime, printIntermediate,
-                                             enableLayerDetails, parseUnsupported));
+                                             enableProfiling, enableFp16TurboMode, enableBf16TurboMode, thresholdTime,
+                                             printIntermediate, enableLayerDetails, parseUnsupported));
             }
 
             // Check results
@@ -233,7 +242,7 @@ int main(int argc, const char* argv[])
             {
                 testCase.values.insert(testCase.values.begin(), executableName);
                 if (RunCsvTest(testCase, runtime, enableProfiling,
-                               enableFp16TurboMode, thresholdTime, printIntermediate,
+                               enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate,
                                enableLayerDetails, parseUnsupported) != EXIT_SUCCESS)
                 {
                     return EXIT_FAILURE;
@@ -280,7 +289,7 @@ int main(int argc, const char* argv[])
 
         return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
                        inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
-                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, thresholdTime,
-                       printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
+                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+                       thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
     }
 }
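
Usage note (not part of the patch itself): in ExecuteNetwork the new option is a plain boolean switch, so a hedged invocation sketch would be along the lines of "ExecuteNetwork --model-format tflite-binary --model-path model.tflite --input-name input --output-name output --compute CpuAcc --bf16-turbo-mode", where the model path, tensor names and backend choice are placeholders rather than values taken from this change. Passing --fp16-turbo-mode and --bf16-turbo-mode together is rejected with the fatal log added above.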
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 50b1607..711f768 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -91,6 +91,7 @@ struct Params
     bool                            m_IsModelBinary;
     bool                            m_VisualizePostOptimizationModel;
     bool                            m_EnableFp16TurboMode;
+    bool                            m_EnableBf16TurboMode;
     bool                            m_PrintIntermediateLayers;
     bool                            m_ParseUnsupported;
 
@@ -100,6 +101,7 @@ struct Params
         , m_IsModelBinary(true)
         , m_VisualizePostOptimizationModel(false)
         , m_EnableFp16TurboMode(false)
+        , m_EnableBf16TurboMode(false)
         , m_PrintIntermediateLayers(false)
         , m_ParseUnsupported(false)
     {}
@@ -328,6 +330,7 @@ public:
         std::string m_DynamicBackendsPath;
         bool m_VisualizePostOptimizationModel;
         bool m_EnableFp16TurboMode;
+        bool m_EnableBf16TurboMode;
         std::string m_Labels;
 
         std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
@@ -365,7 +368,10 @@ public:
                 "The file will have the same name as the model with the .dot extention.")
             ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                 "If this option is enabled FP32 layers, weights and biases will be converted "
-                "to FP16 where the backend supports it.");
+                "to FP16 where the backend supports it.")
+            ("bf16-turbo-mode", po::value<bool>(&options.m_EnableBf16TurboMode)->default_value(false),
+                "If this option is enabled FP32 layers, weights and biases will be converted "
+                "to BF16 where the backend supports it.");
     }
 
     InferenceModel(const Params& params,
@@ -401,6 +407,7 @@ public:
 
             armnn::OptimizerOptions options;
             options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
+            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
             options.m_Debug = params.m_PrintIntermediateLayers;
 
             optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
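
For context, the hunk above is where the new flag reaches the optimizer: InferenceModel simply sets m_ReduceFp32ToBf16 on armnn::OptimizerOptions before calling armnn::Optimize. A minimal standalone sketch of the same pattern follows; the tiny single-activation network, the CpuRef backend choice and the tensor shape are illustrative assumptions, not taken from this patch.

    // Hedged sketch: requesting the BFloat16 reduction via OptimizerOptions,
    // mirroring what InferenceModel does when --bf16-turbo-mode is set.
    // The network below (input -> activation -> output) is a placeholder.
    #include <armnn/ArmNN.hpp>
    #include <vector>

    int main()
    {
        using namespace armnn;

        IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

        INetworkPtr network = INetwork::Create();
        IConnectableLayer* input  = network->AddInputLayer(0);
        ActivationDescriptor actDesc;                       // defaults to Sigmoid
        IConnectableLayer* act    = network->AddActivationLayer(actDesc);
        IConnectableLayer* output = network->AddOutputLayer(0);

        TensorInfo info({ 1, 4 }, DataType::Float32);
        input->GetOutputSlot(0).SetTensorInfo(info);
        act->GetOutputSlot(0).SetTensorInfo(info);
        input->GetOutputSlot(0).Connect(act->GetInputSlot(0));
        act->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        OptimizerOptions optOptions;
        optOptions.m_ReduceFp32ToBf16 = true;   // what --bf16-turbo-mode toggles
        optOptions.m_ReduceFp32ToFp16 = false;  // mutually exclusive with FP16 turbo mode

        std::vector<BackendId> backends = { Compute::CpuRef };
        IOptimizedNetworkPtr optNet =
            Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);

        NetworkId netId;
        runtime->LoadNetwork(netId, std::move(optNet));
        return 0;
    }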
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 4d996fd..a0aeb8b 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -379,6 +379,7 @@ struct ExecuteNetworkParams
     bool                          m_DequantizeOutput;
     bool                          m_EnableProfiling;
     bool                          m_EnableFp16TurboMode;
+    bool                          m_EnableBf16TurboMode;
     double                        m_ThresholdTime;
     bool                          m_PrintIntermediate;
     size_t                        m_SubgraphId;
@@ -424,6 +425,7 @@ int MainImpl(const ExecuteNetworkParams& params,
 
         inferenceModelParams.m_SubgraphId          = params.m_SubgraphId;
         inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
+        inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
 
         InferenceModel<TParser, TDataType> model(inferenceModelParams,
                                                  params.m_EnableProfiling,
@@ -549,6 +551,7 @@ int RunTest(const std::string& format,
             bool dequantizeOuput,
             bool enableProfiling,
             bool enableFp16TurboMode,
+            bool enableBf16TurboMode,
             const double& thresholdTime,
             bool printIntermediate,
             const size_t subgraphId,
@@ -673,6 +676,7 @@ int RunTest(const std::string& format,
     params.m_DequantizeOutput         = dequantizeOuput;
     params.m_EnableProfiling          = enableProfiling;
     params.m_EnableFp16TurboMode      = enableFp16TurboMode;
+    params.m_EnableBf16TurboMode      = enableBf16TurboMode;
     params.m_ThresholdTime            = thresholdTime;
     params.m_PrintIntermediate        = printIntermediate;
     params.m_SubgraphId               = subgraphId;
@@ -748,8 +752,9 @@ int RunTest(const std::string& format,
 }
 
 int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
-               const bool enableProfiling, const bool enableFp16TurboMode, const double& thresholdTime,
-               const bool printIntermediate, bool enableLayerDetails = false, bool parseUnuspported = false)
+               const bool enableProfiling, const bool enableFp16TurboMode, const bool enableBf16TurboMode,
+               const double& thresholdTime, const bool printIntermediate, bool enableLayerDetails = false,
+               bool parseUnuspported = false)
 {
     IgnoreUnused(runtime);
     std::string modelFormat;
@@ -868,6 +873,6 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IR
 
     return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
                    inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames, outputTensorFiles,
-                   dequantizeOutput, enableProfiling, enableFp16TurboMode, thresholdTime, printIntermediate, subgraphId,
-                   enableLayerDetails, parseUnuspported);
+                   dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+                   thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnuspported);
 }