Enable optimization feature 13/253413/2
author: Inki Dae <inki.dae@samsung.com>
Wed, 10 Feb 2021 01:17:41 +0000 (10:17 +0900)
committer: Inki Dae <inki.dae@samsung.com>
Wed, 10 Feb 2021 06:01:38 +0000 (15:01 +0900)
Enable the following two optimization features:
    - converting the data type from fp32 to fp16.
    - using the Winograd algorithm[1] for matrix multiplication optimization,
      which reduces the number of multiplications required.

[1] https://en.wikipedia.org/wiki/Coppersmith%E2%80%93Winograd_algorithm

Change-Id: I9d37bc8e1cbb196ce46a2725a15ff09240826aaa
Signed-off-by: Inki Dae <inki.dae@samsung.com>
src/inference_engine_armnn.cpp

index a1be6dd885701a69fa47cc76157350e1f0f212fb..96a141fb905292c410ad1ccd0198fdc47a098fd2 100644 (file)
@@ -284,39 +284,52 @@ namespace ARMNNImpl
 
                armnn::IRuntime::CreationOptions creation_options;
 
-               // The use of CLTuner is valid only in case of GpuAcc request.
-               if (mCLTuner.active && UseGpuAcc()) {
-                       std::string tune_path = model_paths[0] + ".tune";
-
-                       // file_path can be set by user. So if file_path is given then
-                       // user-given path will be passed to a given inference engine.
-                       // On the other hand, if file_path is empty then default
-                       // path - "model file path + .tune" - will be passed to the
-                       // inference engine.
-                       if (!mCLTuner.file_path.empty())
-                               tune_path = mCLTuner.file_path;
-
-                       LOGI("CLTuner tuning file name is %s", tune_path.c_str());
-
-                       // If CLTuner is read only mode then set INFERENCE_ENGINE_CLTUNER_READ
-                       // to TuningLevel.
-                       // Ps. if TuningLevel is INFERENCE_ENGINE_CLTUNER_READ then
-                       // ARMCL will read a tuned file for inference.
-                       if (mCLTuner.update == false) {
-                               LOGI("CLTuner mode is read only.");
-                               mCLTuner.tuning_mode = INFERENCE_ENGINE_CLTUNER_READ;
-                       }
+               if (UseGpuAcc()) {
+                       // The use of CLTuner is valid only in case of GpuAcc request.
+                       if (mCLTuner.active) {
+                               std::string tune_path = model_paths[0] + ".tune";
+
+                               // file_path can be set by user. So if file_path is given then
+                               // user-given path will be passed to a given inference engine.
+                               // On the other hand, if file_path is empty then default
+                               // path - "model file path + .tune" - will be passed to the
+                               // inference engine.
+                               if (!mCLTuner.file_path.empty())
+                                       tune_path = mCLTuner.file_path;
+
+                               LOGI("CLTuner tuning file name is %s", tune_path.c_str());
+
+                               // If CLTuner is read only mode then set INFERENCE_ENGINE_CLTUNER_READ
+                               // to TuningLevel.
+                               // Ps. if TuningLevel is INFERENCE_ENGINE_CLTUNER_READ then
+                               // ARMCL will read a tuned file for inference.
+                               if (mCLTuner.update == false) {
+                                       LOGI("CLTuner mode is read only.");
+                                       mCLTuner.tuning_mode = INFERENCE_ENGINE_CLTUNER_READ;
+                               }
 
-                       creation_options.m_BackendOptions.emplace_back(
-                               armnn::BackendOptions
-                               {
-                                       "GpuAcc",
+                               creation_options.m_BackendOptions.emplace_back(
+                                       armnn::BackendOptions
                                        {
-                                               {"TuningLevel", static_cast<int>(ConvertTuningType(mCLTuner.tuning_mode))},
-                                               {"TuningFile", tune_path.c_str()}
+                                               "GpuAcc",
+                                               {
+                                                       {"FastMathEnabled", true},
+                                                       {"TuningLevel", static_cast<int>(ConvertTuningType(mCLTuner.tuning_mode))},
+                                                       {"TuningFile", tune_path.c_str()}
+                                               }
                                        }
-                               }
-                       );
+                               );
+                       } else {
+                               creation_options.m_BackendOptions.emplace_back(
+                                       armnn::BackendOptions
+                                       {
+                                               "GpuAcc",
+                                               {
+                                                       { "FastMathEnabled", true}
+                                               }
+                                       }
+                               );
+                       }
                }
 
                if (sRuntime == nullptr) {
@@ -340,7 +353,7 @@ namespace ARMNNImpl
 #endif
                // First parameter is reduceFp32ToFp16, and second one is debug mode.
                // In default, reduceFp32ToFp16 is false.
-               armnn::OptimizerOptions optimizerOptions(false, graph_debug);
+               armnn::OptimizerOptions optimizerOptions(true, graph_debug);
 
                // Optimize the network for a specific runtime compute device, e.g. CpuAcc, GpuAcc
                armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(