*
* The possible values:
* - "FP32" - device can support FP32 models
+ * - "BF16" - device can support BF16 computations for models
* - "FP16" - device can support FP16 models
* - "INT8" - device can support models with INT8 layers
* - "BIN" - device can support models with BIN layers
DECLARE_METRIC_KEY(OPTIMIZATION_CAPABILITIES, std::vector<std::string>);
DECLARE_METRIC_VALUE(FP32);
+DECLARE_METRIC_VALUE(BF16);
DECLARE_METRIC_VALUE(FP16);
DECLARE_METRIC_VALUE(INT8);
DECLARE_METRIC_VALUE(BIN);
} else if (key.compare(PluginConfigParams::KEY_DUMP_QUANTIZED_GRAPH_AS_IR) == 0) {
dumpQuantizedGraphToIr = val;
} else if (key == PluginConfigParams::KEY_ENFORCE_BF16) {
- if (val == PluginConfigParams::YES) enforceBF16 = true;
- else if (val == PluginConfigParams::NO) enforceBF16 = false;
- else
+ if (val == PluginConfigParams::YES) {
+ if (with_cpu_x86_bfloat16())
+ enforceBF16 = true;
+ else
+ THROW_IE_EXCEPTION << "Platform doesn't support BF16 format";
+ } else if (val == PluginConfigParams::NO) {
+ enforceBF16 = false;
+ } else {
THROW_IE_EXCEPTION << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";
+ }
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property " << key << " by CPU plugin";
}
_config.insert({ PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, std::to_string(streamExecutorConfig._streams) });
_config.insert({ PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads) });
_config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
+ if (!with_cpu_x86_bfloat16())
+ enforceBF16 = false;
if (enforceBF16)
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
else
std::string dumpQuantizedGraphToDot = "";
std::string dumpQuantizedGraphToIr = "";
int batchLimit = 0;
- bool enforceBF16 = false;
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
#if defined(__arm__) || defined(__aarch64__)
// Currently INT8 mode is not optimized on ARM, fallback to FP32 mode.
LPTransformsMode lpTransformsMode = LPTransformsMode::Off;
+ bool enforceBF16 = false;
#else
LPTransformsMode lpTransformsMode = LPTransformsMode::On;
+ bool enforceBF16 = true;
#endif
void readProperties(const std::map<std::string, std::string> &config);
if (with_cpu_x86_bfloat16() && isFloatModel) {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
- if (cfg.enforceBF16 == true) {
+ // If the enforceBF16 flag is set, the BF16 transformation is applied to all layers supported by the CPU plugin.
+ // Otherwise, only the layers marked as BF16 in 'cnnetwork' will be executed in bfloat16 mode.
+ // The CPU plugin throws an exception if a layer marked as BF16 is not supported by the CPU plugin.
+ if (cfg.enforceBF16 == true)
bf16Transformer.convertToBFloat16(cnnetwork);
- } else {
- bf16Transformer.optimizeToFloat(cnnetwork);
- }
} else {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities;
+ if (with_cpu_x86_bfloat16())
+ capabilities.push_back(METRIC_VALUE(BF16));
if (hasAVX512())
capabilities.push_back(METRIC_VALUE(WINOGRAD));
capabilities.push_back(METRIC_VALUE(FP32));