[CPU][BF16] Enable BF16 optimisation capability by default on CPX (#647)
author    Alexey Varyzgin <alexey.varyzgin@intel.com>
          Thu, 4 Jun 2020 13:06:15 +0000 (16:06 +0300)
committer GitHub <noreply@github.com>
          Thu, 4 Jun 2020 13:06:15 +0000 (16:06 +0300)
inference-engine/include/ie_plugin_config.hpp
inference-engine/src/mkldnn_plugin/config.cpp
inference-engine/src/mkldnn_plugin/config.h
inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp

inference-engine/include/ie_plugin_config.hpp
index 199c613..043b706 100644
@@ -92,6 +92,7 @@ DECLARE_METRIC_KEY(FULL_DEVICE_NAME, std::string);
  *
  * The possible values:
  *  - "FP32" - device can support FP32 models
+ *  - "BF16" - device can support BF16 computations for models
  *  - "FP16" - device can support FP16 models
  *  - "INT8" - device can support models with INT8 layers
  *  - "BIN" - device can support models with BIN layers
@@ -100,6 +101,7 @@ DECLARE_METRIC_KEY(FULL_DEVICE_NAME, std::string);
 DECLARE_METRIC_KEY(OPTIMIZATION_CAPABILITIES, std::vector<std::string>);
 
 DECLARE_METRIC_VALUE(FP32);
+DECLARE_METRIC_VALUE(BF16);
 DECLARE_METRIC_VALUE(FP16);
 DECLARE_METRIC_VALUE(INT8);
 DECLARE_METRIC_VALUE(BIN);
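
For context, an application can discover the new capability through the standard metric API. A minimal sketch, assuming the 2020-era Inference Engine C++ API (Core::GetMetric plus the METRIC_KEY/METRIC_VALUE macros declared in this header); the helper name cpuSupportsBF16 is hypothetical:

    #include <algorithm>
    #include <string>
    #include <vector>
    #include <ie_core.hpp>

    // Returns true when the CPU plugin reports the new "BF16" capability.
    bool cpuSupportsBF16() {
        InferenceEngine::Core core;
        std::vector<std::string> caps =
            core.GetMetric("CPU", METRIC_KEY(OPTIMIZATION_CAPABILITIES))
                .as<std::vector<std::string>>();
        return std::find(caps.begin(), caps.end(),
                         METRIC_VALUE(BF16)) != caps.end();
    }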
inference-engine/src/mkldnn_plugin/config.cpp
index 3ff1172..c30bd33 100644
@@ -73,11 +73,17 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
         } else if (key.compare(PluginConfigParams::KEY_DUMP_QUANTIZED_GRAPH_AS_IR) == 0) {
             dumpQuantizedGraphToIr = val;
         } else if (key == PluginConfigParams::KEY_ENFORCE_BF16) {
-            if (val == PluginConfigParams::YES) enforceBF16 = true;
-            else if (val == PluginConfigParams::NO) enforceBF16 = false;
-            else
+            if (val == PluginConfigParams::YES) {
+                if (with_cpu_x86_bfloat16())
+                    enforceBF16 = true;
+                else
+                    THROW_IE_EXCEPTION << "Platform doesn't support BF16 format";
+            } else if (val == PluginConfigParams::NO) {
+                enforceBF16 = false;
+            } else {
                 THROW_IE_EXCEPTION << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
                     << ". Expected only YES/NO";
+            }
         } else {
             THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property " << key << " by CPU plugin";
         }
@@ -118,6 +124,8 @@ void Config::updateProperties() {
         _config.insert({ PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, std::to_string(streamExecutorConfig._streams) });
         _config.insert({ PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads) });
         _config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
+        if (!with_cpu_x86_bfloat16())
+            enforceBF16 = false;
         if (enforceBF16)
             _config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
         else
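
With this change, requesting KEY_ENFORCE_BF16=YES on hardware without native bfloat16 support fails fast in readProperties() instead of being accepted silently, and updateProperties() clears the flag on such hardware. A hedged usage sketch of the new behaviour (model loading omitted; the exception text matches the THROW_IE_EXCEPTION above):

    #include <iostream>
    #include <map>
    #include <string>
    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core core;
        try {
            // On AVX512_BF16 CPUs (e.g. CPX) this enables BF16 enforcement;
            // on older CPUs the plugin now throws instead of accepting YES.
            core.SetConfig({{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
                             InferenceEngine::PluginConfigParams::YES}}, "CPU");
        } catch (const std::exception& e) {
            std::cerr << e.what() << std::endl;  // "Platform doesn't support BF16 format"
        }
        return 0;
    }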
inference-engine/src/mkldnn_plugin/config.h
index 2444f00..a06d4be 100644
@@ -32,14 +32,15 @@ struct Config {
     std::string dumpQuantizedGraphToDot = "";
     std::string dumpQuantizedGraphToIr = "";
     int batchLimit = 0;
-    bool enforceBF16 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
 
 #if defined(__arm__) || defined(__aarch64__)
     // Currently INT8 mode is not optimized on ARM, fallback to FP32 mode.
     LPTransformsMode lpTransformsMode = LPTransformsMode::Off;
+    bool enforceBF16 = false;
 #else
     LPTransformsMode lpTransformsMode = LPTransformsMode::On;
+    bool enforceBF16 = true;
 #endif
 
     void readProperties(const std::map<std::string, std::string> &config);
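
The default therefore flips from "off everywhere" to "on where the CPU has native bfloat16", with updateProperties() (config.cpp above) downgrading it at runtime on other platforms. A sketch of reading the effective default back, under the assumption that the CPU plugin exposes KEY_ENFORCE_BF16 through Core::GetConfig:

    #include <string>
    #include <ie_core.hpp>

    std::string effectiveEnforceBF16() {
        InferenceEngine::Core core;
        // Expected "YES" on BF16-capable CPUs (the new default), "NO" elsewhere,
        // because updateProperties() clears the flag on unsupported hardware.
        return core.GetConfig("CPU",
            InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16).as<std::string>();
    }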
inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
index c0846e7..6cd7715 100644
@@ -101,11 +101,11 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
             if (with_cpu_x86_bfloat16() && isFloatModel) {
                 BF16Transformer bf16Transformer;
                 CNNNetwork cnnetwork(_clonedNetwork);
-                if (cfg.enforceBF16 == true) {
+                // If the enforceBF16 flag is set, the BF16 transformation is applied to all layers supported by the CPU plugin.
+                // Otherwise, only layers marked as BF16 in 'cnnetwork' are executed in bfloat16 mode.
+                // The CPU plugin throws an exception if a layer marked as BF16 is not supported by the CPU plugin.
+                if (cfg.enforceBF16)
                     bf16Transformer.convertToBFloat16(cnnetwork);
-                } else {
-                    bf16Transformer.optimizeToFloat(cnnetwork);
-                }
             } else {
                 BF16Transformer bf16Transformer;
                 CNNNetwork cnnetwork(_clonedNetwork);
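
To check what a loaded model actually got, the flag can be read back from the ExecutableNetwork. A sketch under the assumption that the CPU plugin reflects its Config in ExecutableNetwork::GetConfig; "model.xml" is a placeholder IR path:

    #include <iostream>
    #include <string>
    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");  // placeholder path
        auto execNet = core.LoadNetwork(network, "CPU");
        // "YES": convertToBFloat16() ran over all layers the plugin supports.
        // "NO": only layers already marked BF16 in the network use bfloat16.
        std::string enforced = execNet.GetConfig(
            InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16).as<std::string>();
        std::cout << "ENFORCE_BF16: " << enforced << std::endl;
        return 0;
    }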
inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index a2af567..c81d633 100644
@@ -185,6 +185,8 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
         IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
     } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
         std::vector<std::string> capabilities;
+        if (with_cpu_x86_bfloat16())
+            capabilities.push_back(METRIC_VALUE(BF16));
         if (hasAVX512())
             capabilities.push_back(METRIC_VALUE(WINOGRAD));
         capabilities.push_back(METRIC_VALUE(FP32));