*
* The possible values:
* - "FP32" - device can support FP32 models
+ * - "BF16" - device can support BF16 computations for models
* - "FP16" - device can support FP16 models
* - "INT8" - device can support models with INT8 layers
* - "BIN" - device can support models with BIN layers
DECLARE_METRIC_KEY(OPTIMIZATION_CAPABILITIES, std::vector<std::string>);
DECLARE_METRIC_VALUE(FP32);
+DECLARE_METRIC_VALUE(BF16);
DECLARE_METRIC_VALUE(FP16);
DECLARE_METRIC_VALUE(INT8);
DECLARE_METRIC_VALUE(BIN);
} else if (key.compare(PluginConfigParams::KEY_DUMP_QUANTIZED_GRAPH_AS_IR) == 0) {
dumpQuantizedGraphToIr = val;
} else if (key == PluginConfigParams::KEY_ENFORCE_BF16) {
- if (val == PluginConfigParams::YES) enforceBF16 = true;
- else if (val == PluginConfigParams::NO) enforceBF16 = false;
- else
+ if (val == PluginConfigParams::YES) {
+ if (with_cpu_x86_bfloat16())
+ enforceBF16 = true;
+ else
+ THROW_IE_EXCEPTION << "Platform doesn't support BF16 format";
+ } else if (val == PluginConfigParams::NO) {
+ enforceBF16 = false;
+ } else {
THROW_IE_EXCEPTION << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";
+ }
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property " << key << " by CPU plugin";
}
_config.insert({ PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, std::to_string(streamExecutorConfig._streams) });
_config.insert({ PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads) });
_config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
+ if (!with_cpu_x86_bfloat16())
+ enforceBF16 = false;
if (enforceBF16)
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
else
std::string dumpQuantizedGraphToDot = "";
std::string dumpQuantizedGraphToIr = "";
int batchLimit = 0;
- bool enforceBF16 = false;
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
#if defined(__arm__) || defined(__aarch64__)
// Currently INT8 mode is not optimized on ARM, fallback to FP32 mode.
LPTransformsMode lpTransformsMode = LPTransformsMode::Off;
+ bool enforceBF16 = false;
#else
LPTransformsMode lpTransformsMode = LPTransformsMode::On;
+ bool enforceBF16 = true;
#endif
void readProperties(const std::map<std::string, std::string> &config);
if (with_cpu_x86_bfloat16() && isFloatModel) {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
- if (cfg.enforceBF16 == true) {
+ // If the enforceBF16 flag is set, the BF16 transformation is applied to all layers supported by the CPU plugin.
+ // Otherwise, only the layers marked as BF16 in 'cnnetwork' will be executed in bfloat16 mode.
+ // The CPU plugin throws an exception if a layer marked as BF16 is not supported by the CPU plugin.
+ if (cfg.enforceBF16 == true)
bf16Transformer.convertToBFloat16(cnnetwork);
- } else {
- bf16Transformer.optimizeToFloat(cnnetwork);
- }
} else {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities;
+ if (with_cpu_x86_bfloat16())
+ capabilities.push_back(METRIC_VALUE(BF16));
if (hasAVX512())
capabilities.push_back(METRIC_VALUE(WINOGRAD));
capabilities.push_back(METRIC_VALUE(FP32));