1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ie_metric_helpers.hpp"
6 #include "mkldnn_plugin.h"
7 #include "mkldnn_extension_mngr.h"
8 #include "mkldnn_weights_cache.hpp"
9 #include "mkldnn_itt.h"
11 #include <legacy/net_pass.h>
12 #include <threading/ie_executor_manager.hpp>
14 #include <ie_plugin_config.hpp>
17 #include <ie_system_conf.h>
18 #include <generic_ie.hpp>
19 #include <nodes/list.hpp>
20 #include <legacy/ie_util_internal.hpp>
21 #include <legacy/graph_transformer.h>
22 #include <ie_ngraph_utils.hpp>
24 #include <legacy/convert_function_to_cnn_network.hpp>
25 #include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
26 #include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
27 #include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
28 #include <legacy/ngraph_ops/fully_connected.hpp>
30 #include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
31 #include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
33 #include <transformations/common_optimizations/common_optimizations.hpp>
34 #include <transformations/common_optimizations/depth_to_space_fusion.hpp>
35 #include <transformations/control_flow/unroll_tensor_iterator.hpp>
36 #include <transformations/op_conversions/convert_depth_to_space.hpp>
37 #include <transformations/op_conversions/convert_space_to_depth.hpp>
38 #include <transformations/op_conversions/convert_gelu.hpp>
39 #include <transformations/op_conversions/hswish_decomposition.hpp>
40 #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
41 #include <transformations/op_conversions/reduce_l1_decomposition.hpp>
42 #include <transformations/op_conversions/reduce_l2_decomposition.hpp>
43 #include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
44 #include <transformations/op_conversions/softplus_decomposition.hpp>
45 #include <transformations/op_conversions/convert_space_to_batch.hpp>
46 #include <transformations/op_conversions/convert_batch_to_space.hpp>
47 #include <transformations/op_conversions/convert_mod.hpp>
48 #include <transformations/op_conversions/log_softmax_decomposition.hpp>
49 #include <transformations/convert_precision.hpp>
50 #include <transformations/init_node_info.hpp>
51 #include <transformations/rt_info/fused_names_attribute.hpp>
53 #include <ngraph/opsets/opset2.hpp>
54 #include <ngraph/opsets/opset3.hpp>
55 #include <ngraph/opsets/opset4.hpp>
56 #include <ngraph/op/util/op_types.hpp>
57 #include <ngraph/pass/manager.hpp>
59 #include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
61 #ifndef USE_CNNNETWORK_LPT
62 # include <low_precision/transformer.hpp>
63 # include <low_precision/convolution.hpp>
64 # include <low_precision/group_convolution.hpp>
65 # include <low_precision/multiply_to_group_convolution.hpp>
68 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
69 #if defined(_WIN32) || defined(WIN32)
78 using namespace MKLDNNPlugin;
79 using namespace InferenceEngine;
// NOTE(review): body fragment of the Engine constructor (its signature is
// elided from this listing) — registers the built-in CPU extensions with the
// plugin's extension manager.
83 extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
// NOTE(review): body fragment of the Engine destructor (signature elided) —
// releases the stream/callback executors this plugin registered, so the
// shared ExecutorManager does not keep them alive after plugin unload.
87 ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
88 ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
// Runs the full nGraph -> legacy IE transformation pipeline on a cloned
// network in place: common optimizations, opset down-conversion, precision
// normalization, optional low-precision (INT8) transformations, legacy op
// conversion, and finally conversion back to a CNNNetwork representation.
// `conf` supplies plugin configuration (e.g. the LPT mode checked below).
// NOTE(review): the leading numbers are original line numbers of an elided
// listing; several lines (closing braces, #else/#endif) are not visible here.
91 static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf) {
92 OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNNPlugin, "Transformation");
94 auto nGraphFunc = clonedNetwork->getFunction();
95 // Disable shape inference (WA for generic operations)
96 ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
// Stage 1: generic optimizations and opset lowering (opset3 -> opset2 -> opset1).
98 ngraph::pass::Manager manager;
99 manager.register_pass<ngraph::pass::InitNodeInfo>();
100 // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
101 manager.register_pass<ngraph::pass::ConvertPriorBox>();
102 manager.register_pass<ngraph::pass::CommonOptimizations>();
103 manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
104 manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
// Precisions folded to ones the CPU plugin executes: 64/16/32-bit unsigned and
// 64-bit signed ints -> i32, f16 -> f32, boolean -> u8.  The same list is
// reused at the bottom for input/output precision conversion.
106 std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
107 {ngraph::element::i64, ngraph::element::i32},
108 {ngraph::element::u64, ngraph::element::i32},
109 {ngraph::element::u16, ngraph::element::i32},
110 {ngraph::element::u32, ngraph::element::i32},
111 {ngraph::element::f16, ngraph::element::f32},
112 {ngraph::element::boolean, ngraph::element::u8},
115 for (auto & precision : convert_precision_list) {
116 manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
// Per-pass, per-node callbacks: a callback returning true disables the
// registered pass for that particular node (nGraph PassConfig convention).
119 auto pass_config = manager.get_pass_config();
121 using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
123 // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
124 pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
125 ngraph::pass::ConvertDepthToSpace>(
126 [](const_node_ptr &node) -> bool {
127 return node->input_value(0).get_shape().size() <= 5lu &&
128 node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
131 // Disable FC reshaping for 3D case
132 pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(
133 [](const_node_ptr &node) -> bool {
134 return node->input_value(0).get_shape().size() == 3ul;
// BatchToSpace/SpaceToBatch conversion is skipped only for 4D/5D inputs.
137 pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
138 ngraph::pass::ConvertSpaceToBatch>(
139 [](const_node_ptr &node) -> bool {
140 const auto & rank = node->input(0).get_partial_shape().rank().get_length();
141 return rank == 4lu || rank == 5lu;
144 // List of enabled/disabled transformations
// Decompositions disabled below — presumably the CPU plugin executes these
// ops natively so decomposing them would only hurt; TODO confirm per op.
145 pass_config->disable<ngraph::pass::ConvertGELU>();
146 pass_config->disable<ngraph::pass::HSwishDecomposition>();
147 pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
148 pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
149 pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
150 pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
151 pass_config->disable<ngraph::pass::ConvertMod>();
152 pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();
154 pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();
156 manager.run_passes(nGraphFunc);
// Stage 2: optional nGraph-based low-precision (INT8) transformations,
// compiled in only when the legacy CNNNetwork LPT path is not used.
158 #ifndef USE_CNNNETWORK_LPT
159 using namespace ngraph::pass::low_precision;
160 if (conf.lpTransformsMode == Config::LPTransformsMode::On) {
161 auto params = LayerTransformation::Params(
162 true, // updatePrecisions
163 LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
164 LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
165 true); // supportAsymmetricQuantization
// Convolution/GroupConvolution get u8 activations with asymmetric
// quantization support; MultiplyToGroupConvolution runs as standalone cleanup.
166 LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
167 .add<ConvolutionTransformation, ngraph::opset1::Convolution>(
168 LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true))
169 .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
170 LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
171 .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
172 LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));
174 transformer.transform(nGraphFunc);
// Stage 3: conversion of opset1 operations to legacy (IR v7 style) ops.
178 ngraph::pass::Manager legacyManager;
179 legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
180 legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
181 // not legacy actually, but it should be the last transformation in the transformation pipeline
182 legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
184 auto legacyPassConfig = legacyManager.get_pass_config();
// Skip Add+Multiply fusion when the Multiply is a dequantization op fed by an
// Add that follows Convolution/GroupConvolution/MatMul — the plugin handles
// that pattern itself.
185 legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
186 if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
187 auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
188 auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
189 bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
190 if (add_op && constant && is_dequantization) {
191 return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
192 ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
193 ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
199 legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
200 // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
201 return node->get_rt_info().count("UNROLL_TI") == 0;
203 legacyManager.run_passes(nGraphFunc);
// Replace the caller's network with the converted CNNNetwork in place.
205 clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
207 // WA: after conversion to CNNNetwork user precision can redefine input/output precisions
208 // so we need to apply additional precision conversion but only for inputs and outputs
209 for (auto & precision : convert_precision_list) {
210 NetPass::ConvertIOPrecision(*clonedNetwork, convertPrecision(precision.first), convertPrecision(precision.second));
// Builds an executable network for the CPU device: validates input
// precisions, layers per-load `config` on top of the global engine config,
// transforms the (cloned) network, and constructs an MKLDNNExecNetwork.
// Throws NOT_IMPLEMENTED for unsupported input precisions.
// NOTE(review): elided listing — some closing braces are not visible here.
214 InferenceEngine::ExecutableNetworkInternal::Ptr
215 Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
216 OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
218 // verification of supported input
219 InferenceEngine::InputsDataMap _networkInputs;
220 network.getInputsInfo(_networkInputs);
221 for (const auto &ii : _networkInputs) {
222 auto input_precision = ii.second->getPrecision();
// Whitelist of accepted input precisions; anything else is rejected early.
223 if (input_precision != InferenceEngine::Precision::FP32 &&
224 input_precision != InferenceEngine::Precision::I32 &&
225 input_precision != InferenceEngine::Precision::U16 &&
226 input_precision != InferenceEngine::Precision::I16 &&
227 input_precision != InferenceEngine::Precision::I8 &&
228 input_precision != InferenceEngine::Precision::U8 &&
229 input_precision != InferenceEngine::Precision::BOOL &&
230 input_precision != InferenceEngine::Precision::I64 &&
231 input_precision != InferenceEngine::Precision::U64) {
232 THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
233 << "Input image format " << input_precision << " is not supported yet...";
237 // TODO: handle input precision differently - per input and not one per network...
239 // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
240 Config conf = engConfig;
241 conf.readProperties(config);
// Dynamic batch: cap the batch limit at the network's declared batch size.
243 if (conf.enableDynamicBatch) {
244 conf.batchLimit = static_cast<int>(network.getBatchSize());
// nGraph-based networks go through the full Transformation() pipeline;
// legacy networks skip it and get per-precision conversion below instead.
247 std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
248 bool is_transformed = false;
249 if (clonedNetwork->getFunction()) {
250 Transformation(clonedNetwork, conf);
251 is_transformed = true;
253 auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
255 // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
256 ConstTransformer transformator(implNetwork.get());
257 transformator.fullTrim();
258 if (!is_transformed) {
259 NetPass::ConvertPrecision(*implNetwork, Precision::I64, Precision::I32);
260 NetPass::ConvertPrecision(*implNetwork, Precision::U64, Precision::I32);
261 NetPass::ConvertPrecision(*implNetwork, Precision::U32, Precision::I32);
262 NetPass::ConvertPrecision(*implNetwork, Precision::FP16, Precision::FP32);
263 NetPass::ConvertPrecision(*implNetwork, Precision::BOOL, Precision::U8);
264 NetPass::ConvertPrecision(*implNetwork, Precision::U16, Precision::I32);
268 return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
// Accumulates configuration at engine level; the merged values persist and
// are used as the base config for subsequent LoadNetwork/QueryNetwork calls.
271 void Engine::SetConfig(const std::map<std::string, std::string> &config) {
272 // accumulate config parameters on engine level
273 engConfig.readProperties(config);
// Returns the value of a single engine-level config key.
// Throws for keys not present in the engine configuration.
276 Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
278 auto option = engConfig._config.find(name);
279 if (option != engConfig._config.end()) {
280 result = option->second;
// NOTE(review): the declaration of `result`, the else-branch, and the final
// return are elided from this listing.
282 THROW_IE_EXCEPTION << "Unsupported config key " << name;
// Detects AVX-512 Foundation support via CPUID leaf 7 / sub-leaf 0:
// bit 16 of EBX (regs[1]) is the AVX512F flag.  On ARM targets the CPUID
// code is compiled out entirely (the #else/#endif branch is elided here).
287 static bool hasAVX512() {
288 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
// regs = {leaf, sub-leaf, 0, 0} on input; EAX/EBX/ECX/EDX on output.
289 unsigned int regs[4] = {7, 0, 0, 0};
290 #if defined(_WIN32) || defined(WIN32)
291 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
293 __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
295 if (regs[1] & (1U << 16))
// Returns plugin metrics: supported metric/config keys, full device name
// (CPU brand string via CPUID), optimization capabilities (BF16/WINOGRAD/
// FP32/FP16/INT8/BIN), and the stream / async-infer-request ranges.
// Throws for unknown metric keys.
// FIX(review): the __get_cpuid call and the reinterpret_cast below contained
// mis-encoded characters ("®s[..]" in place of "&regs[..]" — a classic
// "&re" -> "®" encoding corruption); restored to valid C++.  No other
// code token was changed.
301 Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
302 if (name == METRIC_KEY(SUPPORTED_METRICS)) {
303 std::vector<std::string> metrics;
304 metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
305 metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
306 metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
307 metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
308 metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
309 metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
310 metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
311 IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
312 } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
313 std::string brand_string;
314 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
// CPUID leaves 0x80000002..0x80000004 each return 16 bytes of the processor
// brand string in EAX..EDX; the bytes are appended to brand_string in order.
315 unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
316 unsigned int regs[4];
317 for (auto addr : addr_list) {
319 #if defined(_WIN32) || defined(WIN32)
320 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
322 __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
324 char *ch = reinterpret_cast<char*>(&regs[0]);
325 for (size_t j = 0; j < sizeof(regs); j++)
326 brand_string += ch[j];
329 brand_string = "Non Intel Architecture";
331 IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
332 } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
// CPU plugin exposes a single unnamed device.
333 std::vector<std::string> availableDevices = { "" };
334 IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
335 } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
336 std::vector<std::string> capabilities;
337 if (with_cpu_x86_bfloat16())
338 capabilities.push_back(METRIC_VALUE(BF16));
340 capabilities.push_back(METRIC_VALUE(WINOGRAD));
341 capabilities.push_back(METRIC_VALUE(FP32));
342 capabilities.push_back(METRIC_VALUE(FP16));
343 capabilities.push_back(METRIC_VALUE(INT8));
344 capabilities.push_back(METRIC_VALUE(BIN));
345 IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
346 } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
347 std::vector<std::string> configKeys;
348 for (auto && opt : engConfig._config)
349 configKeys.push_back(opt.first);
350 IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
351 } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
352 std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
353 IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
354 } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
355 std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
356 IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
358 THROW_IE_EXCEPTION << "Unsupported metric key " << name;
// Registers a user-provided extension with the plugin's extension manager,
// making its layer implementations available to node creation.
362 void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
363 extensionManager->AddExtension(extension);
// Reports which layers of `network` this CPU plugin can execute.
// nGraph path: clone + run Transformation(), then per converted layer try to
// construct an MKLDNN node; support is mapped back to the original ops via
// their fused-names run-time info.  Legacy path: probe node creation for
// every layer of the original network directly.
// NOTE(review): elided listing — closing braces / else-branches are missing.
366 QueryNetworkResult Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config) const {
367 QueryNetworkResult res;
// Dummy weights cache: nodes are constructed only to test supportability.
368 MKLDNNWeightsSharing::Ptr fake_w_cache;
369 auto function = network.getFunction();
370 if (function != nullptr) {
// Remember the friendly names of the original ops so that support of
// transformed/fused layers can be attributed back to them.
371 std::unordered_set<std::string> originalOps;
372 for (auto&& node : function->get_ops()) {
373 originalOps.emplace(node->get_friendly_name());
376 // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
377 Config conf = engConfig;
378 conf.readProperties(config);
380 if (conf.enableDynamicBatch) {
381 conf.batchLimit = static_cast<int>(network.getBatchSize());
384 auto clonedNetwork = cloneNetwork(network);
385 Transformation(clonedNetwork, conf);
386 std::unordered_set<std::string> supported;
387 std::unordered_set<std::string> unsupported;
388 for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
// A layer counts as supported iff an MKLDNN node can be constructed for it
// without throwing an InferenceEngineException.
389 auto layerIsSupported = [&] {
390 std::unique_ptr<MKLDNNNode> ptr;
392 ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
393 } catch (InferenceEngine::details::InferenceEngineException&) {
398 for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
399 if (contains(originalOps, fusedLayerName)) {
400 if (layerIsSupported) {
401 supported.emplace(fusedLayerName);
403 unsupported.emplace(fusedLayerName);
// Also mark Constants that feed supported ops and Results they feed as
// supported, so reported subgraphs stay connected end to end.
409 for (auto&& node : function->get_ops()) {
410 if (!contains(unsupported, node->get_friendly_name())) {
411 for (auto&& inputNodeOutput : node->input_values()) {
412 if (ngraph::op::is_constant(inputNodeOutput.get_node())) {
413 supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
416 for (auto&& outputs : node->outputs()) {
417 for (auto&& outputNodeInput : outputs.get_target_inputs()) {
418 if (ngraph::op::is_output(outputNodeInput.get_node())) {
419 supported.emplace(outputNodeInput.get_node()->get_friendly_name());
// A layer both supported and unsupported (partially fused) is excluded.
426 for (auto&& layerName : supported) {
427 if (!contains(unsupported, layerName)) {
428 res.supportedLayersMap.emplace(layerName, GetName());
// Legacy (non-nGraph) path: probe every layer of the original network.
432 details::CNNNetworkIterator i(&network);
433 while (i != details::CNNNetworkIterator()) {
435 mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
436 // if we can create and have not thrown exception, then layer is supported
437 std::unique_ptr <MKLDNNNode>(MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache));
438 res.supportedLayersMap.insert({ (*i)->name, GetName() });
439 } catch (InferenceEngine::details::InferenceEngineException&) {
// Plugin version descriptor (API version 2.1 + CI build number) and the
// exported CreatePluginEngine entry point for the Inference Engine core.
448 static const Version version = {{2, 1}, CI_BUILD_NUMBER, "MKLDNNPlugin"};
449 IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version)