// inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_weights_cache.hpp"
#include "mkldnn_itt.h"

#include <legacy/net_pass.h>
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <vector>
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_transformer.h>
#include <ie_ngraph_utils.hpp>

#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>

#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>

#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>

#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>

#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>

#ifndef USE_CNNNETWORK_LPT
# include <low_precision/transformer.hpp>
# include <low_precision/convolution.hpp>
# include <low_precision/group_convolution.hpp>
# include <low_precision/multiply_to_group_convolution.hpp>
#endif

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>
#else
#include <cpuid.h>
#endif
#endif

using namespace MKLDNNPlugin;
using namespace InferenceEngine;

Engine::Engine() {
    _pluginName = "CPU";
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
    ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
    ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}

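// Applies the nGraph transformation pipeline (common optimizations, opset and precision
// conversions, optional low precision transformations) and converts the function back to a
// legacy CNNNetwork representation consumed by the rest of the plugin.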
static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf) {
    OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNNPlugin, "Transformation");

    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
    ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
    manager.register_pass<ngraph::pass::ConvertPriorBox>();
    manager.register_pass<ngraph::pass::CommonOptimizations>();
    manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();

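    // Precisions the CPU plugin does not handle natively are converted to the closest
    // supported ones (i64/u64/u32/u16 -> i32, f16 -> f32, boolean -> u8).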
    std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
            {ngraph::element::i64, ngraph::element::i32},
            {ngraph::element::u64, ngraph::element::i32},
            {ngraph::element::u16, ngraph::element::i32},
            {ngraph::element::u32, ngraph::element::i32},
            {ngraph::element::f16, ngraph::element::f32},
            {ngraph::element::boolean, ngraph::element::u8},
    };

    for (auto & precision : convert_precision_list) {
        manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
    }

    auto pass_config = manager.get_pass_config();

    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    // The SpaceToDepth / DepthToSpace node implementations support only input/output tensors of equal rank, with rank <= 5
    pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
                              ngraph::pass::ConvertDepthToSpace>(
            [](const_node_ptr &node) -> bool {
                return node->input_value(0).get_shape().size() <= 5lu &&
                       node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
            });

    // Disable FC reshaping for 3D case
    pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(
            [](const_node_ptr &node) -> bool {
                return node->input_value(0).get_shape().size() == 3ul;
            });

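    // BatchToSpace / SpaceToBatch are kept as-is (not decomposed) only for 4D and 5D inputs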
    pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
                              ngraph::pass::ConvertSpaceToBatch>(
            [](const_node_ptr &node) -> bool {
                const auto & rank = node->input(0).get_partial_shape().rank().get_length();
                return rank == 4lu || rank == 5lu;
            });

    // List of enabled/disabled transformations
    pass_config->disable<ngraph::pass::ConvertGELU>();
    pass_config->disable<ngraph::pass::HSwishDecomposition>();
    pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
    pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
    pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
    pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
    pass_config->disable<ngraph::pass::ConvertMod>();
    pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();

    pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();

    manager.run_passes(nGraphFunc);

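    // nGraph-based low precision (INT8) transformations, enabled via the plugin config;
    // Convolution and GroupConvolution are configured for u8 activations with asymmetric quantization support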
#ifndef USE_CNNNETWORK_LPT
    using namespace ngraph::pass::low_precision;
    if (conf.lpTransformsMode == Config::LPTransformsMode::On) {
        auto params = LayerTransformation::Params(
            true,  // updatePrecisions
            LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
            LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
            true);  // supportAsymmetricQuantization
        LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
            .add<ConvolutionTransformation, ngraph::opset1::Convolution>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true))
            .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
            .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));

        transformer.transform(nGraphFunc);
    }
#endif

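    // Legacy pipeline: convert the remaining opset1 operations to the legacy operation set and
    // unroll TensorIterator bodies when the LowLatency transformation has requested it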
    ngraph::pass::Manager legacyManager;
    legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
    legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
    // Not a legacy transformation as such, but it must be the last transformation in the pipeline
    legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();

    auto legacyPassConfig = legacyManager.get_pass_config();
    legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
        if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
            auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
            auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
            bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
            if (add_op && constant && is_dequantization) {
                return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
                    ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
                    ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
            }
        }
        return false;
    });

    legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // The UnrollTensorIterator transformation is disabled by default and is enabled by the LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    legacyManager.run_passes(nGraphFunc);

    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);

    // WA: after conversion to CNNNetwork the user-specified precisions can redefine input/output precisions,
    // so we need to apply an additional precision conversion, but only for inputs and outputs
    for (auto & precision : convert_precision_list) {
        NetPass::ConvertIOPrecision(*clonedNetwork, convertPrecision(precision.first), convertPrecision(precision.second));
    }
}

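// Compiles the network for the CPU device: validates input precisions, applies the
// transformation pipeline and builds an MKLDNNExecNetwork from the resulting graph.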
InferenceEngine::ExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
    OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");

    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs;
    network.getInputsInfo(_networkInputs);
    for (const auto &ii : _networkInputs) {
        auto input_precision = ii.second->getPrecision();
        if (input_precision != InferenceEngine::Precision::FP32 &&
            input_precision != InferenceEngine::Precision::I32 &&
            input_precision != InferenceEngine::Precision::U16 &&
            input_precision != InferenceEngine::Precision::I16 &&
            input_precision != InferenceEngine::Precision::I8 &&
            input_precision != InferenceEngine::Precision::U8 &&
            input_precision != InferenceEngine::Precision::BOOL &&
            input_precision != InferenceEngine::Precision::I64 &&
            input_precision != InferenceEngine::Precision::U64) {
            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
                               << "Input image format " << input_precision << " is not supported yet...";
        }
    }

    // TODO: handle input precision differently - per input and not one per network...

    // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
    Config conf = engConfig;
    conf.readProperties(config);

    if (conf.enableDynamicBatch) {
        conf.batchLimit = static_cast<int>(network.getBatchSize());
    }

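    // Networks that carry an nGraph function go through the full nGraph pipeline;
    // legacy CNNNetwork inputs instead get the precision conversions applied below.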
    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
    bool is_transformed = false;
    if (clonedNetwork->getFunction()) {
        Transformation(clonedNetwork, conf);
        is_transformed = true;
    }
    auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
    if (implNetwork) {
        // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
        ConstTransformer transformator(implNetwork.get());
        transformator.fullTrim();
        if (!is_transformed) {
            NetPass::ConvertPrecision(*implNetwork, Precision::I64, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::U64, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::U32, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::FP16, Precision::FP32);
            NetPass::ConvertPrecision(*implNetwork, Precision::BOOL, Precision::U8);
            NetPass::ConvertPrecision(*implNetwork, Precision::U16, Precision::I32);
        }
    }

    return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
}

void Engine::SetConfig(const std::map<std::string, std::string> &config) {
    // accumulate config parameters on engine level
    engConfig.readProperties(config);
}

Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    Parameter result;
    auto option = engConfig._config.find(name);
    if (option != engConfig._config.end()) {
        result = option->second;
    } else {
        THROW_IE_EXCEPTION << "Unsupported config key " << name;
    }
    return result;
}

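// Detects AVX-512 Foundation support: CPUID leaf 7 (sub-leaf 0), bit 16 of EBX.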
static bool hasAVX512() {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    unsigned int regs[4] = {7, 0, 0, 0};
#if defined(_WIN32) || defined(WIN32)
    __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
    __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
#endif
    if (regs[1] & (1U << 16))
        return true;
#endif
    return false;
}

Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> metrics;
        metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
        metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
        std::string brand_string;
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
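        // CPUID leaves 0x80000002-0x80000004 each return 16 bytes of the processor brand string in EAX-EDX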
        unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
        unsigned int regs[4];
        for (auto addr : addr_list) {
            regs[0] = addr;
#if defined(_WIN32) || defined(WIN32)
            __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
            __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
            char *ch = reinterpret_cast<char*>(&regs[0]);
            for (size_t j = 0; j < sizeof(regs); j++)
                brand_string += ch[j];
        }
#else
        brand_string = "Non Intel Architecture";
#endif
        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
    } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
        std::vector<std::string> availableDevices = { "" };
        IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
    } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
        std::vector<std::string> capabilities;
        if (with_cpu_x86_bfloat16())
            capabilities.push_back(METRIC_VALUE(BF16));
        if (hasAVX512())
            capabilities.push_back(METRIC_VALUE(WINOGRAD));
        capabilities.push_back(METRIC_VALUE(FP32));
        capabilities.push_back(METRIC_VALUE(FP16));
        capabilities.push_back(METRIC_VALUE(INT8));
        capabilities.push_back(METRIC_VALUE(BIN));
        IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        std::vector<std::string> configKeys;
        for (auto && opt : engConfig._config)
            configKeys.push_back(opt.first);
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
        std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
        IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
    } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
        std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
        IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
    } else {
        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
    }
}

void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
    extensionManager->AddExtension(extension);
}

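// Reports which original operations the CPU plugin can handle: each layer of the transformed
// network is test-instantiated through the MKLDNN node factory, and the result is mapped back
// to the original nGraph operations via their fused names.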
QueryNetworkResult Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config) const {
    QueryNetworkResult res;
    MKLDNNWeightsSharing::Ptr fake_w_cache;
    auto function = network.getFunction();
    if (function != nullptr) {
        std::unordered_set<std::string> originalOps;
        for (auto&& node : function->get_ops()) {
            originalOps.emplace(node->get_friendly_name());
        }

        // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
        Config conf = engConfig;
        conf.readProperties(config);

        if (conf.enableDynamicBatch) {
            conf.batchLimit = static_cast<int>(network.getBatchSize());
        }

        auto clonedNetwork = cloneNetwork(network);
        Transformation(clonedNetwork, conf);
        std::unordered_set<std::string> supported;
        std::unordered_set<std::string> unsupported;
        for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
            auto layerIsSupported = [&] {
                std::unique_ptr<MKLDNNNode> ptr;
                try {
                    ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
                } catch (InferenceEngine::details::InferenceEngineException&) {
                    return false;
                }
                return true;
            } ();
            for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
                if (contains(originalOps, fusedLayerName)) {
                    if (layerIsSupported) {
                        supported.emplace(fusedLayerName);
                    } else {
                        unsupported.emplace(fusedLayerName);
                    }
                }
            }
        }

        for (auto&& node : function->get_ops()) {
            if (!contains(unsupported, node->get_friendly_name())) {
                for (auto&& inputNodeOutput : node->input_values()) {
                    if (ngraph::op::is_constant(inputNodeOutput.get_node())) {
                        supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
                    }
                }
                for (auto&& outputs : node->outputs()) {
                    for (auto&& outputNodeInput : outputs.get_target_inputs()) {
                        if (ngraph::op::is_output(outputNodeInput.get_node())) {
                            supported.emplace(outputNodeInput.get_node()->get_friendly_name());
                        }
                    }
                }
            }
        }

        for (auto&& layerName : supported) {
            if (!contains(unsupported, layerName)) {
                res.supportedLayersMap.emplace(layerName, GetName());
            }
        }
    } else {
        details::CNNNetworkIterator i(&network);
        while (i != details::CNNNetworkIterator()) {
            try {
                mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
                // if the node can be created without throwing an exception, the layer is supported
                std::unique_ptr<MKLDNNNode>(MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache));
                res.supportedLayersMap.insert({ (*i)->name, GetName() });
            } catch (InferenceEngine::details::InferenceEngineException&) {
            }
            i++;
        }
    }

    return res;
}

static const Version version = {{2, 1}, CI_BUILD_NUMBER, "MKLDNNPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version)