// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_weights_cache.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <vector>
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>
#include <ie_util_internal.hpp>
#include <graph_transformer.h>

#include "convert_function_to_cnn_network.hpp"
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>
#include "ngraph_ops/fully_connected.hpp"

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>
#else
#include <cpuid.h>
#endif
#endif

using namespace MKLDNNPlugin;
using namespace InferenceEngine;

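// Plugin constructor: registers the "CPU" device name and pre-loads the built-in MKL-DNN layer extensions.
// The destructor releases the stream and callback executors owned by the global executor manager.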
Engine::Engine() {
    _pluginName = "CPU";
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
    ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
    ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}

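// Converts an nGraph-based network to the legacy CNNNetwork representation consumed by the MKL-DNN graph.
// The callback tells the conversion passes which operations should be kept in their opset form because the
// plugin has native implementations for them (DepthToSpace/SpaceToDepth with rank <= 5 and matching
// input/output rank, 3D FullyConnected, Gelu, BatchToSpace and SpaceToBatch).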
static void Transformation(ICNNNetwork::Ptr& clonedNetwork) {
    const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
        // DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
        if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
            return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
        }

        // SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
        if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
            return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
        }

        if (auto fc_op = std::dynamic_pointer_cast<const ngraph::op::FullyConnected>(node)) {
            return fc_op->input_value(0).get_shape().size() == 3ul;
        }

        return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
            std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
            std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
    };
    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
    ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

    // Note: instead of running all conversion transformations you can build your own transformation pipeline
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::CommonOptimizations>();
    manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();

    manager.set_callback(transformations_callback);
    manager.run_passes(nGraphFunc);

    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
}

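// Creates an executable network for the CPU device: validates input precisions, merges the per-load
// configuration with the engine-level one, converts nGraph-based networks to the legacy representation,
// trims constant subgraphs and builds an MKLDNNExecNetwork.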
InferenceEngine::ExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs;
    network.getInputsInfo(_networkInputs);
    for (const auto &ii : _networkInputs) {
        auto input_precision = ii.second->getPrecision();
        if (input_precision != InferenceEngine::Precision::FP32 &&
            input_precision != InferenceEngine::Precision::I32 &&
            input_precision != InferenceEngine::Precision::U16 &&
            input_precision != InferenceEngine::Precision::I16 &&
            input_precision != InferenceEngine::Precision::I8 &&
            input_precision != InferenceEngine::Precision::U8 &&
            input_precision != InferenceEngine::Precision::BOOL) {
            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
                               << "Input image format " << input_precision << " is not supported yet...";
        }
    }

    // TODO: handle input precision differently - per input and not one per network...

    // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
    Config conf = engConfig;
    conf.readProperties(config);

    if (conf.enableDynamicBatch) {
        conf.batchLimit = static_cast<int>(network.getBatchSize());
    }

    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
    if (clonedNetwork->getFunction()) {
        Transformation(clonedNetwork);
    }
    auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
    if (implNetwork) {
        // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
        ConstTransformer transformator(implNetwork.get());
        transformator.fullTrim();
    }

    return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
}

void Engine::SetConfig(const std::map<std::string, std::string> &config) {
    // accumulate config parameters on engine level
    engConfig.readProperties(config);
}

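// Returns the value of a single configuration key accumulated via SetConfig; throws for unknown keys.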
Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    Parameter result;
    auto option = engConfig._config.find(name);
    if (option != engConfig._config.end()) {
        result = option->second;
    } else {
        THROW_IE_EXCEPTION << "Unsupported config key " << name;
    }
    return result;
}

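// Detects AVX-512 Foundation support: queries CPUID leaf 7, sub-leaf 0 and checks bit 16 of EBX.
// Always returns false on ARM builds, where the CPUID path is compiled out.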
static bool hasAVX512() {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    unsigned int regs[4] = {7, 0, 0, 0};
#if defined(_WIN32) || defined(WIN32)
    __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
    __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
#endif
    if (regs[1] & (1U << 16))
        return true;
#endif
    return false;
}

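// Reports plugin metrics: supported metrics and config keys, the CPU brand string (read via CPUID
// leaves 0x80000002-0x80000004), optimization capabilities, and the supported stream/request ranges.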
Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> metrics;
        metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
        metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
        std::string brand_string;
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
        unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
        unsigned int regs[4];
        for (auto addr : addr_list) {
            regs[0] = addr;
#if defined(_WIN32) || defined(WIN32)
            __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
            __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
            char *ch = reinterpret_cast<char*>(&regs[0]);
            for (size_t j = 0; j < sizeof(regs); j++)
                brand_string += ch[j];
        }
#else
        brand_string = "Non Intel Architecture";
#endif
        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
    } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
        std::vector<std::string> availableDevices = { "" };
        IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
    } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
        std::vector<std::string> capabilities;
        if (with_cpu_x86_bfloat16())
            capabilities.push_back(METRIC_VALUE(BF16));
        if (hasAVX512())
            capabilities.push_back(METRIC_VALUE(WINOGRAD));
        capabilities.push_back(METRIC_VALUE(FP32));
        capabilities.push_back(METRIC_VALUE(FP16));
        capabilities.push_back(METRIC_VALUE(INT8));
        capabilities.push_back(METRIC_VALUE(BIN));
        IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        std::vector<std::string> configKeys;
        for (auto && opt : engConfig._config)
            configKeys.push_back(opt.first);
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
        std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
        IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
    } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
        std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
        IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
    } else {
        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
    }
}

void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
    extensionManager->AddExtension(extension);
}

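// Reports which layers of the network the CPU plugin can execute. For nGraph-based networks the function
// is transformed first and support is mapped back to the original operations through their fused-names
// attribute; for legacy networks each layer is probed by trying to create the corresponding MKLDNN node.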
void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config, QueryNetworkResult& res) const {
    MKLDNNWeightsSharing::Ptr fake_w_cache;
    auto function = network.getFunction();
    if (function != nullptr) {
        std::unordered_set<std::string> originalOps;
        for (auto&& node : function->get_ops()) {
            if (!ngraph::op::is_constant(node) && !ngraph::op::is_parameter(node) && !ngraph::op::is_output(node)) {
                originalOps.emplace(node->get_friendly_name());
            }
        }
        auto clonedNetwork = cloneNetwork(network);
        Transformation(clonedNetwork);
        std::unordered_set<std::string> supported;
        std::unordered_set<std::string> unsupported;
        for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
            auto layerIsSupported = [&] {
                std::unique_ptr<MKLDNNNode> ptr;
                try {
                    ptr.reset(MKLDNNNode::CreateNode(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
                } catch (InferenceEngine::details::InferenceEngineException&) {
                    return false;
                }
                return true;
            } ();
            for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
                if (contains(originalOps, fusedLayerName)) {
                    if (layerIsSupported) {
                        supported.emplace(fusedLayerName);
                    } else {
                        unsupported.emplace(fusedLayerName);
                    }
                }
            }
        }
        for (auto&& layerName : supported) {
            if (!contains(unsupported, layerName)) {
                res.supportedLayersMap.emplace(layerName, GetName());
            }
        }
    } else {
        details::CNNNetworkIterator i(&network);
        while (i != details::CNNNetworkIterator()) {
            try {
                mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
                // if the node can be created without throwing an exception, the layer is supported
                std::unique_ptr<MKLDNNNode>(MKLDNNNode::CreateNode(*i, eng, extensionManager, fake_w_cache));
                res.supportedLayersMap.insert({ (*i)->name, GetName() });
            } catch (InferenceEngine::details::InferenceEngineException&) {
            }
            i++;
        }
    }
}

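// Exported plugin factory: entry point used by the Inference Engine core to instantiate the CPU plugin.
// Wraps the Engine in the common plugin shim together with the plugin version and name.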
INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin*& plugin, ResponseDesc *resp) noexcept {
    try {
        plugin = make_ie_compatible_plugin(
                {{2, 1},
                 CI_BUILD_NUMBER,
                 "MKLDNNPlugin"}, std::make_shared<Engine>());
        return OK;
    }
    catch (std::exception &ex) {
        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
    }
}