1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ie_metric_helpers.hpp"
6 #include "mkldnn_plugin.h"
7 #include "mkldnn_extension_mngr.h"
8 #include "mkldnn_weights_cache.hpp"
9 #include <cpp_interfaces/base/ie_plugin_base.hpp>
10 #include <threading/ie_executor_manager.hpp>
12 #include <ie_plugin_config.hpp>
15 #include <ie_system_conf.h>
16 #include <generic_ie.hpp>
17 #include <nodes/list.hpp>
18 #include <ie_util_internal.hpp>
19 #include <graph_transformer.h>
21 #include "convert_function_to_cnn_network.hpp"
22 #include <transformations/common_optimizations/common_optimizations.hpp>
23 #include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
24 #include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
25 #include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
26 #include <transformations/rt_info/fused_names_attribute.hpp>
27 #include <ngraph/opsets/opset1.hpp>
28 #include <ngraph/opsets/opset2.hpp>
29 #include <ngraph/opsets/opset3.hpp>
30 #include <ngraph/op/fused/gelu.hpp>
31 #include <ngraph/op/util/op_types.hpp>
32 #include <ngraph/pass/manager.hpp>
33 #include "ngraph_ops/fully_connected.hpp"
35 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
36 #if defined(_WIN32) || defined(WIN32)
45 using namespace MKLDNNPlugin;
46 using namespace InferenceEngine;
50 extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
54 ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
55 ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
// Runs the nGraph transformation pipeline on a cloned network and converts the
// resulting ngraph::Function back into a legacy ICNNNetwork, replacing the
// clonedNetwork pointer in place.
58 static void Transformation(ICNNNetwork::Ptr& clonedNetwork) {
// Pass callback: returning true asks the conversion passes to KEEP the node
// as-is (skip its decomposition to legacy/opset1 form) because the CPU plugin
// has a native implementation for it.
59 const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
60 // DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
61 if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
62 return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
65 // SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
66 if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
67 return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
// Keep FullyConnected only when its first input is 3D.
70 if (auto fc_op = std::dynamic_pointer_cast<const ngraph::op::FullyConnected>(node)) {
71 return fc_op->input_value(0).get_shape().size() == 3ul;
// Gelu / BatchToSpace / SpaceToBatch are always kept in their opset2 form.
74 return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
75 std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
76 std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
78 auto nGraphFunc = clonedNetwork->getFunction();
79 // Disable shape inference (WA for generic operations)
80 ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
82 // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
83 ngraph::pass::Manager manager;
// Pass order matters: common optimizations first, then step the opset down
// (3 -> 2 -> 1) and finally convert opset1 to the legacy IE layer set.
84 manager.register_pass<ngraph::pass::CommonOptimizations>();
85 manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
86 manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
87 manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
89 manager.set_callback(transformations_callback);
90 manager.run_passes(nGraphFunc);
// Replace the cloned network with the legacy CNNNetwork built from the
// transformed function.
92 clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
// Compiles a network for execution on CPU: validates input precisions, merges
// the per-call config into the engine config, clones the network (running the
// nGraph transformation pipeline when an ngraph::Function is present), and
// builds an MKLDNNExecNetwork from the result.
95 InferenceEngine::ExecutableNetworkInternal::Ptr
96 Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
97 // verification of supported input
98 InferenceEngine::InputsDataMap _networkInputs;
99 network.getInputsInfo(_networkInputs);
// Only this whitelist of input precisions is accepted; anything else is
// rejected up front with NOT_IMPLEMENTED.
100 for (const auto &ii : _networkInputs) {
101 auto input_precision = ii.second->getPrecision();
102 if (input_precision != InferenceEngine::Precision::FP32 &&
103 input_precision != InferenceEngine::Precision::I32 &&
104 input_precision != InferenceEngine::Precision::U16 &&
105 input_precision != InferenceEngine::Precision::I16 &&
106 input_precision != InferenceEngine::Precision::I8 &&
107 input_precision != InferenceEngine::Precision::U8 &&
108 input_precision != InferenceEngine::Precision::BOOL) {
109 THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
110 << "Input image format " << input_precision << " is not supported yet...";
114 // TODO: handle input precision differently - per input and not one per network...
116 // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
// Per-load config is layered on top of the accumulated engine config; the
// engine-level engConfig itself is not modified here.
117 Config conf = engConfig;
118 conf.readProperties(config);
// Dynamic batch: cap the batch limit at the network's declared batch size.
120 if (conf.enableDynamicBatch) {
121 conf.batchLimit = static_cast<int>(network.getBatchSize());
124 std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
// nGraph-based networks go through the transformation pipeline first.
125 if (clonedNetwork->getFunction()) {
126 Transformation(clonedNetwork);
128 auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
130 // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
// Fold constant subgraphs / trim the legacy representation before compiling.
131 ConstTransformer transformator(implNetwork.get());
132 transformator.fullTrim();
135 return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
// Merges the given key/value pairs into the engine-level configuration.
// Parsing/validation is delegated to Config::readProperties — presumably it
// rejects unknown keys; confirm in the Config implementation.
138 void Engine::SetConfig(const std::map<std::string, std::string> &config) {
139 // accumulate config parameters on engine level
140 engConfig.readProperties(config);
// Returns the stored value for a single engine config key; throws for keys
// that are not present in engConfig._config. The 'result' variable is
// declared on a line elided from this view.
143 Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
145 auto option = engConfig._config.find(name);
146 if (option != engConfig._config.end()) {
147 result = option->second;
149 THROW_IE_EXCEPTION << "Unsupported config key " << name;
// Detects AVX-512 Foundation support via CPUID leaf 7, subleaf 0.
// regs[] is laid out as {EAX, EBX, ECX, EDX}; it is pre-seeded with the leaf
// number (7) so the same array doubles as input and output.
154 static bool hasAVX512() {
155 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
156 unsigned int regs[4] = {7, 0, 0, 0};
157 #if defined(_WIN32) || defined(WIN32)
// MSVC intrinsic; writes EAX..EDX into regs.
158 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
// GCC/Clang intrinsic taking explicit leaf and subleaf.
160 __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
// EBX bit 16 is the AVX512F feature flag (Intel SDM, CPUID leaf 07H).
162 if (regs[1] & (1U << 16))
// Answers plugin metric queries (device name, capabilities, supported
// keys/metrics, stream/request ranges); throws for unknown metric names.
168 Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
169 if (name == METRIC_KEY(SUPPORTED_METRICS)) {
170 std::vector<std::string> metrics;
171 metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
172 metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
173 metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
174 metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
175 metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
176 metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
177 metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
178 IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
179 } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
180 std::string brand_string;
181 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
// CPUID leaves 0x80000002..0x80000004 each return 16 bytes of the processor
// brand string in EAX..EDX; concatenating all three yields the full name.
182 unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
183 unsigned int regs[4];
184 for (auto addr : addr_list) {
186 #if defined(_WIN32) || defined(WIN32)
187 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
// NOTE(review): '®s' on the next two lines looks like mojibake for '&regs'
// (HTML entity &reg; mangled during extraction) — confirm against the
// upstream source; as written this would not compile.
189 __get_cpuid(regs[0], ®s[0], ®s[1], ®s[2], ®s[3]);
191 char *ch = reinterpret_cast<char*>(®s[0]);
192 for (size_t j = 0; j < sizeof(regs); j++)
193 brand_string += ch[j];
// Non-x86 builds cannot query CPUID, so return a placeholder name.
196 brand_string = "Non Intel Architecture";
198 IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
199 } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
// CPU exposes a single unnamed device.
200 std::vector<std::string> availableDevices = { "" };
201 IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
202 } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
203 std::vector<std::string> capabilities;
// BF16 is advertised only when the host CPU reports bfloat16 support.
204 if (with_cpu_x86_bfloat16())
205 capabilities.push_back(METRIC_VALUE(BF16));
207 capabilities.push_back(METRIC_VALUE(WINOGRAD));
208 capabilities.push_back(METRIC_VALUE(FP32));
209 capabilities.push_back(METRIC_VALUE(FP16));
210 capabilities.push_back(METRIC_VALUE(INT8));
211 capabilities.push_back(METRIC_VALUE(BIN));
212 IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
213 } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
214 std::vector<std::string> configKeys;
215 for (auto && opt : engConfig._config)
216 configKeys.push_back(opt.first);
217 IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
218 } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
// (min, max, step) for async infer requests.
219 std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
220 IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
221 } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
// (min, max) streams — max is bounded by the available CPU threads.
222 std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
223 IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
225 THROW_IE_EXCEPTION << "Unsupported metric key " << name;
// Registers a user extension by forwarding it to the plugin's extension
// manager (shared with network compilation and QueryNetwork).
229 void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
230 extensionManager->AddExtension(extension);
// Reports which layers of the network this plugin can execute. A layer is
// considered supported iff MKLDNNNode::CreateNode succeeds for it (does not
// throw). Two paths: nGraph-function networks (mapped back to original op
// names through fused-names metadata) and legacy layer-based networks.
233 void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config, QueryNetworkResult& res) const {
// Throwaway weights cache — node creation here is only a support probe.
234 MKLDNNWeightsSharing::Ptr fake_w_cache;
235 auto function = network.getFunction();
236 if (function != nullptr) {
// Record the friendly names of all "real" original ops (skip Constant,
// Parameter and Result nodes).
237 std::unordered_set<std::string> originalOps;
238 for (auto&& node : function->get_ops()) {
239 if (!ngraph::op::is_constant(node) && !ngraph::op::is_parameter(node) && !ngraph::op::is_output(node)) {
240 originalOps.emplace(node->get_friendly_name());
// Run the same transformation pipeline used at load time so the probe sees
// the layers the plugin would actually execute.
243 auto clonedNetwork = cloneNetwork(network);
244 Transformation(clonedNetwork);
245 std::unordered_set<std::string> supported;
246 std::unordered_set<std::string> unsupported;
247 for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
248 auto layerIsSupported = [&] {
249 std::unique_ptr<MKLDNNNode> ptr;
250 ptr.reset(MKLDNNNode::CreateNode(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
252 } catch (InferenceEngine::details::InferenceEngineException&) {
// A transformed layer may be a fusion of several original ops; propagate
// the supported/unsupported verdict to every original op it absorbed.
257 for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
258 if (contains(originalOps, fusedLayerName)) {
259 if (layerIsSupported) {
260 supported.emplace(fusedLayerName);
262 unsupported.emplace(fusedLayerName);
// An original op is reported only if it never landed in an unsupported
// transformed layer.
267 for (auto&& layerName : supported) {
268 if (!contains(unsupported, layerName)) {
269 res.supportedLayersMap.emplace(layerName, GetName());
// Legacy (non-nGraph) path: probe each layer directly.
273 details::CNNNetworkIterator i(&network);
274 while (i != details::CNNNetworkIterator()) {
276 mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
277 // if we can create and have not thrown exception, then layer is supported
278 std::unique_ptr <MKLDNNNode>(MKLDNNNode::CreateNode(*i, eng, extensionManager, fake_w_cache));
279 res.supportedLayersMap.insert({ (*i)->name, GetName() });
// Creation failure means "unsupported" — deliberately swallowed so the
// query continues with the remaining layers.
280 } catch (InferenceEngine::details::InferenceEngineException&) {
// Exported plugin factory: wraps a new Engine in the IE-compatible plugin
// shim. noexcept boundary — any std::exception is converted to a
// GENERAL_ERROR status with the message copied into resp.
287 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin*& plugin, ResponseDesc *resp) noexcept {
289 plugin = make_ie_compatible_plugin(
292 "MKLDNNPlugin"}, std::make_shared<Engine>());
295 catch (std::exception &ex) {
296 return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();