// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_weights_cache.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <vector>
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>
#include <ie_util_internal.hpp>
#include <graph_transformer.h>

#include "convert_function_to_cnn_network.hpp"
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>
#include "ngraph_ops/fully_connected.hpp"

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>
#else
#include <cpuid.h>
#endif
#endif

using namespace MKLDNNPlugin;
using namespace InferenceEngine;

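// Plugin constructor: registers the "CPU" device name and pre-loads the built-in MKL-DNN layer extensions.
// The destructor releases the stream and callback executors owned by the global executor manager.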
Engine::Engine() {
    _pluginName = "CPU";
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
    ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
    ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}

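// Converts an nGraph-based network to the legacy CNNNetwork representation consumed by the MKL-DNN graph.
// The callback tells the conversion passes which operations should be kept in their opset form because the
// plugin has native implementations for them (DepthToSpace/SpaceToDepth with rank <= 5 and matching
// input/output rank, 3D FullyConnected, Gelu, BatchToSpace and SpaceToBatch).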
static void Transformation(ICNNNetwork::Ptr& clonedNetwork) {
    const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
        // DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
        if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
            return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
        }

        // SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
        if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
            return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
        }

        if (auto fc_op = std::dynamic_pointer_cast<const ngraph::op::FullyConnected>(node)) {
            return fc_op->input_value(0).get_shape().size() == 3ul;
        }

        return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
            std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
            std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
    };
    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
    ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

    // Note: instead of running all conversion transformations you can build your own transformation pipeline
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::CommonOptimizations>();
    manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();

    manager.set_callback(transformations_callback);
    manager.run_passes(nGraphFunc);

    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
}

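// Creates an executable network for the CPU device: validates input precisions, merges the per-load
// configuration with the engine-level one, converts nGraph-based networks to the legacy representation,
// trims constant subgraphs and builds an MKLDNNExecNetwork.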
InferenceEngine::ExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs;
    network.getInputsInfo(_networkInputs);
    for (const auto &ii : _networkInputs) {
        auto input_precision = ii.second->getPrecision();
        if (input_precision != InferenceEngine::Precision::FP32 &&
            input_precision != InferenceEngine::Precision::I32 &&
            input_precision != InferenceEngine::Precision::U16 &&
            input_precision != InferenceEngine::Precision::I16 &&
            input_precision != InferenceEngine::Precision::I8 &&
            input_precision != InferenceEngine::Precision::U8 &&
            input_precision != InferenceEngine::Precision::BOOL) {
            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
                               << "Input image format " << input_precision << " is not supported yet...";
        }
    }

    // TODO: handle input precision differently - per input and not one per network...

    // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
    Config conf = engConfig;
    conf.readProperties(config);

    if (conf.enableDynamicBatch) {
        conf.batchLimit = static_cast<int>(network.getBatchSize());
    }

    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
    if (clonedNetwork->getFunction()) {
        Transformation(clonedNetwork);
    }
    auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
    if (implNetwork) {
        // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
        ConstTransformer transformator(implNetwork.get());
        transformator.fullTrim();
    }

    return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
}

void Engine::SetConfig(const std::map<std::string, std::string> &config) {
    // accumulate config parameters on engine level
    engConfig.readProperties(config);
}

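// Returns the value of a single configuration key accumulated via SetConfig; throws for unknown keys.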
Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    Parameter result;
    auto option = engConfig._config.find(name);
    if (option != engConfig._config.end()) {
        result = option->second;
    } else {
        THROW_IE_EXCEPTION << "Unsupported config key " << name;
    }
    return result;
}

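// Detects AVX-512 Foundation support: queries CPUID leaf 7, sub-leaf 0 and checks bit 16 of EBX.
// Always returns false on ARM builds, where the CPUID path is compiled out.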
static bool hasAVX512() {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    unsigned int regs[4] = {7, 0, 0, 0};
#if defined(_WIN32) || defined(WIN32)
    __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
    __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
#endif
    if (regs[1] & (1U << 16))
        return true;
#endif
    return false;
}

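// Reports plugin metrics: supported metrics and config keys, the CPU brand string (read via CPUID
// leaves 0x80000002-0x80000004), optimization capabilities, and the supported stream/request ranges.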
Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> metrics;
        metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
        metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
        std::string brand_string;
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
        unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
        unsigned int regs[4];
        for (auto addr : addr_list) {
            regs[0] = addr;
#if defined(_WIN32) || defined(WIN32)
            __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
            __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
            char *ch = reinterpret_cast<char*>(&regs[0]);
            for (size_t j = 0; j < sizeof(regs); j++)
                brand_string += ch[j];
        }
#else
        brand_string = "Non Intel Architecture";
#endif
        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
    } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
        std::vector<std::string> availableDevices = { "" };
        IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
    } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
        std::vector<std::string> capabilities;
        if (with_cpu_x86_bfloat16())
            capabilities.push_back(METRIC_VALUE(BF16));
        if (hasAVX512())
            capabilities.push_back(METRIC_VALUE(WINOGRAD));
        capabilities.push_back(METRIC_VALUE(FP32));
        capabilities.push_back(METRIC_VALUE(FP16));
        capabilities.push_back(METRIC_VALUE(INT8));
        capabilities.push_back(METRIC_VALUE(BIN));
        IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        std::vector<std::string> configKeys;
        for (auto && opt : engConfig._config)
            configKeys.push_back(opt.first);
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
        std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
        IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
    } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
        std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
        IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
    } else {
        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
    }
}

void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
    extensionManager->AddExtension(extension);
}

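// Reports which layers of the network the CPU plugin can execute. For nGraph-based networks the function
// is transformed first and support is mapped back to the original operations through their fused-names
// attribute; for legacy networks each layer is probed by trying to create the corresponding MKLDNN node.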
void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config, QueryNetworkResult& res) const {
    MKLDNNWeightsSharing::Ptr fake_w_cache;
    auto function = network.getFunction();
    if (function != nullptr) {
        std::unordered_set<std::string> originalOps;
        for (auto&& node : function->get_ops()) {
            if (!ngraph::op::is_constant(node) && !ngraph::op::is_parameter(node) && !ngraph::op::is_output(node)) {
                originalOps.emplace(node->get_friendly_name());
            }
        }
        auto clonedNetwork = cloneNetwork(network);
        Transformation(clonedNetwork);
        std::unordered_set<std::string> supported;
        std::unordered_set<std::string> unsupported;
        for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
            auto layerIsSupported = [&] {
                std::unique_ptr<MKLDNNNode> ptr;
                try {
                    ptr.reset(MKLDNNNode::CreateNode(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
                } catch (InferenceEngine::details::InferenceEngineException&) {
                    return false;
                }
                return true;
            } ();
            for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
                if (contains(originalOps, fusedLayerName)) {
                    if (layerIsSupported) {
                        supported.emplace(fusedLayerName);
                    } else {
                        unsupported.emplace(fusedLayerName);
                    }
                }
            }
        }
        for (auto&& layerName : supported) {
            if (!contains(unsupported, layerName)) {
                res.supportedLayersMap.emplace(layerName, GetName());
            }
        }
    } else {
        details::CNNNetworkIterator i(&network);
        while (i != details::CNNNetworkIterator()) {
            try {
                mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
                // if the node can be created without throwing an exception, the layer is supported
                std::unique_ptr<MKLDNNNode>(MKLDNNNode::CreateNode(*i, eng, extensionManager, fake_w_cache));
                res.supportedLayersMap.insert({ (*i)->name, GetName() });
            } catch (InferenceEngine::details::InferenceEngineException&) {
            }
            i++;
        }
    }
}

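// Exported plugin factory: entry point used by the Inference Engine core to instantiate the CPU plugin.
// Wraps the Engine in the common plugin shim together with the plugin version and name.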
INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin*& plugin, ResponseDesc *resp) noexcept {
    try {
        plugin = make_ie_compatible_plugin(
                {{2, 1},
                 CI_BUILD_NUMBER,
                 "MKLDNNPlugin"}, std::make_shared<Engine>());
        return OK;
    }
    catch (std::exception &ex) {
        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
    }
}