[CPU] Generic JIT Eltwise implementation (#1464)
[platform/upstream/dldt.git] / inference-engine / src / mkldnn_plugin / mkldnn_plugin.cpp
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_weights_cache.hpp"
#include "mkldnn_itt.h"

#include <legacy/net_pass.h>
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <vector>
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_transformer.h>
#include <legacy/ie_ngraph_utils.hpp>

#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>

#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>

#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>

#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>

#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>

#ifndef USE_CNNNETWORK_LPT
# include <low_precision/transformer.hpp>
# include <low_precision/convolution.hpp>
# include <low_precision/group_convolution.hpp>
# include <low_precision/multiply_to_group_convolution.hpp>
#endif

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
#include <windows.h>
#else
#include <cpuid.h>
#endif
#endif

using namespace MKLDNNPlugin;
using namespace InferenceEngine;

Engine::Engine() {
    _pluginName = "CPU";
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
    ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
    ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}

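// Runs the nGraph transformation pipeline on the cloned network: common optimizations,
// opset 3/2 -> opset 1 conversions, precision normalization, optional low-precision (INT8)
// transformations, and finally conversion to the legacy CNNNetwork representation used by
// the MKLDNN executable network.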
static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf) {
    OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNNPlugin, "Transformation");

    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
    ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
    manager.register_pass<ngraph::pass::ConvertPriorBox>();
    manager.register_pass<ngraph::pass::CommonOptimizations>();
    manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();

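    // Normalize precisions the CPU plugin does not compute natively: wide and unsigned
    // integer types run as I32, FP16 runs as FP32, and BOOL is stored as U8.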
    std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
            {ngraph::element::i64, ngraph::element::i32},
            {ngraph::element::u64, ngraph::element::i32},
            {ngraph::element::u16, ngraph::element::i32},
            {ngraph::element::u32, ngraph::element::i32},
            {ngraph::element::f16, ngraph::element::f32},
            {ngraph::element::boolean, ngraph::element::u8},
    };

    for (auto & precision : convert_precision_list) {
        manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
    }

    auto pass_config = manager.get_pass_config();

    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    // SpaceToDepth / DepthToSpace node implementations support only input and output tensors of equal rank, with rank <= 5
    pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
                              ngraph::pass::ConvertDepthToSpace>(
            [](const_node_ptr &node) -> bool {
                return node->input_value(0).get_shape().size() <= 5lu &&
                       node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
            });

    // Disable FC reshaping for 3D case
    pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(
            [](const_node_ptr &node) -> bool {
                return node->input_value(0).get_shape().size() == 3ul;
            });

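    // BatchToSpace / SpaceToBatch are kept as-is (not decomposed) for 4D and 5D inputs,
    // which the CPU plugin executes natively.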
    pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
                              ngraph::pass::ConvertSpaceToBatch>(
            [](const_node_ptr &node) -> bool {
                const auto & rank = node->input(0).get_partial_shape().rank().get_length();
                return rank == 4lu || rank == 5lu;
            });

    // List of enabled/disabled transformations
    pass_config->disable<ngraph::pass::ConvertGELU>();
    pass_config->disable<ngraph::pass::HSwishDecomposition>();
    pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
    pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
    pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
    pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
    pass_config->disable<ngraph::pass::ConvertMod>();

    pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();

    manager.run_passes(nGraphFunc);

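    // Optional low-precision (INT8) transformations on the nGraph function,
    // applied only when LPT is enabled in the plugin configuration.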
#ifndef USE_CNNNETWORK_LPT
    using namespace ngraph::pass::low_precision;
    if (conf.lpTransformsMode == Config::LPTransformsMode::On) {
        auto params = LayerTransformation::Params(
            true,  // updatePrecisions
            LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
            LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
            true);  // supportAsymmetricQuantization
        LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
            .add<ConvolutionTransformation, ngraph::opset1::Convolution>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true))
            .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
            .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));

        transformer.transform(nGraphFunc);
    }
#endif

    ngraph::pass::Manager legacyManager;
    legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
    legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);

    auto legacyPassConfig = legacyManager.get_pass_config();
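    // Identifies a dequantization Multiply whose Add input follows a Convolution / GroupConvolution / MatMul;
    // for this pattern AddMultiplyFusion is skipped so the bias Add stays adjacent to the convolution.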
    legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
        if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
            auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
            auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
            bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
            if (add_op && constant && is_dequantization) {
                return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
                    ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
                    ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
            }
        }
        return false;
    });

    legacyManager.run_passes(nGraphFunc);

    clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);

    // WA: after conversion to CNNNetwork, user-specified precisions can redefine input/output precisions,
    // so we need to apply an additional precision conversion, but only for inputs and outputs
    for (auto & precision : convert_precision_list) {
        NetPass::ConvertIOPrecision(*clonedNetwork, convertPrecision(precision.first), convertPrecision(precision.second));
    }
}

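// Creates an executable network for the CPU device: validates input precisions, applies the
// transformation pipeline to a cloned copy of the network, and builds the MKLDNN executable network.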
InferenceEngine::ExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
    OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");

    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs;
    network.getInputsInfo(_networkInputs);
    for (const auto &ii : _networkInputs) {
        auto input_precision = ii.second->getPrecision();
        if (input_precision != InferenceEngine::Precision::FP32 &&
            input_precision != InferenceEngine::Precision::I32 &&
            input_precision != InferenceEngine::Precision::U16 &&
            input_precision != InferenceEngine::Precision::I16 &&
            input_precision != InferenceEngine::Precision::I8 &&
            input_precision != InferenceEngine::Precision::U8 &&
            input_precision != InferenceEngine::Precision::BOOL) {
            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
                               << "Input image format " << input_precision << " is not supported yet...";
        }
    }

    // TODO: handle input precision differently - per input and not one per network...

    // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
    Config conf = engConfig;
    conf.readProperties(config);

    if (conf.enableDynamicBatch) {
        conf.batchLimit = static_cast<int>(network.getBatchSize());
    }

    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
    bool is_transformed = false;
    if (clonedNetwork->getFunction()) {
        Transformation(clonedNetwork, conf);
        is_transformed = true;
    }
    auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
    if (implNetwork) {
        // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
        ConstTransformer transformator(implNetwork.get());
        transformator.fullTrim();
        if (!is_transformed) {
            NetPass::ConvertPrecision(*implNetwork, Precision::I64, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::U64, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::U32, Precision::I32);
            NetPass::ConvertPrecision(*implNetwork, Precision::FP16, Precision::FP32);
            NetPass::ConvertPrecision(*implNetwork, Precision::BOOL, Precision::U8);
            NetPass::ConvertPrecision(*implNetwork, Precision::U16, Precision::I32);
        }
    }

    return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
}

void Engine::SetConfig(const std::map<std::string, std::string> &config) {
    // accumulate config parameters on engine level
    engConfig.readProperties(config);
}

Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    Parameter result;
    auto option = engConfig._config.find(name);
    if (option != engConfig._config.end()) {
        result = option->second;
    } else {
        THROW_IE_EXCEPTION << "Unsupported config key " << name;
    }
    return result;
}

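// Queries CPUID leaf 7 (structured extended feature flags); EBX bit 16 reports AVX-512 Foundation support.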
static bool hasAVX512() {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    unsigned int regs[4] = {7, 0, 0, 0};
#if defined(_WIN32) || defined(WIN32)
    __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
    __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
#endif
    if (regs[1] & (1U << 16))
        return true;
#endif
    return false;
}

Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> metrics;
        metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
        metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
        metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
        std::string brand_string;
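        // The processor brand string is assembled from CPUID leaves 0x80000002-0x80000004 (16 bytes each).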
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
        unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
        unsigned int regs[4];
        for (auto addr : addr_list) {
            regs[0] = addr;
#if defined(_WIN32) || defined(WIN32)
            __cpuid(reinterpret_cast<int*>(regs), regs[0]);
#else
            __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
#endif
            char *ch = reinterpret_cast<char*>(&regs[0]);
            for (size_t j = 0; j < sizeof(regs); j++)
                brand_string += ch[j];
        }
#else
        brand_string = "Non Intel Architecture";
#endif
        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
    } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
        std::vector<std::string> availableDevices = { "" };
        IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
    } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
        std::vector<std::string> capabilities;
        if (with_cpu_x86_bfloat16())
            capabilities.push_back(METRIC_VALUE(BF16));
        if (hasAVX512())
            capabilities.push_back(METRIC_VALUE(WINOGRAD));
        capabilities.push_back(METRIC_VALUE(FP32));
        capabilities.push_back(METRIC_VALUE(FP16));
        capabilities.push_back(METRIC_VALUE(INT8));
        capabilities.push_back(METRIC_VALUE(BIN));
        IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        std::vector<std::string> configKeys;
        for (auto && opt : engConfig._config)
            configKeys.push_back(opt.first);
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
        std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
        IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
    } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
        std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
        IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
    } else {
        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
    }
}

void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
    extensionManager->AddExtension(extension);
}

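// Reports which operations the CPU plugin can execute. For nGraph functions, a cloned and
// transformed copy of the network is probed layer by layer: a layer is supported if the
// corresponding MKLDNN node can be constructed, and results are mapped back to the original
// operations via their fused-names run-time info.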
QueryNetworkResult Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config) const {
    QueryNetworkResult res;
    MKLDNNWeightsSharing::Ptr fake_w_cache;
    auto function = network.getFunction();
    if (function != nullptr) {
        std::unordered_set<std::string> originalOps;
        for (auto&& node : function->get_ops()) {
            originalOps.emplace(node->get_friendly_name());
        }

        // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
        Config conf = engConfig;
        conf.readProperties(config);

        if (conf.enableDynamicBatch) {
            conf.batchLimit = static_cast<int>(network.getBatchSize());
        }

        auto clonedNetwork = cloneNetwork(network);
        Transformation(clonedNetwork, conf);
        std::unordered_set<std::string> supported;
        std::unordered_set<std::string> unsupported;
        for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
            auto layerIsSupported = [&] {
                std::unique_ptr<MKLDNNNode> ptr;
                try {
                    ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
                } catch (InferenceEngine::details::InferenceEngineException&) {
                    return false;
                }
                return true;
            } ();
            for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
                if (contains(originalOps, fusedLayerName)) {
                    if (layerIsSupported) {
                        supported.emplace(fusedLayerName);
                    } else {
                        unsupported.emplace(fusedLayerName);
                    }
                }
            }
        }

        for (auto&& node : function->get_ops()) {
            if (!contains(unsupported, node->get_friendly_name())) {
                for (auto&& inputNodeOutput : node->input_values()) {
                    if (ngraph::op::is_constant(inputNodeOutput.get_node())) {
                        supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
                    }
                }
                for (auto&& outputs : node->outputs()) {
                    for (auto&& outputNodeInput : outputs.get_target_inputs()) {
                        if (ngraph::op::is_output(outputNodeInput.get_node())) {
                            supported.emplace(outputNodeInput.get_node()->get_friendly_name());
                        }
                    }
                }
            }
        }

        for (auto&& layerName : supported) {
            if (!contains(unsupported, layerName)) {
                res.supportedLayersMap.emplace(layerName, GetName());
            }
        }
    } else {
        details::CNNNetworkIterator i(&network);
        while (i != details::CNNNetworkIterator()) {
            try {
                mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
                // if we can create the node and no exception was thrown, then the layer is supported
                std::unique_ptr <MKLDNNNode>(MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache));
                res.supportedLayersMap.insert({ (*i)->name, GetName() });
            } catch (InferenceEngine::details::InferenceEngineException&) {
            }
            i++;
        }
    }

    return res;
}

static const Version version = {{2, 1}, CI_BUILD_NUMBER, "MKLDNNPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version)