1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ie_metric_helpers.hpp"
6 #include "mkldnn_plugin.h"
7 #include "mkldnn_extension_mngr.h"
8 #include "mkldnn_weights_cache.hpp"
9 #include "mkldnn_itt.h"
11 #include <legacy/net_pass.h>
12 #include <threading/ie_executor_manager.hpp>
14 #include <ie_plugin_config.hpp>
17 #include <ie_system_conf.h>
18 #include <generic_ie.hpp>
19 #include <nodes/list.hpp>
20 #include <legacy/ie_util_internal.hpp>
21 #include <legacy/graph_transformer.h>
22 #include <ie_ngraph_utils.hpp>
24 #include <legacy/convert_function_to_cnn_network.hpp>
25 #include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
26 #include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
27 #include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
28 #include <legacy/ngraph_ops/fully_connected.hpp>
30 #include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
31 #include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
33 #include <transformations/common_optimizations/common_optimizations.hpp>
34 #include <transformations/common_optimizations/depth_to_space_fusion.hpp>
35 #include <transformations/control_flow/unroll_tensor_iterator.hpp>
36 #include <transformations/op_conversions/convert_depth_to_space.hpp>
37 #include <transformations/op_conversions/convert_space_to_depth.hpp>
38 #include <transformations/op_conversions/convert_gelu.hpp>
39 #include <transformations/op_conversions/hswish_decomposition.hpp>
40 #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
41 #include <transformations/op_conversions/reduce_l1_decomposition.hpp>
42 #include <transformations/op_conversions/reduce_l2_decomposition.hpp>
43 #include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
44 #include <transformations/op_conversions/softplus_decomposition.hpp>
45 #include <transformations/op_conversions/convert_space_to_batch.hpp>
46 #include <transformations/op_conversions/convert_batch_to_space.hpp>
47 #include <transformations/op_conversions/convert_mod.hpp>
48 #include <transformations/op_conversions/log_softmax_decomposition.hpp>
49 #include <transformations/convert_precision.hpp>
50 #include <transformations/init_node_info.hpp>
51 #include <transformations/rt_info/fused_names_attribute.hpp>
53 #include <ngraph/opsets/opset2.hpp>
54 #include <ngraph/opsets/opset3.hpp>
55 #include <ngraph/opsets/opset4.hpp>
56 #include <ngraph/op/util/op_types.hpp>
57 #include <ngraph/pass/manager.hpp>
59 #include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
61 #ifndef USE_CNNNETWORK_LPT
62 # include <low_precision/transformer.hpp>
63 # include <low_precision/convolution.hpp>
64 # include <low_precision/group_convolution.hpp>
65 # include <low_precision/multiply_to_group_convolution.hpp>
68 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
69 #if defined(_WIN32) || defined(WIN32)
78 using namespace MKLDNNPlugin;
79 using namespace InferenceEngine;
// NOTE(review): body fragment of the Engine constructor (its signature is
// elided from this listing) — registers the built-in CPU extensions with the
// plugin's extension manager.
83 extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
// NOTE(review): body fragment of the Engine destructor (signature elided) —
// releases the stream/callback executors this plugin registered, so the
// shared ExecutorManager does not keep them alive after plugin unload.
87 ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
88 ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
// Runs the full nGraph -> legacy IE transformation pipeline on a cloned
// network in place: common optimizations, opset down-conversion, precision
// normalization, optional low-precision (INT8) transformations, legacy op
// conversion, and finally conversion back to a CNNNetwork representation.
// `conf` supplies plugin configuration (e.g. the LPT mode checked below).
// NOTE(review): the leading numbers are original line numbers of an elided
// listing; several lines (closing braces, #else/#endif) are not visible here.
91 static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf) {
92 OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNNPlugin, "Transformation");
94 auto nGraphFunc = clonedNetwork->getFunction();
95 // Disable shape inference (WA for generic operations)
96 ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
// Stage 1: generic optimizations and opset lowering (opset3 -> opset2 -> opset1).
98 ngraph::pass::Manager manager;
99 manager.register_pass<ngraph::pass::InitNodeInfo>();
100 // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
101 manager.register_pass<ngraph::pass::ConvertPriorBox>();
102 manager.register_pass<ngraph::pass::CommonOptimizations>();
103 manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
104 manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
// Precisions folded to ones the CPU plugin executes: 64/16/32-bit unsigned and
// 64-bit signed ints -> i32, f16 -> f32, boolean -> u8.  The same list is
// reused at the bottom for input/output precision conversion.
106 std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
107 {ngraph::element::i64, ngraph::element::i32},
108 {ngraph::element::u64, ngraph::element::i32},
109 {ngraph::element::u16, ngraph::element::i32},
110 {ngraph::element::u32, ngraph::element::i32},
111 {ngraph::element::f16, ngraph::element::f32},
112 {ngraph::element::boolean, ngraph::element::u8},
115 for (auto & precision : convert_precision_list) {
116 manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
// Per-pass, per-node callbacks: a callback returning true disables the
// registered pass for that particular node (nGraph PassConfig convention).
119 auto pass_config = manager.get_pass_config();
121 using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
123 // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
124 pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
125 ngraph::pass::ConvertDepthToSpace>(
126 [](const_node_ptr &node) -> bool {
127 return node->input_value(0).get_shape().size() <= 5lu &&
128 node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
131 // Disable FC reshaping for 3D case
132 pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(
133 [](const_node_ptr &node) -> bool {
134 return node->input_value(0).get_shape().size() == 3ul;
// BatchToSpace/SpaceToBatch conversion is skipped only for 4D/5D inputs.
137 pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
138 ngraph::pass::ConvertSpaceToBatch>(
139 [](const_node_ptr &node) -> bool {
140 const auto & rank = node->input(0).get_partial_shape().rank().get_length();
141 return rank == 4lu || rank == 5lu;
144 // List of enabled/disabled transformations
// Decompositions disabled below — presumably the CPU plugin executes these
// ops natively so decomposing them would only hurt; TODO confirm per op.
145 pass_config->disable<ngraph::pass::ConvertGELU>();
146 pass_config->disable<ngraph::pass::HSwishDecomposition>();
147 pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
148 pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
149 pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
150 pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
151 pass_config->disable<ngraph::pass::ConvertMod>();
152 pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();
154 pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();
156 manager.run_passes(nGraphFunc);
// Stage 2: optional nGraph-based low-precision (INT8) transformations,
// compiled in only when the legacy CNNNetwork LPT path is not used.
158 #ifndef USE_CNNNETWORK_LPT
159 using namespace ngraph::pass::low_precision;
160 if (conf.lpTransformsMode == Config::LPTransformsMode::On) {
161 auto params = LayerTransformation::Params(
162 true, // updatePrecisions
163 LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
164 LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
165 true); // supportAsymmetricQuantization
// Convolution/GroupConvolution get u8 activations with asymmetric
// quantization support; MultiplyToGroupConvolution runs as standalone cleanup.
166 LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
167 .add<ConvolutionTransformation, ngraph::opset1::Convolution>(
168 LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true))
169 .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
170 LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
171 .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
172 LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));
174 transformer.transform(nGraphFunc);
// Stage 3: conversion of opset1 operations to legacy (IR v7 style) ops.
178 ngraph::pass::Manager legacyManager;
179 legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
180 legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
181 // not legacy actually, but it should be the last transformation in the transformation pipeline
182 legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
184 auto legacyPassConfig = legacyManager.get_pass_config();
// Skip Add+Multiply fusion when the Multiply is a dequantization op fed by an
// Add that follows Convolution/GroupConvolution/MatMul — the plugin handles
// that pattern itself.
185 legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
186 if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
187 auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
188 auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
189 bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
190 if (add_op && constant && is_dequantization) {
191 return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
192 ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
193 ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
199 legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
200 // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
201 return node->get_rt_info().count("UNROLL_TI") == 0;
203 legacyManager.run_passes(nGraphFunc);
// Replace the caller's network with the converted CNNNetwork in place.
205 clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
207 // WA: after conversion to CNNNetwork user precision can redefine input/output precisions
208 // so we need to apply additional precision conversion but only for inputs and outputs
209 for (auto & precision : convert_precision_list) {
210 NetPass::ConvertIOPrecision(*clonedNetwork, convertPrecision(precision.first), convertPrecision(precision.second));
// Builds an executable network for the CPU device: validates input
// precisions, layers per-load `config` on top of the global engine config,
// transforms the (cloned) network, and constructs an MKLDNNExecNetwork.
// Throws NOT_IMPLEMENTED for unsupported input precisions.
// NOTE(review): elided listing — some closing braces are not visible here.
214 InferenceEngine::ExecutableNetworkInternal::Ptr
215 Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
216 OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
218 // verification of supported input
219 InferenceEngine::InputsDataMap _networkInputs;
220 network.getInputsInfo(_networkInputs);
221 for (const auto &ii : _networkInputs) {
222 auto input_precision = ii.second->getPrecision();
// Whitelist of accepted input precisions; anything else is rejected early.
223 if (input_precision != InferenceEngine::Precision::FP32 &&
224 input_precision != InferenceEngine::Precision::I32 &&
225 input_precision != InferenceEngine::Precision::U16 &&
226 input_precision != InferenceEngine::Precision::I16 &&
227 input_precision != InferenceEngine::Precision::I8 &&
228 input_precision != InferenceEngine::Precision::U8 &&
229 input_precision != InferenceEngine::Precision::BOOL &&
230 input_precision != InferenceEngine::Precision::I64 &&
231 input_precision != InferenceEngine::Precision::U64) {
232 THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str
233 << "Input image format " << input_precision << " is not supported yet...";
237 // TODO: handle input precision differently - per input and not one per network...
239 // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
240 Config conf = engConfig;
241 conf.readProperties(config);
// Dynamic batch: cap the batch limit at the network's declared batch size.
243 if (conf.enableDynamicBatch) {
244 conf.batchLimit = static_cast<int>(network.getBatchSize());
// nGraph-based networks go through the full Transformation() pipeline;
// legacy networks skip it and get per-precision conversion below instead.
247 std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
248 bool is_transformed = false;
249 if (clonedNetwork->getFunction()) {
250 Transformation(clonedNetwork, conf);
251 is_transformed = true;
253 auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
255 // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
256 ConstTransformer transformator(implNetwork.get());
257 transformator.fullTrim();
258 if (!is_transformed) {
259 NetPass::ConvertPrecision(*implNetwork, Precision::I64, Precision::I32);
260 NetPass::ConvertPrecision(*implNetwork, Precision::U64, Precision::I32);
261 NetPass::ConvertPrecision(*implNetwork, Precision::U32, Precision::I32);
262 NetPass::ConvertPrecision(*implNetwork, Precision::FP16, Precision::FP32);
263 NetPass::ConvertPrecision(*implNetwork, Precision::BOOL, Precision::U8);
264 NetPass::ConvertPrecision(*implNetwork, Precision::U16, Precision::I32);
268 return std::make_shared<MKLDNNExecNetwork>(*clonedNetwork, conf, extensionManager, weightsSharing);
// Accumulates configuration at engine level; the merged values persist and
// are used as the base config for subsequent LoadNetwork/QueryNetwork calls.
271 void Engine::SetConfig(const std::map<std::string, std::string> &config) {
272 // accumulate config parameters on engine level
273 engConfig.readProperties(config);
// Returns the value of a single engine-level config key.
// Throws for keys not present in the engine configuration.
276 Parameter Engine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
278 auto option = engConfig._config.find(name);
279 if (option != engConfig._config.end()) {
280 result = option->second;
// NOTE(review): the declaration of `result`, the else-branch, and the final
// return are elided from this listing.
282 THROW_IE_EXCEPTION << "Unsupported config key " << name;
// Detects AVX-512 Foundation support via CPUID leaf 7 / sub-leaf 0:
// bit 16 of EBX (regs[1]) is the AVX512F flag.  On ARM targets the CPUID
// code is compiled out entirely (the #else/#endif branch is elided here).
287 static bool hasAVX512() {
288 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
// regs = {leaf, sub-leaf, 0, 0} on input; EAX/EBX/ECX/EDX on output.
289 unsigned int regs[4] = {7, 0, 0, 0};
290 #if defined(_WIN32) || defined(WIN32)
291 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
293 __cpuid_count(regs[0], regs[1], regs[0], regs[1], regs[2], regs[3]);
295 if (regs[1] & (1U << 16))
// Returns plugin metrics: supported metric/config keys, full device name
// (CPU brand string via CPUID), optimization capabilities (BF16/WINOGRAD/
// FP32/FP16/INT8/BIN), and the stream / async-infer-request ranges.
// Throws for unknown metric keys.
// FIX(review): the __get_cpuid call and the reinterpret_cast below contained
// mis-encoded characters ("®s[..]" in place of "&regs[..]" — a classic
// "&re" -> "®" encoding corruption); restored to valid C++.  No other
// code token was changed.
301 Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
302 if (name == METRIC_KEY(SUPPORTED_METRICS)) {
303 std::vector<std::string> metrics;
304 metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES));
305 metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
306 metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
307 metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
308 metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
309 metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
310 metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
311 IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
312 } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
313 std::string brand_string;
314 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
// CPUID leaves 0x80000002..0x80000004 each return 16 bytes of the processor
// brand string in EAX..EDX; the bytes are appended to brand_string in order.
315 unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 };
316 unsigned int regs[4];
317 for (auto addr : addr_list) {
319 #if defined(_WIN32) || defined(WIN32)
320 __cpuid(reinterpret_cast<int*>(regs), regs[0]);
322 __get_cpuid(regs[0], &regs[0], &regs[1], &regs[2], &regs[3]);
324 char *ch = reinterpret_cast<char*>(&regs[0]);
325 for (size_t j = 0; j < sizeof(regs); j++)
326 brand_string += ch[j];
329 brand_string = "Non Intel Architecture";
331 IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, brand_string);
332 } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
// CPU plugin exposes a single unnamed device.
333 std::vector<std::string> availableDevices = { "" };
334 IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
335 } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
336 std::vector<std::string> capabilities;
337 if (with_cpu_x86_bfloat16())
338 capabilities.push_back(METRIC_VALUE(BF16));
340 capabilities.push_back(METRIC_VALUE(WINOGRAD));
341 capabilities.push_back(METRIC_VALUE(FP32));
342 capabilities.push_back(METRIC_VALUE(FP16));
343 capabilities.push_back(METRIC_VALUE(INT8));
344 capabilities.push_back(METRIC_VALUE(BIN));
345 IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
346 } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
347 std::vector<std::string> configKeys;
348 for (auto && opt : engConfig._config)
349 configKeys.push_back(opt.first);
350 IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
351 } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
352 std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
353 IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
354 } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
355 std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
356 IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
358 THROW_IE_EXCEPTION << "Unsupported metric key " << name;
// Registers a user-provided extension with the plugin's extension manager,
// making its layer implementations available to node creation.
362 void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
363 extensionManager->AddExtension(extension);
// Reports which layers of `network` this CPU plugin can execute.
// nGraph path: clone + run Transformation(), then per converted layer try to
// construct an MKLDNN node; support is mapped back to the original ops via
// their fused-names run-time info.  Legacy path: probe node creation for
// every layer of the original network directly.
// NOTE(review): elided listing — closing braces / else-branches are missing.
366 QueryNetworkResult Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config) const {
367 QueryNetworkResult res;
// Dummy weights cache: nodes are constructed only to test supportability.
368 MKLDNNWeightsSharing::Ptr fake_w_cache;
369 auto function = network.getFunction();
370 if (function != nullptr) {
// Remember the friendly names of the original ops so that support of
// transformed/fused layers can be attributed back to them.
371 std::unordered_set<std::string> originalOps;
372 for (auto&& node : function->get_ops()) {
373 originalOps.emplace(node->get_friendly_name());
376 // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
377 Config conf = engConfig;
378 conf.readProperties(config);
380 if (conf.enableDynamicBatch) {
381 conf.batchLimit = static_cast<int>(network.getBatchSize());
384 auto clonedNetwork = cloneNetwork(network);
385 Transformation(clonedNetwork, conf);
386 std::unordered_set<std::string> supported;
387 std::unordered_set<std::string> unsupported;
388 for (details::CNNNetworkIterator itLayer{clonedNetwork.get()}; itLayer != details::CNNNetworkIterator(); itLayer++) {
// A layer counts as supported iff an MKLDNN node can be constructed for it
// without throwing an InferenceEngineException.
389 auto layerIsSupported = [&] {
390 std::unique_ptr<MKLDNNNode> ptr;
392 ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
393 } catch (InferenceEngine::details::InferenceEngineException&) {
398 for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) {
399 if (contains(originalOps, fusedLayerName)) {
400 if (layerIsSupported) {
401 supported.emplace(fusedLayerName);
403 unsupported.emplace(fusedLayerName);
// Also mark Constants that feed supported ops and Results they feed as
// supported, so reported subgraphs stay connected end to end.
409 for (auto&& node : function->get_ops()) {
410 if (!contains(unsupported, node->get_friendly_name())) {
411 for (auto&& inputNodeOutput : node->input_values()) {
412 if (ngraph::op::is_constant(inputNodeOutput.get_node())) {
413 supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
416 for (auto&& outputs : node->outputs()) {
417 for (auto&& outputNodeInput : outputs.get_target_inputs()) {
418 if (ngraph::op::is_output(outputNodeInput.get_node())) {
419 supported.emplace(outputNodeInput.get_node()->get_friendly_name());
// A layer both supported and unsupported (partially fused) is excluded.
426 for (auto&& layerName : supported) {
427 if (!contains(unsupported, layerName)) {
428 res.supportedLayersMap.emplace(layerName, GetName());
// Legacy (non-nGraph) path: probe every layer of the original network.
432 details::CNNNetworkIterator i(&network);
433 while (i != details::CNNNetworkIterator()) {
435 mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
436 // if we can create and have not thrown exception, then layer is supported
437 std::unique_ptr <MKLDNNNode>(MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache));
438 res.supportedLayersMap.insert({ (*i)->name, GetName() });
439 } catch (InferenceEngine::details::InferenceEngineException&) {
// Plugin version descriptor (API version 2.1 + CI build number) and the
// exported CreatePluginEngine entry point for the Inference Engine core.
448 static const Version version = {{2, 1}, CI_BUILD_NUMBER, "MKLDNNPlugin"};
449 IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version)