25c7310cf3dee4de31f55766b4fb22520b4adecb
[platform/upstream/dldt.git] / inference-engine / src / cldnn_engine / cldnn_program.h
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #pragma once
6
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <string>
#include <utility>
#include <algorithm>
#include <cctype>
#include <cstdint>

#include <cpp/ie_cnn_network.h>
#include <cpp_interfaces/exception2status.hpp>
#include <ie_blob.h>
#include <ie_plugin.hpp>
#include <inference_engine.hpp>
20
21 #include "debug_options.h"
22 #include "cldnn_custom_layer.h"
23 #include "cldnn_config.h"
24
25 #include <CPP/engine.hpp>
26 #include <CPP/memory.hpp>
27 #include <CPP/topology.hpp>
28 #include <CPP/primitive.hpp>
29 #include <CPP/softmax.hpp>
30 #include <CPP/upsampling.hpp>
31 #include <CPP/pooling.hpp>
32 #include <CPP/eltwise.hpp>
33 #include <CPP/concatenation.hpp>
34 #include <CPP/detection_output.hpp>
35
#ifndef NDEBUG
#include <iostream>
#include <iomanip>

// Debug builds additionally print the failing description and the exact source
// location to stdout before throwing, so the context stays visible even when
// the exception is caught and rewrapped upstream.
// Fix: the previously constructed `InferenceEngineException ex(__FILE__, __LINE__)`
// local was never used (dead code / unused-variable warning) and is removed.
// NOTE: the trailing semicolon after `while (0)` is kept for compatibility with
// existing call sites that may rely on it.
#define THROW_CLDNN_EXCEPTION(desc)\
do { \
std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0);
#else
// Release builds: plain Inference Engine exception, no console output.
#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc;
#endif  // NDEBUG
// Shorthand cast to clDNN's tensor value type.
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
49
50 namespace CLDNNPlugin {
51 template<typename LayerTypePtr>
52 LayerTypePtr tryAs(const InferenceEngine::CNNLayerPtr& in_ptr) {
53     return dynamic_cast<LayerTypePtr>(in_ptr.get());
54 }
55
56 template<typename LayerTypePtr>
57 LayerTypePtr as(const InferenceEngine::CNNLayerPtr& in_ptr) {
58     auto result_ptr = dynamic_cast<LayerTypePtr> (in_ptr.get());
59     if (nullptr == result_ptr) {
60         THROW_IE_EXCEPTION << "CNNLayerPtr is not suitable for casting to requested layer type";
61     }
62     return result_ptr;
63 }
64
65 inline std::string layer_type_lower(const InferenceEngine::CNNLayer* layer) {
66     std::string layerType = layer->type;
67     std::transform(layerType.begin(), layerType.end(), layerType.begin(),
68         [](unsigned char c) -> unsigned char { return std::tolower(c); });
69     return layerType;
70 }
71
72 inline std::string layer_type_name_ID(const InferenceEngine::CNNLayer* layer) {
73     return layer_type_lower(layer) + ":" + layer->name;
74 }
75
76 inline std::string layer_type_lower(InferenceEngine::CNNLayerPtr layer) {
77     return layer_type_lower(layer.get());
78 }
79
80 inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
81     return layer_type_name_ID(layer.get());
82 }
83
84 struct PerfCounter {
85     InferenceEngine::InferenceEngineProfileInfo::LayerStatus status;
86     bool isCPU;
87     uint64_t realTime_uSec;
88     uint64_t cpu_uSec;
89     uint32_t num;
90     std::string layerType;
91
92 public:
93     PerfCounter() : realTime_uSec(0), cpu_uSec(0), num(0),
94                     status(InferenceEngine::InferenceEngineProfileInfo::NOT_RUN), isCPU(false) {}
95
96     long long realTime_avg() const { return (num == 0) ? 0 : realTime_uSec / num; }
97     long long cpu_avg() const { return (num == 0) ? 0 : cpu_uSec / num; }
98 };
99
// Builds clDNN topology/program objects from an Inference Engine network and
// keeps the bookkeeping that maps IR layers onto the clDNN primitives created
// for them. Several compiled programs may exist (see m_programs and
// GetMaxBatchSizeForSingleProgram) — presumably one per batch size when
// dynamic batching is used; confirm in cldnn_program.cpp.
class Program {
public:
    Program(InferenceEngine::ICNNNetwork &network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
    // Returns the compiled program with the given index (0 by default).
    std::shared_ptr<cldnn::program> getCompiledProgram(int program_id = 0);

    // IR layer name -> id of the clDNN primitive producing its result.
    // NOTE(review): mapping semantics inferred from names — verify in the .cpp.
    std::map<std::string, cldnn::primitive_id> primitiveIDs;
    // clDNN primitive id -> IR layer names it originates from (the vector
    // value allows one primitive to correspond to several layers).
    std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
    // IR layer name -> primitive ids feeding it (inputs).
    std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
    // primitive id -> (IR layer name, accumulated performance counters).
    std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;

    // Primitives for which profiling information is collected.
    std::vector<cldnn::primitive_id> profilingIDs;

    // Output blob name -> its dimensions as reported to the IE runtime.
    std::map<std::string, InferenceEngine::SizeVector> outputDims;
    // Input blob name -> clDNN layout chosen for it (see getInputLayouts()).
    std::map<std::string, cldnn::layout> inputLayouts;
    // Keyed by the raw data pointer of an IE blob; presumably used to reuse
    // the primitive created for an identical blob seen earlier — confirm.
    std::map<const char *, cldnn::primitive_id> blobMemCache;

    int m_max_batch;  // NOTE(review): appears to be the max batch size supported; confirm
    int m_curBatch;   // NOTE(review): batch size currently in effect; confirm

    // Outputs of the network currently being translated.
    InferenceEngine::OutputsDataMap p_currentOutputs;

    // Primitive ids of the given layer's input layers.
    std::vector<cldnn::primitive_id> GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const;
    const std::map<std::string, cldnn::layout>& getInputLayouts() const { return inputLayouts; }
    int GetMaxBatchSizeForSingleProgram();


    // internal types
    // Plugin-internal classification of IR layer types, produced from the IR
    // type string by LayerTypeFromStr(). NOTE: enumerator order fixes the
    // implicit integer values — append new entries before NO_TYPE rather than
    // reordering existing ones.
    enum LayerType {
        Convolution,
        DeformableConvolution,
        ReLU,
        ReLU6,
        Sigmoid,
        TanH,
        ELU,
        Activation,
        Exp,
        Asin,
        Atan,
        Acos,
        Abs,
        Asinh,
        Acosh,
        Atanh,
        Not,
        LRN,
        Pooling,
        FullyConnected,
        SoftMax,
        Power,
        Split,
        Concatenate,
        Eltwise,
        SimplerNMS,
        ROIPooling,
        Crop,
        Deconvolution,
        PriorBox,
        DetectionOutput,
        Normalize,
        Reshape,
        Permute,
        Flatten,
        BatchNormalization,
        PReLU,
        ScaleShift,
        Proposal,
        PSROIPooling,
        Clamp,
        Copy,
        Upsampling,
        Resample,
        RegionYolo,
        ReorgYolo,
        ConstantBlob,
        ArgMax,
        ArgMin,
        MVN,
        Unpooling,
        Tile,
        Pad,
        LSTMCell,
        RNN,
        Gather,
        DepthToSpace,
        ShuffleChannels,
        StridedSlice,
        Broadcast,
        ReverseSequence,
        BinaryConvolution,
        Quantize,
        Squeeze,
        Unsqueeze,
        Reduce,
        TopK,
        Floor,
        Ceil,
        Erf,
        HardSigmoid,
        Log,
        Neg,
        Reciprocal,
        Selu,
        Sign,
        SoftPlus,
        SoftSign,
        Tan,
        Gemm,
        OneHot,
        NO_TYPE
    };
    // primitive id -> primitive id map used for generic-layer blob primitives
    // (see CreateGenericLayerBlobPrimitives below).
    using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;

    static LayerType LayerTypeFromStr(const std::string& str);

private:
    std::vector<std::shared_ptr<cldnn::program>> m_programs;  // compiled programs, indexed by program_id
    std::shared_ptr<const cldnn::engine> m_engine;            // clDNN engine the programs are built on
    Config m_config;                                          // plugin configuration snapshot

    std::shared_ptr<cldnn::program> BuildProgram(InferenceEngine::ICNNNetwork &network);

    // Registers a profiling entry for the named layer (in perfMap, presumably).
    void InitProfileInfo(const std::string& layerName,
                         const std::string& layerType,
                         bool isCPU = false,
                         InferenceEngine::InferenceEngineProfileInfo::LayerStatus status
                         = InferenceEngine::InferenceEngineProfileInfo::EXECUTED);

    // Well-known id tags for auxiliary primitives (pre/post-processing,
    // weights, biases, ...). Values are defined in the .cpp.
    static const cldnn::primitive_id m_preProcessTag;
    static const cldnn::primitive_id m_weightsTag;
    static const cldnn::primitive_id m_biasesTag;
    static const cldnn::primitive_id m_meanValuesTag;
    static const cldnn::primitive_id m_postProcessTag;
    static const cldnn::primitive_id m_scalesTag;
    static const cldnn::primitive_id m_workaroundTag;
    static const cldnn::primitive_id m_preCustomLayerTag;
    static const cldnn::primitive_id m_postCustomLayerTag;


    // How weight data must be rearranged when copied into a clDNN blob
    // (consumed by CreatePrimitiveFromBlob).
    enum WeightRearrangeType {
        BroadcastFeatures,
        FlipDeconvDims,
        NO_REARRANGE
    };

    cldnn::format m_defaultFormat;
    void InitFormat(InferenceEngine::ICNNNetwork &network);

    // IE-to-clDNN enum/format conversion helpers.
    static cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p);
    static cldnn::format     FormatFromLayout(InferenceEngine::Layout l);
    static cldnn::upsampling_sample_type UpsamplingTypeFromString(const std::string& str);

    void Load(InferenceEngine::ICNNNetwork &network);
    static cldnn::pooling_mode PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding = false);
    static cldnn::eltwise_mode EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op);
    static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str);
    static cldnn::softmax::dimension_t SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer);
    // Creates a data primitive from an IE blob; `rearrange` controls weight
    // layout fixups, `blobByteOffset` allows slicing a shared blob.
    cldnn::primitive_id CreatePrimitiveFromBlob(cldnn::topology& topology,
                                                cldnn::primitive_id primID,
                                                const InferenceEngine::Blob::Ptr pBlob,
                                                const cldnn::layout& blobLayout,
                                                size_t blobByteOffset = 0,
                                                WeightRearrangeType rearrange = NO_REARRANGE);
    void CreateWeightAndBiasPrimitives(cldnn::topology& topology,
                                       const InferenceEngine::CNNLayerPtr& layer,
                                       std::vector<cldnn::primitive_id>& weightsPrimID,
                                       std::vector<cldnn::primitive_id>& biasesPrimID);
    void CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
                                             const InferenceEngine::CNNLayerPtr& layer,
                                             std::vector<cldnn::primitive_id>& weightsPrimID,
                                             std::vector<cldnn::primitive_id>& biasesPrimID);
    void CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology,
                                           const InferenceEngine::BatchNormalizationLayer* bnLayer,
                                           cldnn::primitive_id& weightsPrimID,
                                           cldnn::primitive_id& biasesPrimID);
    void CreateQuantizationPrimitives(cldnn::topology& topology,
                                      const InferenceEngine::CNNLayerPtr& layer,
                                      std::vector<cldnn::primitive_id>& weightsQuantizationPrimID,
                                      bool supportsDequantization,
                                      size_t split = 1);
    void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
    void AddInputPrimitive(cldnn::topology& topology,
                           InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision, const std::string inputName);
    void AddOutputPrimitive(cldnn::topology& topology,
                            std::string outputName, const InferenceEngine::DataPtr outputData,
                            InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED);
    // Dispatches one IR layer to the matching Create*Primitive method below.
    void CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
    bool IsValidSplitConvMerge(const InferenceEngine::SplitLayer* splitLayer) const;
    bool CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const;
    // Graph-walking helpers over the IR.
    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::DataPtr data);
    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::CNNLayerPtr layer);
    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::DataPtr data);
    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer);
    void AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value);

    GenericBlobMap CreateGenericLayerBlobPrimitives(cldnn::topology& topology, const InferenceEngine::GenericLayer* layer);
    static void ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames);
    // True when `paramName` is present in the layer's parameter map.
    static bool HasParam(const std::map<std::string, std::string>& layerParams, std::string paramName) {
        auto p = layerParams.find(paramName);
        return p != layerParams.end();
    }

    void changeInputBatch(int batch);

    // Layer Primitive Creators
    // One method per supported IR layer type; each appends the corresponding
    // clDNN primitive(s) for `layer` to `topology`.
    void CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer);
    void CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
    void CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateUpsamplingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
    void CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer);
    void CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateBinaryConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
    void CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
};
361
362 }  // namespace CLDNNPlugin