inference-engine/src/gna_plugin/gna_model_serial.hpp

   1 // Copyright (C) 2018-2020 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #pragma once
   6
   7 #include <istream>
   8 #include <vector>
   9 #include <utility>
  10
  11 #include <gna-api.h>
  12 #include "descriptions/gna_input_desc.hpp"
  13 #include "descriptions/gna_output_desc.hpp"
  14 #include "gna_plugin_log.hpp"
  15 #if GNA_LIB_VER == 2
  16 #include "gna2-model-api.h"
  17 #endif
  18
  19 #pragma pack(push, 1)
  20
  21 /**
  22  * version history
  23  * 1.0 - basic support
  24  * 1.1 - added memory information
  25  * 2.0 - for use with GNA2 library
  26  * 2.1 - multiple i/o support
  27  */
  28 #if GNA_LIB_VER == 2
  29 #define HEADER_MAJOR 2
  30 #define HEADER_MINOR 1
  31 #else
  32 #define HEADER_MAJOR 1
  33 #define HEADER_MINOR 2
  34 #endif
  35
  36
  37 /**
  38  * @brief Header version 2.1
  39  */
  40 struct ModelHeader {
  41     /**
  42      *@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d
  43      */
  44     char gnam[4];
  45     /**
  46      * @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header
  47      * usually it is an indicator of working with version of model different that is current export function produce
  48      */
  49     uint32_t headerSize = 0u;
  50     struct Version {
  51         /**
  52          * @details Version of format Major – unsigned int, ex: 0x0001
  53          * every change in the header or in the layers definition should be reflected in version change
  54          * for backward compatibility new parsers can read old versions of model with certain restrictions
  55          */
  56         uint16_t major = 0u;
  57         /**
  58          * @details Version of Format Minor – unsigned int,  corresponding to build revision for example
  59          * changes in minor version are not affected layout of model
  60          */
  61         uint32_t minor = 0u;
  62     } version;
  63     /**
  64      * @brief Memory required to be allocated using GNAAlloc()
  65      */
  66     uint64_t gnaMemSize = 0ull;
  67     /**
  68      * @brief Number of GNA Layers
  69      */
  70     uint64_t layersCount = 0ull;
  71     /**
  72      * @brief Grouping level
  73      */
  74     uint32_t nGroup = 0u;
  75     /**
  76      * Convolution related setting - they are affecting input transformation
  77      */
  78     uint32_t nRotateRows = 0u;
  79     uint32_t nRotateColumns = 0u;
  80
  81     uint32_t nInputs = 0u;
  82     uint32_t nOutputs = 0u;
  83
  84     /**
  85      * Reserved Data might be here
  86      */
  87 };
  88 #pragma pack(pop)
  89
  90 /**
  91  * @brief implements serialisation tasks for GNAGraph
  92  */
  93 class GNAModelSerial {
  94  public:
  95     /*
  96      * In runtime endpoint mostly same as in serial version, except of descriptor field
  97      */
  98     struct RuntimeEndPoint {
  99         /**
 100          * if scale factor is different then pased into infer , network might need to be requantized
 101          */
 102         float scaleFactor = 0;
 103         /**
 104          * Pointer descriptor
 105          */
 106         void* descriptor_ptr = nullptr;
 107         /**
 108          * Endpoint resolution in bytes.
 109          */
 110         uint32_t element_size = 0;
 111         /**
 112          * Number of elements
 113          */
 114         uint32_t elements_count = 0;
 115         /**
 116          * Offset in bytes of pointer descriptor
 117         */
 118         uint64_t descriptor_offset = 0ull;
 119
 120         intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
 121
 122         RuntimeEndPoint() = default;
 123         RuntimeEndPoint(double scaleFactor,
 124                     void* descriptor_ptr,
 125                     uint32_t element_size,
 126                     uint32_t elements_count,
 127                     intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
 128                                     descriptor_ptr(descriptor_ptr),
 129                                     element_size(element_size),
 130                                     elements_count(elements_count),
 131                                     orientation(orientation) {
 132         }
 133     };
 134     using MemoryType = std::vector<std::pair<void*, uint32_t>>;
 135
 136 private:
 137 #if GNA_LIB_VER == 2
 138     Gna2Model * gna2Model;
 139 #else
 140     intel_nnet_type_t *ptr_nnet;
 141 #endif
 142     std::vector<RuntimeEndPoint> inputs;
 143     std::vector<RuntimeEndPoint> outputs;
 144     uint32_t nRotateRows = 0;
 145     uint32_t nRotateColumns = 0;
 146
 147     MemoryType states, *pstates = nullptr;
 148     ModelHeader modelHeader;
 149
 150     void ImportInputs(std::istream &is,
 151             void* basePtr,
 152             std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
 153             InferenceEngine::InputsDataMap& dataMap);
 154
 155     void ImportOutputs(std::istream &is,
 156             void* basePtr,
 157             std::vector<GNAPluginNS::OutputDesc> &desc,
 158             InferenceEngine::OutputsDataMap& dataMap);
 159
 160  public:
 161 #if GNA_LIB_VER == 2
 162     GNAModelSerial(Gna2Model * model, MemoryType & states_holder)
 163         : gna2Model(model), pstates(&states_holder) {
 164     }
 165
 166     GNAModelSerial(
 167         Gna2Model * model,
 168         const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
 169         const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
 170         const InferenceEngine::InputsDataMap& inputsDataMap,
 171         const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
 172             inputs(serializeInputs(inputsDataMap, inputDesc)),
 173             outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
 174     }
 175
 176 #else
 177      /**
 178   *
 179   * @brief Used for import/export
 180   * @param ptr_nnet
 181   * @param inputScale  - in/out parameter representing input scale factor
 182   * @param outputScale - in/out parameter representing output scale factor
 183   */
 184      GNAModelSerial(intel_nnet_type_t *ptr_nnet, MemoryType &states_holder)
 185          : ptr_nnet(ptr_nnet), pstates(&states_holder) {
 186      }
 187
 188      /**
 189       * @brief used for export only since runtime params are not passed by pointer
 190       * @param ptr_nnet
 191       * @param runtime
 192       */
 193      GNAModelSerial(
 194          intel_nnet_type_t *ptr_nnet,
 195          const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
 196          const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
 197          const InferenceEngine::InputsDataMap& inputsDataMap,
 198          const InferenceEngine::OutputsDataMap& outputsDataMap) : ptr_nnet(ptr_nnet),
 199                                                                   inputs(serializeInputs(inputsDataMap, inputDesc)),
 200                                                                   outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
 201      }
 202 #endif
 203
 204     GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns) {
 205       this->nRotateColumns = nRotateColumns;
 206       this->nRotateRows = nRotateRows;
 207       return *this;
 208     }
 209
 210     /**
 211      * mark certain part of gna_blob as state (in future naming is possible)
 212      * @param descriptor_ptr
 213      * @param size
 214      * @return
 215      */
 216     GNAModelSerial & AddState(void* descriptor_ptr, size_t size) {
 217         states.emplace_back(descriptor_ptr, size);
 218         return *this;
 219     }
 220
 221     /**
 222      * @brief calculate memory required for import gna graph
 223      * @param is - opened input stream
 224      * @return
 225      */
 226     static ModelHeader ReadHeader(std::istream &is);
 227
 228     /**
 229      * @brief Import model from FS into preallocated buffer,
 230      * buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free
 231      * @param ptr_nnet
 232      * @param basePointer
 233      * @param is - stream without header structure - TBD heder might be needed
 234      */
 235     void Import(void *basePointer,
 236                                 size_t gnaGraphSize,
 237                                 std::istream & is,
 238                                 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
 239                                 std::vector<GNAPluginNS::OutputDesc> &desc,
 240                                 InferenceEngine::InputsDataMap& inputsDataMap,
 241                                 InferenceEngine::OutputsDataMap& outputsDataMap);
 242
 243     /**
 244      * save gna graph to an outpus stream
 245      * @param ptr_nnet
 246      * @param basePtr
 247      * @param gnaGraphSize
 248      * @param os
 249      */
 250     void Export(void *basePtr,
 251                 size_t gnaGraphSize,
 252                 std::ostream &os) const;
 253
 254     static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
 255             const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
 256
 257
 258     static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
 259                                                                         const std::shared_ptr<GNAPluginNS::InputDesc>);
 260
 261     void setHeader(ModelHeader header);
 262 };