inference-engine/src/gna_plugin/gna_model_serial.hpp

   1 // Copyright (C) 2018-2020 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #pragma once
   6
   7 #include <istream>
   8 #include <vector>
   9 #include <utility>
  10
  11 #include <gna-api.h>
  12 #include "descriptions/gna_input_desc.hpp"
  13 #include "descriptions/gna_output_desc.hpp"
  14 #include "gna_plugin_log.hpp"
  15 #if GNA_LIB_VER == 2
  16 #include "gna2-model-api.h"
  17 #endif
  18
  19 #pragma pack(push, 1)
  20
  21 /**
  22  * version history
  23  * 1.0 - basic support
  24  * 1.1 - added memory information
  25  * 2.0 - for use with GNA2 library
  26  * 2.1 - multiple i/o support
  27  */
  28 #if GNA_LIB_VER == 2
  29 #define HEADER_MAJOR 2
  30 #define HEADER_MINOR 1
  31 #else
  32 #define HEADER_MAJOR 1
  33 #define HEADER_MINOR 2
  34 #endif
  35
  36
  37 /**
  38  * @brief Header version 2.1
  39  */
  40 struct ModelHeader {
  41     /**
  42      *@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d
  43      */
  44     char gnam[4];
  45     /**
  46      * @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header
  47      * usually it is an indicator of working with version of model different that is current export function produce
  48      */
  49     uint32_t headerSize = 0u;
  50     struct Version {
  51         /**
  52          * @details Version of format Major – unsigned int, ex: 0x0001
  53          * every change in the header or in the layers definition should be reflected in version change
  54          * for backward compatibility new parsers can read old versions of model with certain restrictions
  55          */
  56         uint16_t major = 0u;
  57         /**
  58          * @details Version of Format Minor – unsigned int,  corresponding to build revision for example
  59          * changes in minor version are not affected layout of model
  60          */
  61         uint32_t minor = 0u;
  62     } version;
  63     /**
  64      * @brief Memory required to be allocated using GNAAlloc()
  65      */
  66     uint64_t gnaMemSize = 0ull;
  67     /**
  68      * @brief Number of GNA Layers
  69      */
  70     uint64_t layersCount = 0ull;
  71     /**
  72      * @brief Grouping level
  73      */
  74     uint32_t nGroup = 0u;
  75     /**
  76      * Convolution related setting - they are affecting input transformation
  77      */
  78     uint32_t nRotateRows = 0u;
  79     uint32_t nRotateColumns = 0u;
  80     bool doRotateInput = false;
  81
  82     uint32_t nInputs = 0u;
  83     uint32_t nOutputs = 0u;
  84
  85     /**
  86      * Reserved Data might be here
  87      */
  88 };
  89 #pragma pack(pop)
  90
  91 /**
  92  * @brief implements serialisation tasks for GNAGraph
  93  */
  94 class GNAModelSerial {
  95  public:
  96     /*
  97      * In runtime endpoint mostly same as in serial version, except of descriptor field
  98      */
  99     struct RuntimeEndPoint {
 100         /**
 101          * if scale factor is different then pased into infer , network might need to be requantized
 102          */
 103         float scaleFactor = 0;
 104         /**
 105          * Pointer descriptor
 106          */
 107         void* descriptor_ptr = nullptr;
 108         /**
 109          * Endpoint resolution in bytes.
 110          */
 111         uint32_t element_size = 0;
 112         /**
 113          * Number of elements
 114          */
 115         uint32_t elements_count = 0;
 116         /**
 117          * Offset in bytes of pointer descriptor
 118         */
 119         uint64_t descriptor_offset = 0ull;
 120
 121         intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
 122
 123         RuntimeEndPoint() = default;
 124         RuntimeEndPoint(double scaleFactor,
 125                     void* descriptor_ptr,
 126                     uint32_t element_size,
 127                     uint32_t elements_count,
 128                     intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
 129                                     descriptor_ptr(descriptor_ptr),
 130                                     element_size(element_size),
 131                                     elements_count(elements_count),
 132                                     orientation(orientation) {
 133         }
 134     };
 135     using MemoryType = std::vector<std::pair<void*, uint32_t>>;
 136
 137 private:
 138 #if GNA_LIB_VER == 2
 139     Gna2Model * gna2Model;
 140 #else
 141     intel_nnet_type_t *ptr_nnet;
 142 #endif
 143     std::vector<RuntimeEndPoint> inputs;
 144     std::vector<RuntimeEndPoint> outputs;
 145     uint32_t nRotateRows = 0;
 146     uint32_t nRotateColumns = 0;
 147     bool doRotateInput = false;
 148
 149     MemoryType states, *pstates = nullptr;
 150     ModelHeader modelHeader;
 151
 152     void ImportInputs(std::istream &is,
 153             void* basePtr,
 154             std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
 155             InferenceEngine::InputsDataMap& dataMap);
 156
 157     void ImportOutputs(std::istream &is,
 158             void* basePtr,
 159             std::vector<GNAPluginNS::OutputDesc> &desc,
 160             InferenceEngine::OutputsDataMap& dataMap);
 161
 162  public:
 163 #if GNA_LIB_VER == 2
 164     GNAModelSerial(Gna2Model * model, MemoryType & states_holder)
 165         : gna2Model(model), pstates(&states_holder) {
 166     }
 167
 168     GNAModelSerial(
 169         Gna2Model * model,
 170         const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
 171         const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
 172         const InferenceEngine::InputsDataMap& inputsDataMap,
 173         const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
 174             inputs(serializeInputs(inputsDataMap, inputDesc)),
 175             outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
 176     }
 177
 178 #else
 179      /**
 180   *
 181   * @brief Used for import/export
 182   * @param ptr_nnet
 183   * @param inputScale  - in/out parameter representing input scale factor
 184   * @param outputScale - in/out parameter representing output scale factor
 185   */
 186      GNAModelSerial(intel_nnet_type_t *ptr_nnet, MemoryType &states_holder)
 187          : ptr_nnet(ptr_nnet), pstates(&states_holder) {
 188      }
 189
 190      /**
 191       * @brief used for export only since runtime params are not passed by pointer
 192       * @param ptr_nnet
 193       * @param runtime
 194       */
 195      GNAModelSerial(
 196          intel_nnet_type_t *ptr_nnet,
 197          const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
 198          const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
 199          const InferenceEngine::InputsDataMap& inputsDataMap,
 200          const InferenceEngine::OutputsDataMap& outputsDataMap) : ptr_nnet(ptr_nnet),
 201                                                                   inputs(serializeInputs(inputsDataMap, inputDesc)),
 202                                                                   outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
 203      }
 204 #endif
 205
 206     GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns, bool do_rotate_inputs) {
 207       this->nRotateColumns = nRotateColumns;
 208       this->nRotateRows = nRotateRows;
 209       this->doRotateInput = do_rotate_inputs;
 210       return *this;
 211     }
 212
 213     /**
 214      * mark certain part of gna_blob as state (in future naming is possible)
 215      * @param descriptor_ptr
 216      * @param size
 217      * @return
 218      */
 219     GNAModelSerial & AddState(void* descriptor_ptr, size_t size) {
 220         states.emplace_back(descriptor_ptr, size);
 221         return *this;
 222     }
 223
 224     /**
 225      * @brief calculate memory required for import gna graph
 226      * @param is - opened input stream
 227      * @return
 228      */
 229     static ModelHeader ReadHeader(std::istream &is);
 230
 231     /**
 232      * @brief Import model from FS into preallocated buffer,
 233      * buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free
 234      * @param ptr_nnet
 235      * @param basePointer
 236      * @param is - stream without header structure - TBD heder might be needed
 237      */
 238     void Import(void *basePointer,
 239                                 size_t gnaGraphSize,
 240                                 std::istream & is,
 241                                 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
 242                                 std::vector<GNAPluginNS::OutputDesc> &desc,
 243                                 InferenceEngine::InputsDataMap& inputsDataMap,
 244                                 InferenceEngine::OutputsDataMap& outputsDataMap);
 245
 246     /**
 247      * save gna graph to an outpus stream
 248      * @param ptr_nnet
 249      * @param basePtr
 250      * @param gnaGraphSize
 251      * @param os
 252      */
 253     void Export(void *basePtr,
 254                 size_t gnaGraphSize,
 255                 std::ostream &os) const;
 256
 257     static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
 258             const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
 259
 260
 261     static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
 262                                                                         const std::shared_ptr<GNAPluginNS::InputDesc>);
 263
 264     void setHeader(ModelHeader header);
 265 };