inference-engine/src/gna_plugin/gna_model_serial.hpp

   1 // Copyright (C) 2018-2019 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #pragma once
   6
   7 #include <istream>
   8 #include <vector>
   9 #include <utility>
  10 #include "gna-api.h"
  11
  12 #pragma pack(push, 1)
  13
  14 /**
  15  * version history
  16  * 1.0 - basic support
  17  * 1.1 - added memory information
  18  */
  19
  20 #define HEADER_MAJOR 1
  21 #define HEADER_MINOR 1
  22
  23 /**
  24  * @brief Header version 1.0
  25  */
  26 struct ModelHeader {
  27     /**
  28      *@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d
  29      */
  30     char gnam[4];
  31     /**
  32      * @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header
  33      * usually it is an indicator of working with version of model different that is current export function produce
  34      */
  35     uint32_t headerSize = 0u;
  36     struct Version {
  37         /**
  38          * @details Version of format Major – unsigned int, ex: 0x0001
  39          * every change in the header or in the layers definition should be reflected in version change
  40          * for backward compatibility new parsers can read old versions of model with certain restrictions
  41          */
  42         uint16_t major = 0u;
  43         /**
  44          * @details Version of Format Minor – unsigned int,  corresponding to build revision for example
  45          * changes in minor version are not affected layout of model
  46          */
  47         uint32_t minor = 0u;
  48     } version;
  49     /**
  50      * @brief Memory required to be allocated using GNAAlloc()
  51      */
  52     uint64_t gnaMemSize = 0ull;
  53     /**
  54      * @brief Number of GNA Layers
  55      */
  56     uint64_t layersCount = 0ull;
  57
  58     /**
  59      * @brief Grouping level
  60      */
  61     uint32_t nGroup = 0u;
  62
  63     /**
  64      * Convolution related setting - they are affecting input transformation
  65      */
  66     uint32_t nRotateRows = 0u;
  67     uint32_t nRotateColumns = 0u;
  68
  69
  70     struct EndPoint {
  71         /**
  72          * if scale factor is different then pased into infer , network might need to be requantized
  73          */
  74         float scaleFactor = 0.f;
  75         /**
  76          * Offset in bytes of pointer descriptor
  77          */
  78         uint64_t descriptor_offset = 0ull;
  79         /**
  80          * Endpoint resolution in bytes.
  81          */
  82         uint32_t element_size = 0u;
  83         /**
  84          * Number of elements
  85          */
  86         uint32_t elements_count = 0u;
  87     };
  88     EndPoint input;
  89     EndPoint output;
  90
  91     /**
  92      * Reserved Data might be here
  93      */
  94 };
  95 #pragma pack(pop)
  96
  97 /**
  98  * @brief implements serialisation tasks for GNAGraph
  99  */
 100 class GNAModelSerial {
 101  public:
 102     /*
 103      * In runtime endpoint mostly same as in serial version, except pf descriptor field
 104      */
 105     struct RuntimeEndPoint {
 106         /**
 107          * if scale factor is different then pased into infer , network might need to be requantized
 108          */
 109         float scaleFactor;
 110         /**
 111          * Pointer descriptor
 112          */
 113         void* descriptor_ptr;
 114         /**
 115          * Endpoint resolution in bytes.
 116          */
 117         uint32_t element_size;
 118         /**
 119          * Number of elements
 120          */
 121         uint32_t elements_count;
 122
 123         RuntimeEndPoint() = default;
 124         RuntimeEndPoint(double scaleFactor,
 125                     void* descriptor_ptr,
 126                     uint32_t element_size,
 127                     uint32_t elements_count) : scaleFactor(scaleFactor),
 128                                     descriptor_ptr(descriptor_ptr),
 129                                     element_size(element_size),
 130                                     elements_count(elements_count) {
 131         }
 132     };
 133     using MemoryType = std::vector<std::pair<void*, uint32_t>>;
 134
 135 private:
 136     intel_nnet_type_t *ptr_nnet;
 137     RuntimeEndPoint input, output;
 138     uint32_t nRotateRows = 0;
 139     uint32_t nRotateColumns = 0;
 140
 141     MemoryType states, *pstates = nullptr;
 142
 143  public:
 144     /**
 145      *
 146      * @brief Used for import/export
 147      * @param ptr_nnet
 148      * @param inputScale  - in/out parameter representing input scale factor
 149      * @param outputScale - in/out parameter representing output scale factor
 150      */
 151     GNAModelSerial(intel_nnet_type_t *ptr_nnet, MemoryType &states_holder)
 152         : ptr_nnet(ptr_nnet) , pstates(&states_holder) {
 153     }
 154
 155     /**
 156      * @brief used for export only since runtime params are not passed by pointer
 157      * @param ptr_nnet
 158      * @param runtime
 159      */
 160     GNAModelSerial(
 161         intel_nnet_type_t *ptr_nnet,
 162         RuntimeEndPoint input,
 163         RuntimeEndPoint output) : ptr_nnet(ptr_nnet), input(input), output(output) {
 164     }
 165
 166     GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns) {
 167       this->nRotateColumns = nRotateColumns;
 168       this->nRotateRows = nRotateRows;
 169       return *this;
 170     }
 171
 172     /**
 173      * mark certain part of gna_blob as state (in future naming is possible)
 174      * @param descriptor_ptr
 175      * @param size
 176      * @return
 177      */
 178     GNAModelSerial & AddState(void* descriptor_ptr, size_t size) {
 179         states.emplace_back(descriptor_ptr, size);
 180         return *this;
 181     }
 182
 183     /**
 184      * @brief calculate memory required for import gna graph
 185      * @param is - opened input stream
 186      * @return
 187      */
 188     static ModelHeader ReadHeader(std::istream &is);
 189
 190     /**
 191      * @brief Import model from FS into preallocated buffer,
 192      * buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free
 193      * @param ptr_nnet
 194      * @param basePointer
 195      * @param is - stream without header structure - TBD heder might be needed
 196      */
 197     void Import(void *basePointer, size_t gnaGraphSize, std::istream &is);
 198
 199     /**
 200      * save gna graph to an outpus stream
 201      * @param ptr_nnet
 202      * @param basePtr
 203      * @param gnaGraphSize
 204      * @param os
 205      */
 206     void Export(void *basePtr,
 207                 size_t gnaGraphSize,
 208                 std::ostream &os) const;
 209 };