inference-engine/src/gna_plugin/gna_model_serial.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <array>
#include <details/ie_exception.hpp>
#include <ios>
#include <iomanip>
#ifndef _WIN32
#include <mm_malloc.h>
#endif
#include <gna-api-types-xnn.h>
#include "gna_model_serial.hpp"
#include "gna_plugin_log.hpp"

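// Raw binary (de)serialization helpers: they write/read the in-memory byte
// representation of an object directly to/from the stream, and readOffset turns
// a stored 64-bit offset back into a pointer relative to the given base.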
template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);

    obj = *reinterpret_cast<T*>(&tmp.front());
}

template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(reinterpret_cast<uint8_t *>(base) + offset);
}

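// The file magic consists of the bytes 'G', 'N', 'A', 'M'. Read as a 32-bit integer
// they equal 0x4d414e47 on a little-endian host and 0x474e414d on a big-endian one,
// so the expected magic is chosen according to the host byte order detected below.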
union {
    uint16_t s;
    uint8_t  c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;

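// Reads and validates the serialized model header; for forward compatibility any
// extra bytes of a newer (larger) header are skipped.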
ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);

    ModelHeader header;
    readBits(header, is);
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                           << std::setfill('0') <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }
    if (header.version.major < 1) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: major version should be >= 1";
    }
    if (header.headerSize < sizeof(header)) {
        THROW_GNA_EXCEPTION << "Unsupported header size: minimal value is " << sizeof(header) << ", but read: " << header.headerSize;
    }
    /*
     * extra data needs to be added to a new header version, with this check updated accordingly
     */

    // forward compatibility: skip any extra bytes of a newer, larger header
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}

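// Restores the network description from the stream: per-layer descriptors are read
// back into ptr_nnet, serialized offsets are converted to pointers relative to
// basePointer, and finally the raw GNA graph of gnaGraphSize bytes is read into
// basePointer.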
void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
    is.exceptions(std::istream::failbit);

    auto readPwl = [&is, basePointer] (intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

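    // deserialize per-layer descriptors in the same order Export() writes them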
    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layers structs
        switch (layer->nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
                if (layer->pLayerStruct == nullptr) {
                    THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
                }

                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
                readBits(affine.affine.nBytesPerWeight, is);
                readBits(affine.affine.nBytesPerBias, is);
                readOffset(affine.affine.pWeights, basePointer, is);
                readOffset(affine.affine.pBiases, basePointer, is);
                readPwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
                if (layer->pLayerStruct == nullptr) {
                    THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
                }

                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
                readBits(convolution.nFilterCoefficients, is);
                readBits(convolution.nBytesFilterCoefficient, is);
                readBits(convolution.nBytesBias, is);
                readBits(convolution.nFilters, is);
                readBits(convolution.nFeatureMaps, is);
                readBits(convolution.nFeatureMapRows, is);
                readBits(convolution.nFeatureMapColumns, is);
                readBits(convolution.nFilterRows, is);
                readOffset(convolution.pFilters, basePointer, is);
                readOffset(convolution.pBiases, basePointer, is);
                readBits(convolution.nPoolSize, is);
                readBits(convolution.nPoolStride, is);
                readBits(convolution.poolType, is);
                readPwl(convolution.pwl);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
            case INTEL_COPY:
                THROW_GNA_EXCEPTION << "Importing of copy layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind (" << layer->nLayerKind << ") not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        readOffset(layer->pOutputsIntermediate, basePointer, is);
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory (state) information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
       void *pSegment;
       readOffset(pSegment, basePointer, is);
       uint32_t segmentSz;
       readBits(segmentSz, is);
       if (pstates) {
           (*pstates)[i] = {pSegment, segmentSz};
       }
    }


    // once the structure has been read, read the whole GNA graph blob
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

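// offsetFromBase(field) passes both the pointer and its stringified name to
// getOffsetFromBase(), e.g. offsetFromBase(layer.pInputs) expands to
// getOffsetFromBase(layer.pInputs, "layer.pInputs"), so range-check errors can
// name the offending field.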
#define offsetFromBase(field)\
getOffsetFromBase(field, #field)


/**
 * Serializes ptr_nnet and the associated GNA memory into the stream.
 * @param basePointer - base of the GNA memory segment; all pointers are exported as offsets from it
 * @param gnaGraphSize - it can be calculated based on the nnet, however that would overcomplicate export;
 *                       given the base address it is relatively easy to calculate
 * @param os
 */
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                               << ") is not within the segment returned from GNAAlloc(0x" << basePointer << "-0x"
                               << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset; since an offset from base to nullptr cannot be correct, it is not stored at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        ModelHeader::EndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        return out;
    };
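    // convert_to_serial maps a runtime endpoint to its on-disk form: element count,
    // element size and scale factor are copied as-is, while the descriptor pointer is
    // stored as an offset from the GNA memory base.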
    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.input  = convert_to_serial(input);
    header.output = convert_to_serial(output);
    header.headerSize = sizeof(ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;


    writeBits(header, os);

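    // serialize per-layer descriptors; Import() reads them back in the same order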
    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layers structs
        switch (layer.nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
                writeBits(affine.affine.nBytesPerWeight, os);
                writeBits(affine.affine.nBytesPerBias, os);
                writeBits(offsetFromBase(affine.affine.pWeights), os);
                writeBits(offsetFromBase(affine.affine.pBiases), os);
                writePwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
                writeBits(convolution.nFilterCoefficients, os);
                writeBits(convolution.nBytesFilterCoefficient, os);
                writeBits(convolution.nBytesBias, os);
                writeBits(convolution.nFilters, os);
                writeBits(convolution.nFeatureMaps, os);
                writeBits(convolution.nFeatureMapRows, os);
                writeBits(convolution.nFeatureMapColumns, os);
                writeBits(convolution.nFilterRows, os);
                writeBits(offsetFromBase(convolution.pFilters), os);
                writeBits(offsetFromBase(convolution.pBiases), os);
                writeBits(convolution.nPoolSize, os);
                writeBits(convolution.nPoolStride, os);
                writeBits(convolution.poolType, os);
                writePwl(convolution.pwl);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
            case INTEL_COPY:
                THROW_GNA_EXCEPTION << "Exporting of copy layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind (" << layer.nLayerKind << ") not supported";
        }

        // writing offsets from base.
        writeBits(offsetFromBase(layer.pInputs), os);
        writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory (state) information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, append the whole GNA graph blob
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}