[GNA] Fix a global buffer overflow in GNAModelSerial::Import (#3290) (#3327)
inference-engine/src/gna_plugin/gna_model_serial.cpp
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <array>
#include <details/ie_exception.hpp>
#include <ios>
#include <iomanip>
#include <map>
#include <ie_algorithm.hpp>
#include <ie_common.h>
#include <ie_precision.hpp>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>
#else
#include <mm_malloc.h>
#endif

#include "gna_plugin.hpp"
#include "gna_model_serial.hpp"
#include "serial/headers/latest/gna_model_header.hpp"

using namespace GNAPluginNS;

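// Low-level (de)serialization helpers: writeBits/readBits stream the raw bytes of a trivially
// copyable value, writeNBytes/readNBytes move an explicit number of bytes, and readNBits reads
// a fixed number of bits into a possibly wider destination type.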
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
    os.write(static_cast<const char*>(ptr), size);
}

template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
    is.read(reinterpret_cast<char *>(ptr), size);
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);

    obj = * reinterpret_cast<T*>(&tmp.front());
}

inline void * offsetToPointer(void * const base, uint64_t offset) {
    return reinterpret_cast<uint8_t *>(base) + offset;
}

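// Pointers that point into the GNA memory region are serialized as 64-bit offsets from the base
// of that region; readOffset() reads such an offset and rebases it onto the freshly allocated
// base pointer on import.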
template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
}

union {
    uint16_t s;
    uint8_t  c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

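// The model file starts with the ASCII magic "GNAM"; the constant is byte-swapped on big-endian
// hosts so the on-disk byte sequence stays the same regardless of host endianness.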
const int gna_header_magic = is_little_endian() ?  0x4d414e47 : 0x474e414d;

GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);
    is.seekg(0, is.end);
    auto stream_len = is.tellg();
    if (stream_len == -1) {
        THROW_GNA_EXCEPTION << "Can't open file to import";
    }
    is.seekg(0, is.beg);

    HeaderLatest::ModelHeader header;
    header.version.major = 0u;
    header.version.minor = 0u;
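
    // Read only the fixed-size prologue (magic, headerSize and version) first, clamping the read
    // to the stream length, so a truncated or malformed file cannot overflow `header` before the
    // version-specific layout is known.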
    auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
                                + sizeof(HeaderLatest::ModelHeader::Version);
    if (stream_len > size_of_headers_header) {
        readNBytes(&header, size_of_headers_header, is);
    } else {
        readNBytes(&header, stream_len, is);
    }
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                           << std::setfill('0') <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }

    is.seekg(0, is.beg);
    Header2dot1::ModelHeader tempHeader2dot1;
    switch (header.version.major) {
        case 2:
            switch (header.version.minor) {
                case 1:
                    readBits(tempHeader2dot1, is);
                    header = Header2dot3::ModelHeader(tempHeader2dot1);
                    break;
                case 2:
                case 3:
                    readBits(header, is);
                    break;
                default:
                    THROW_GNA_EXCEPTION << "Imported file unsupported. Minor version should be 1, 2 or 3, but is: " << header.version.minor;
            }
            break;
        default:
            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " << header.version.major << " is not implemented";
    }

    /*
     * extra data needs to be added into the new header and the check below modified as appropriate
     */

    // forward compatibility: skip any extra header bytes written by a newer minor version
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}
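
// Illustrative only (not part of the original file): a sketch of how these entry points are
// typically driven, assuming the caller owns a stream `inputStream`, a GNAModelSerial instance
// `serial`, descriptor containers for inputs/outputs, and a GNA memory region of `gnaGraphSize`
// bytes at `basePtr`, and assuming ReadHeader can be called as shown:
//
//     auto header = GNAModelSerial::ReadHeader(inputStream);   // validates magic and version
//     serial.setHeader(header);                                // remember the layout for Import
//     serial.Import(basePtr, gnaGraphSize, inputStream,
//                   inputsDesc, outputsDesc, inputsDataMap, outputsDataMap);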

#define offsetFromBase(field)\
getOffsetFromBase(field, #field)

#if GNA_LIB_VER == 2

bool IsEmptyTensor(const Gna2Tensor& t) {
    return t.Type == Gna2DataTypeNone &&
        t.Data == nullptr &&
        t.Layout[0] == '\0' &&
        t.Mode == Gna2TensorModeDefault &&
        t.Shape.NumberOfDimensions == 0;
}

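// Expected byte size of each serialized parameter blob, per GNA operation type; used below to
// validate the sizes written to and read from the stream.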
const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
    {Gna2OperationTypeConvolution, {
        sizeof(Gna2Shape),
        sizeof(Gna2BiasMode),
        sizeof(Gna2PoolingMode),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape)}},
    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
};

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

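    // Model headers of version 2.3 and newer also carry input/output names: each name is stored
    // as a uint32_t length followed by that many bytes of a NUL-terminated string (matching what
    // Export() writes further down in this file).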
    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string inName(nameSize, '\0');
                readNBytes(&inName[0], nameSize, is);
                inputNames.push_back(inName.substr(0, nameSize - 1));
            }
        }
    }
    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);

    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string outName(nameSize, '\0');
                readNBytes(&outName[0], nameSize, is);
                outputNames.push_back(outName.substr(0, nameSize - 1));
            }
        }
    }
    ImportOutputs(is, basePointer, desc, outputsDataMap);

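    // Restore every operation: type, operand count, the operand tensors (whose Data fields were
    // serialized as offsets and are rebased onto basePointer here), then the parameter blobs,
    // each prefixed with its size and validated against GnaParamSize.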
    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
        readNBits<32>(operation->Type, is);
        readBits(operation->NumberOfOperands, is);
        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
        IE_ASSERT(operation->Operands != nullptr);
        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
            Gna2Tensor t{};
            readBits(t, is);
            if (IsEmptyTensor(t)) {
                operation->Operands[i] = nullptr;
            } else {
                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
            }
        }
        readBits(operation->NumberOfParameters, is);
        switch (operation->Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ")  not supported";
        }
        if (operation->NumberOfParameters > 0)
            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
        else
            operation->Parameters = nullptr;
        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
            uint32_t paramSize = 0;
            readBits(paramSize, is);
            IE_ASSERT(operation->Parameters != nullptr);
            if (paramSize == 0) {
                IE_ASSERT(operation->Parameters != nullptr);
                operation->Parameters[i] = nullptr;
                continue;
            }
            operation->Parameters[i] = gnaUserAllocator(paramSize);
            readNBytes(operation->Parameters[i], paramSize, is);

            if (GnaParamSize.at(operation->Type).size() <= i) {
                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
            }
            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
            }
        }
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }


    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}


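// The grouping (batch) factor is not serialized explicitly; it is inferred from the first
// operation's first operand as the smaller of its first two dimensions.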
uint32_t guessGrouping(Gna2Model const& model) {
    if (model.NumberOfOperations == 0 ||
        model.Operations == nullptr ||
        model.Operations[0].Operands == nullptr ||
        model.Operations[0].NumberOfOperands == 0 ||
        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
        THROW_GNA_EXCEPTION << "Can not guess grouping";
    }
    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
}

void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    const std::vector<Gna2Operation>
        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
        Gna2Tensor out = tensor;
        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
        return out;
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.element_size = ep.element_size;
        out.orientation = ep.orientation;
        return out;
    };
    /**
     * writing header
     */
    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = guessGrouping(*gna2Model);
    header.nInputs = inputs.size();
    header.nOutputs = outputs.size();
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;
    header.doRotateInput = doRotateInput;


    writeBits(header, os);

    for (auto &name : inputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &input : inputs) {
        writeBits(convert_to_serial(input), os);
    }
    for (auto &name : outputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &output : outputs) {
        writeBits(convert_to_serial(output), os);
    }

    for (const auto & layer : layers) {
        writeBits(static_cast<uint32_t>(layer.Type), os);
        writeBits(layer.NumberOfOperands, os);

        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
            if (layer.Operands[i] == nullptr)
                writeBits(Gna2Tensor{}, os);
            else
                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
        }

        writeBits(layer.NumberOfParameters, os);

        // writing parameters
        switch (layer.Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ")  not supported";
        }
        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
            if (layer.Parameters[i] == nullptr) {
                writeBits(static_cast<uint32_t>(0), os);
                continue;
            }
            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
            writeBits(paramSize, os);
            writeNBytes(layer.Parameters[i], paramSize, os);
        }
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
#else

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

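    // A piecewise-linear activation is stored as its segment count followed, only when the count
    // is non-zero, by the offset of the segment array inside the GNA memory region.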
    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layers structs
        switch (layer->nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
            }

            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
            readBits(affine.affine.nBytesPerWeight, is);
            readBits(affine.affine.nBytesPerBias, is);
            readOffset(affine.affine.pWeights, basePointer, is);
            readOffset(affine.affine.pBiases, basePointer, is);
            readPwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
            }

            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
            readBits(convolution.nFilterCoefficients, is);
            readBits(convolution.nBytesFilterCoefficient, is);
            readBits(convolution.nBytesBias, is);
            readBits(convolution.nFilters, is);
            readBits(convolution.nFeatureMaps, is);
            readBits(convolution.nFeatureMapRows, is);
            readBits(convolution.nFeatureMapColumns, is);
            readBits(convolution.nFilterRows, is);
            readOffset(convolution.pFilters, basePointer, is);
            readOffset(convolution.pBiases, basePointer, is);
            readBits(convolution.nPoolSize, is);
            readBits(convolution.nPoolStride, is);
            readBits(convolution.poolType, is);
            readPwl(convolution.pwl);
            break;
        }

        case INTEL_COPY: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
            }

            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
            readBits(copy.nCopyRows, is);
            readBits(copy.nCopyCols, is);
            break;
        }

        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ")  not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        if (layer->nLayerKind == INTEL_COPY) {
            layer->pOutputsIntermediate = nullptr;
        } else {
            readOffset(layer->pOutputsIntermediate, basePointer, is);
        }
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }


    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

/**
 *
 * @param ptr_nnet
 * @param gnaAllocSize - it can be calculated based on the nnet, however that would overcomplicate export;
 * given the base address it is relatively easy to calculate
 * @param os
 */

void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                               << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                               << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset; since an offset from base to nullptr cannot be correct, we do not store it at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.orientation = ep.orientation;
        return out;
    };
    /**
     * writing header
     */
    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = 1u;
    header.version.minor = 1u;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.nInputs = 1;
    header.nOutputs = 1;
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;


    writeBits(header, os);
    writeBits(convert_to_serial(inputs[0]), os);
    writeBits(convert_to_serial(outputs[0]), os);

    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layers structs
        switch (layer.nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
                writeBits(affine.affine.nBytesPerWeight, os);
                writeBits(affine.affine.nBytesPerBias, os);
                writeBits(offsetFromBase(affine.affine.pWeights), os);
                writeBits(offsetFromBase(affine.affine.pBiases), os);
                writePwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
                writeBits(convolution.nFilterCoefficients, os);
                writeBits(convolution.nBytesFilterCoefficient, os);
                writeBits(convolution.nBytesBias, os);
                writeBits(convolution.nFilters, os);
                writeBits(convolution.nFeatureMaps, os);
                writeBits(convolution.nFeatureMapRows, os);
                writeBits(convolution.nFeatureMapColumns, os);
                writeBits(convolution.nFilterRows, os);
                writeBits(offsetFromBase(convolution.pFilters), os);
                writeBits(offsetFromBase(convolution.pBiases), os);
                writeBits(convolution.nPoolSize, os);
                writeBits(convolution.nPoolStride, os);
                writeBits(convolution.poolType, os);
                writePwl(convolution.pwl);
                break;
            }

            case INTEL_COPY: {
                auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
                writeBits(copy.nCopyRows, os);
                writeBits(copy.nCopyCols, os);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind(" << layer.nLayerKind << ")  not supported";
        }

        // writing offsets from base.
        writeBits(offsetFromBase(layer.pInputs), os);
        if (layer.nLayerKind != INTEL_COPY) {
            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        }
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

#endif

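// The helpers below convert each network output/input into a HeaderLatest::RuntimeEndPoint
// (scale factor, descriptor pointer, element size, element count, orientation) that Export()
// then writes to the stream.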
std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        auto outputName = output.first;
        auto inputDims = output.second->getTensorDesc().getDims();
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));

        HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                                 outputsDesc[outputIndex].ptrs[0],
                                                 outputsDesc[outputIndex].num_bytes_per_element,
                                                 elementsCount,
                                                 outputsDesc[outputIndex].orientation);
        endPoints.push_back(endPoint);
        outputIndex++;
    }
    return endPoints;
}

std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;

    std::size_t inputIndex = 0;
    for (auto const& input : inputsDataMap) {
        auto inputName = input.first;
        auto inputDims = input.second->getTensorDesc().getDims();

        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
        IE_ASSERT(descriptor_ptr.size() > 0);
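        // Note: the element size is hard-coded to 2 bytes here, presumably because GNA inputs
        // are quantized to 16-bit integers.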
        uint32_t element_size = 2u;
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);

        HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                                 descriptor_ptr[0],
                                                 element_size,
                                                 elementsCount,
                                                 orientation);
        endPoints.push_back(endPoint);
        inputIndex++;
    }
    return endPoints;
}

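// Rebuild the InputsDataMap from the serialized RuntimeEndPoints: each input gets its pointer
// rebased onto basePtr, its orientation and scale factor restored, and an FP32 NC tensor of
// shape {nGroup, elements_count / nGroup}.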
void GNAModelSerial::ImportInputs(std::istream &is,
        void* basePtr,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        InferenceEngine::InputsDataMap& dataMap) {
    dataMap.clear();

    for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
                ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
        HeaderLatest::RuntimeEndPoint input;
        is.read(reinterpret_cast<char *>(&input), sizeof(input));
        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
        inputsDesc->orientation_in[name] = input.orientation;
        inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;

        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
                                                            InferenceEngine::TensorDesc(
                                                                    InferenceEngine::Precision::FP32,
                                                                    inputDims,
                                                                    InferenceEngine::Layout::NC)));
        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
    }
}

void GNAModelSerial::ImportOutputs(std::istream &is,
        void* basePtr,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::OutputsDataMap& dataMap) {
    desc.clear();
    dataMap.clear();
    desc.resize(modelHeader.nOutputs);

    for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
                                  ? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
        HeaderLatest::RuntimeEndPoint output;
        is.read(reinterpret_cast<char *>(&output), sizeof(output));
        OutputDesc description;
        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
        description.orientation = output.orientation;
        description.num_bytes_per_element = output.element_size;
        description.scale_factor = output.scaleFactor;

        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
                                                 InferenceEngine::TensorDesc(
                                                         InferenceEngine::Precision::FP32,
                                                         outputDims,
                                                         InferenceEngine::Layout::NC));
        desc.at(outputIndex) = description;
    }
}

void GNAModelSerial::setHeader(HeaderLatest::ModelHeader header) {
    modelHeader = header;
}