[GNA] Fix a global buffer overflow in GNAModelSerial::Import (#3290) (#3327)
inference-engine/src/gna_plugin/gna_model_serial.cpp
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <array>
#include <details/ie_exception.hpp>
#include <ios>
#include <iomanip>
#include <map>
#include <ie_algorithm.hpp>
#include <ie_common.h>
#include <ie_precision.hpp>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>
#else
#include <mm_malloc.h>
#endif

#include "gna_plugin.hpp"
#include "gna_model_serial.hpp"
#include "serial/headers/latest/gna_model_header.hpp"

using namespace GNAPluginNS;

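// Low-level (de)serialization helpers: writeBits/readBits stream the raw bytes of a trivially
// copyable value, writeNBytes/readNBytes move an explicit number of bytes, and readNBits reads
// a fixed number of bits into a possibly wider destination type.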
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
    os.write(static_cast<const char*>(ptr), size);
}

template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
    is.read(reinterpret_cast<char *>(ptr), size);
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);

    obj = * reinterpret_cast<T*>(&tmp.front());
}

inline void * offsetToPointer(void * const base, uint64_t offset) {
    return reinterpret_cast<uint8_t *>(base) + offset;
}

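// Pointers that point into the GNA memory region are serialized as 64-bit offsets from the base
// of that region; readOffset() reads such an offset and rebases it onto the freshly allocated
// base pointer on import.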
template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
}

union {
    uint16_t s;
    uint8_t  c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

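// The model file starts with the ASCII magic "GNAM"; the constant is byte-swapped on big-endian
// hosts so the on-disk byte sequence stays the same regardless of host endianness.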
const int gna_header_magic = is_little_endian() ?  0x4d414e47 : 0x474e414d;

GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);
    is.seekg(0, is.end);
    auto stream_len = is.tellg();
    if (stream_len == -1) {
        THROW_GNA_EXCEPTION << "Can't open file to import";
    }
    is.seekg(0, is.beg);

    HeaderLatest::ModelHeader header;
    header.version.major = 0u;
    header.version.minor = 0u;
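
    // Read only the fixed-size prologue (magic, headerSize and version) first, clamping the read
    // to the stream length, so a truncated or malformed file cannot overflow `header` before the
    // version-specific layout is known.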
    auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
                                + sizeof(HeaderLatest::ModelHeader::Version);
    if (stream_len > size_of_headers_header) {
        readNBytes(&header, size_of_headers_header, is);
    } else {
        readNBytes(&header, stream_len, is);
    }
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                           << std::setfill('0') <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }

    is.seekg(0, is.beg);
    Header2dot1::ModelHeader tempHeader2dot1;
    switch (header.version.major) {
        case 2:
            switch (header.version.minor) {
                case 1:
                    readBits(tempHeader2dot1, is);
                    header = Header2dot3::ModelHeader(tempHeader2dot1);
                    break;
                case 2:
                case 3:
                    readBits(header, is);
                    break;
                default:
                    THROW_GNA_EXCEPTION << "Imported file unsupported. Minor version should be 1, 2 or 3, but is: " << header.version.minor;
            }
            break;
        default:
            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " << header.version.major << " is not implemented";
    }

    /*
     * extra data needs to be added into the new header and the check below modified as appropriate
     */

    // forward compatibility: skip any extra header bytes written by a newer minor version
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}
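
// Illustrative only (not part of the original file): a sketch of how these entry points are
// typically driven, assuming the caller owns a stream `inputStream`, a GNAModelSerial instance
// `serial`, descriptor containers for inputs/outputs, and a GNA memory region of `gnaGraphSize`
// bytes at `basePtr`, and assuming ReadHeader can be called as shown:
//
//     auto header = GNAModelSerial::ReadHeader(inputStream);   // validates magic and version
//     serial.setHeader(header);                                // remember the layout for Import
//     serial.Import(basePtr, gnaGraphSize, inputStream,
//                   inputsDesc, outputsDesc, inputsDataMap, outputsDataMap);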

#define offsetFromBase(field)\
getOffsetFromBase(field, #field)

#if GNA_LIB_VER == 2

bool IsEmptyTensor(const Gna2Tensor& t) {
    return t.Type == Gna2DataTypeNone &&
        t.Data == nullptr &&
        t.Layout[0] == '\0' &&
        t.Mode == Gna2TensorModeDefault &&
        t.Shape.NumberOfDimensions == 0;
}

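// Expected byte size of each serialized parameter blob, per GNA operation type; used below to
// validate the sizes written to and read from the stream.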
const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
    {Gna2OperationTypeConvolution, {
        sizeof(Gna2Shape),
        sizeof(Gna2BiasMode),
        sizeof(Gna2PoolingMode),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape)}},
    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
};

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

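    // Model headers of version 2.3 and newer also carry input/output names: each name is stored
    // as a uint32_t length followed by that many bytes of a NUL-terminated string (matching what
    // Export() writes further down in this file).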
    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string inName(nameSize, '\0');
                readNBytes(&inName[0], nameSize, is);
                inputNames.push_back(inName.substr(0, nameSize - 1));
            }
        }
    }
    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);

    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string outName(nameSize, '\0');
                readNBytes(&outName[0], nameSize, is);
                outputNames.push_back(outName.substr(0, nameSize - 1));
            }
        }
    }
    ImportOutputs(is, basePointer, desc, outputsDataMap);

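    // Restore every operation: type, operand count, the operand tensors (whose Data fields were
    // serialized as offsets and are rebased onto basePointer here), then the parameter blobs,
    // each prefixed with its size and validated against GnaParamSize.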
    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
        readNBits<32>(operation->Type, is);
        readBits(operation->NumberOfOperands, is);
        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
        IE_ASSERT(operation->Operands != nullptr);
        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
            Gna2Tensor t{};
            readBits(t, is);
            if (IsEmptyTensor(t)) {
                operation->Operands[i] = nullptr;
            } else {
                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
            }
        }
        readBits(operation->NumberOfParameters, is);
        switch (operation->Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ")  not supported";
        }
        if (operation->NumberOfParameters > 0)
            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
        else
            operation->Parameters = nullptr;
        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
            uint32_t paramSize = 0;
            readBits(paramSize, is);
            IE_ASSERT(operation->Parameters != nullptr);
            if (paramSize == 0) {
                IE_ASSERT(operation->Parameters != nullptr);
                operation->Parameters[i] = nullptr;
                continue;
            }
            operation->Parameters[i] = gnaUserAllocator(paramSize);
            readNBytes(operation->Parameters[i], paramSize, is);

            if (GnaParamSize.at(operation->Type).size() <= i) {
                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
            }
            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
            }
        }
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }


    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}


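// The grouping (batch) factor is not serialized explicitly; it is inferred from the first
// operation's first operand as the smaller of its first two dimensions.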
uint32_t guessGrouping(Gna2Model const& model) {
    if (model.NumberOfOperations == 0 ||
        model.Operations == nullptr ||
        model.Operations[0].Operands == nullptr ||
        model.Operations[0].NumberOfOperands == 0 ||
        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
        THROW_GNA_EXCEPTION << "Can not guess grouping";
    }
    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
}

void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    const std::vector<Gna2Operation>
        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
        Gna2Tensor out = tensor;
        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
        return out;
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.element_size = ep.element_size;
        out.orientation = ep.orientation;
        return out;
    };
    /**
     * writing header
     */
    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = guessGrouping(*gna2Model);
    header.nInputs = inputs.size();
    header.nOutputs = outputs.size();
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;
    header.doRotateInput = doRotateInput;


    writeBits(header, os);

    for (auto &name : inputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &input : inputs) {
        writeBits(convert_to_serial(input), os);
    }
    for (auto &name : outputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &output : outputs) {
        writeBits(convert_to_serial(output), os);
    }

    for (const auto & layer : layers) {
        writeBits(static_cast<uint32_t>(layer.Type), os);
        writeBits(layer.NumberOfOperands, os);

        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
            if (layer.Operands[i] == nullptr)
                writeBits(Gna2Tensor{}, os);
            else
                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
        }

        writeBits(layer.NumberOfParameters, os);

        // writing parameters
        switch (layer.Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ")  not supported";
        }
        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
            if (layer.Parameters[i] == nullptr) {
                writeBits(static_cast<uint32_t>(0), os);
                continue;
            }
            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
            writeBits(paramSize, os);
            writeNBytes(layer.Parameters[i], paramSize, os);
        }
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
#else

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

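    // A piecewise-linear activation is stored as its segment count followed, only when the count
    // is non-zero, by the offset of the segment array inside the GNA memory region.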
    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layers structs
        switch (layer->nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
            }

            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
            readBits(affine.affine.nBytesPerWeight, is);
            readBits(affine.affine.nBytesPerBias, is);
            readOffset(affine.affine.pWeights, basePointer, is);
            readOffset(affine.affine.pBiases, basePointer, is);
            readPwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
            }

            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
            readBits(convolution.nFilterCoefficients, is);
            readBits(convolution.nBytesFilterCoefficient, is);
            readBits(convolution.nBytesBias, is);
            readBits(convolution.nFilters, is);
            readBits(convolution.nFeatureMaps, is);
            readBits(convolution.nFeatureMapRows, is);
            readBits(convolution.nFeatureMapColumns, is);
            readBits(convolution.nFilterRows, is);
            readOffset(convolution.pFilters, basePointer, is);
            readOffset(convolution.pBiases, basePointer, is);
            readBits(convolution.nPoolSize, is);
            readBits(convolution.nPoolStride, is);
            readBits(convolution.poolType, is);
            readPwl(convolution.pwl);
            break;
        }

        case INTEL_COPY: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
            }

            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
            readBits(copy.nCopyRows, is);
            readBits(copy.nCopyCols, is);
            break;
        }

        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ")  not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        if (layer->nLayerKind == INTEL_COPY) {
            layer->pOutputsIntermediate = nullptr;
        } else {
            readOffset(layer->pOutputsIntermediate, basePointer, is);
        }
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }


    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

/**
 *
 * @param ptr_nnet
 * @param gnaAllocSize - it can be calculated based on the nnet, however that would overcomplicate export;
 * given the base address it is relatively easy to calculate
 * @param os
 */

void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                               << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                               << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset; since an offset from base to nullptr cannot be correct, we do not store it at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.orientation = ep.orientation;
        return out;
    };
    /**
     * writing header
     */
    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = 1u;
    header.version.minor = 1u;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.nInputs = 1;
    header.nOutputs = 1;
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;


    writeBits(header, os);
    writeBits(convert_to_serial(inputs[0]), os);
    writeBits(convert_to_serial(outputs[0]), os);

    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layers structs
        switch (layer.nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
                writeBits(affine.affine.nBytesPerWeight, os);
                writeBits(affine.affine.nBytesPerBias, os);
                writeBits(offsetFromBase(affine.affine.pWeights), os);
                writeBits(offsetFromBase(affine.affine.pBiases), os);
                writePwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
                writeBits(convolution.nFilterCoefficients, os);
                writeBits(convolution.nBytesFilterCoefficient, os);
                writeBits(convolution.nBytesBias, os);
                writeBits(convolution.nFilters, os);
                writeBits(convolution.nFeatureMaps, os);
                writeBits(convolution.nFeatureMapRows, os);
                writeBits(convolution.nFeatureMapColumns, os);
                writeBits(convolution.nFilterRows, os);
                writeBits(offsetFromBase(convolution.pFilters), os);
                writeBits(offsetFromBase(convolution.pBiases), os);
                writeBits(convolution.nPoolSize, os);
                writeBits(convolution.nPoolStride, os);
                writeBits(convolution.poolType, os);
                writePwl(convolution.pwl);
                break;
            }

            case INTEL_COPY: {
                auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
                writeBits(copy.nCopyRows, os);
                writeBits(copy.nCopyCols, os);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind(" << layer.nLayerKind << ")  not supported";
        }

        // writing offsets from base.
        writeBits(offsetFromBase(layer.pInputs), os);
        if (layer.nLayerKind != INTEL_COPY) {
            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        }
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

#endif

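// The helpers below convert each network output/input into a HeaderLatest::RuntimeEndPoint
// (scale factor, descriptor pointer, element size, element count, orientation) that Export()
// then writes to the stream.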
std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        auto outputName = output.first;
        auto inputDims = output.second->getTensorDesc().getDims();
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));

        HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                                 outputsDesc[outputIndex].ptrs[0],
                                                 outputsDesc[outputIndex].num_bytes_per_element,
                                                 elementsCount,
                                                 outputsDesc[outputIndex].orientation);
        endPoints.push_back(endPoint);
        outputIndex++;
    }
    return endPoints;
}

std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;

    std::size_t inputIndex = 0;
    for (auto const& input : inputsDataMap) {
        auto inputName = input.first;
        auto inputDims = input.second->getTensorDesc().getDims();

        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
        IE_ASSERT(descriptor_ptr.size() > 0);
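        // Note: the element size is hard-coded to 2 bytes here, presumably because GNA inputs
        // are quantized to 16-bit integers.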
        uint32_t element_size = 2u;
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);

        HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                                 descriptor_ptr[0],
                                                 element_size,
                                                 elementsCount,
                                                 orientation);
        endPoints.push_back(endPoint);
        inputIndex++;
    }
    return endPoints;
}

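// Rebuild the InputsDataMap from the serialized RuntimeEndPoints: each input gets its pointer
// rebased onto basePtr, its orientation and scale factor restored, and an FP32 NC tensor of
// shape {nGroup, elements_count / nGroup}.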
void GNAModelSerial::ImportInputs(std::istream &is,
        void* basePtr,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        InferenceEngine::InputsDataMap& dataMap) {
    dataMap.clear();

    for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
                ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
        HeaderLatest::RuntimeEndPoint input;
        is.read(reinterpret_cast<char *>(&input), sizeof(input));
        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
        inputsDesc->orientation_in[name] = input.orientation;
        inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;

        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
                                                            InferenceEngine::TensorDesc(
                                                                    InferenceEngine::Precision::FP32,
                                                                    inputDims,
                                                                    InferenceEngine::Layout::NC)));
        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
    }
}

void GNAModelSerial::ImportOutputs(std::istream &is,
        void* basePtr,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::OutputsDataMap& dataMap) {
    desc.clear();
    dataMap.clear();
    desc.resize(modelHeader.nOutputs);

    for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
                                  ? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
        HeaderLatest::RuntimeEndPoint output;
        is.read(reinterpret_cast<char *>(&output), sizeof(output));
        OutputDesc description;
        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
        description.orientation = output.orientation;
        description.num_bytes_per_element = output.element_size;
        description.scale_factor = output.scaleFactor;

        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
                                                 InferenceEngine::TensorDesc(
                                                         InferenceEngine::Precision::FP32,
                                                         outputDims,
                                                         InferenceEngine::Layout::NC));
        desc.at(outputIndex) = description;
    }
}

void GNAModelSerial::setHeader(HeaderLatest::ModelHeader header) {
    modelHeader = header;
}