[GNA] Initialize a local variable (#1067)
[platform/upstream/dldt.git] / inference-engine / src / gna_plugin / gna_model_serial.cpp
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <array>
#include <details/ie_exception.hpp>
#include <ios>
#include <iomanip>
#include <map>
#include <ie_algorithm.hpp>
#include <ie_common.h>
#include <ie_precision.hpp>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>
#else
#include <mm_malloc.h>
#endif

#include "gna_plugin.hpp"
#include "gna_model_serial.hpp"

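// Low-level (de)serialization helpers: objects are written and read
// byte-for-byte via their in-memory representation, so the format is only
// portable between builds with the same layout and endianness.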
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
    os.write(static_cast<const char*>(ptr), size);
}

template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
    is.read(reinterpret_cast<char *>(ptr), size);
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(tmp.data()), nBits / 8);

    obj = *reinterpret_cast<T*>(tmp.data());
}

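// Pointers into the GNA memory segment are stored in the file as byte offsets
// from the segment base, so the graph can be relocated to whatever address
// GNAAlloc returns on import.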
inline void * offsetToPointer(void * const base, uint64_t offset) {
    return reinterpret_cast<uint8_t *>(base) + offset;
}

template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
}

union {
    uint16_t s;
    uint8_t  c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

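// The magic bytes "GNAM" read back as 0x4d414e47 when interpreted as a
// little-endian 32-bit integer; on a big-endian host the byte-swapped
// constant is used so the comparison in ReadHeader still matches.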
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;

ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);

    ModelHeader header;
    readBits(header, is);
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                           << std::setfill('0') <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }
    if (header.version.major != HEADER_MAJOR) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: major version should be == " << HEADER_MAJOR;
    }
    if (header.headerSize < sizeof(header)) {
        THROW_GNA_EXCEPTION << "Unsupported header size: minimal value is " << sizeof(header) << ", but read: " << header.headerSize;
    }
    /*
     * extra data needs to be added to a new header version and this check updated as appropriate
     */

    // forward compatible: skip any extra header bytes written by a newer version
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}
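
// A minimal usage sketch (illustrative only; `file`, `gnaAlloc`, `serial` and
// the descriptor arguments are hypothetical caller-side names, not part of
// this file):
//
//   std::ifstream file("model.gna", std::ios::binary);
//   ModelHeader header = serial.ReadHeader(file);
//   void *base = gnaAlloc(header.gnaMemSize);  // hypothetical GNA memory allocator
//   serial.setHeader(header);
//   serial.Import(base, header.gnaMemSize, file,
//                 inputsDesc, outputsDesc, inputsDataMap, outputsDataMap);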

#define offsetFromBase(field)\
getOffsetFromBase(field, #field)

#if GNA_LIB_VER == 2

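// An all-default Gna2Tensor is the sentinel for operands that were nullptr at
// export time (see the Gna2Tensor{} written for null operands in Export below).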
bool IsEmptyTensor(const Gna2Tensor& t) {
    return t.Type == Gna2DataTypeNone &&
        t.Data == nullptr &&
        t.Layout[0] == '\0' &&
        t.Mode == Gna2TensorModeDefault &&
        t.Shape.NumberOfDimensions == 0;
}

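// Expected on-disk size, in bytes, of each parameter per operation type; used
// to validate parameter records on import and to size them on export.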
const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
    {Gna2OperationTypeConvolution, {
        sizeof(Gna2Shape),
        sizeof(Gna2BiasMode),
        sizeof(Gna2PoolingMode),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape)}},
    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
};

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
        readNBits<32>(operation->Type, is);
        readBits(operation->NumberOfOperands, is);
        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
            Gna2Tensor t{};
            readBits(t, is);
            if (IsEmptyTensor(t)) {
                operation->Operands[i] = nullptr;
            } else {
                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
            }
        }
        readBits(operation->NumberOfParameters, is);
        switch (operation->Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type (" << operation->Type << ") not supported";
        }
        if (operation->NumberOfParameters > 0)
            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
        else
            operation->Parameters = nullptr;
        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
            uint32_t paramSize = 0;
            readBits(paramSize, is);
            if (paramSize == 0) {
                operation->Parameters[i] = nullptr;
                continue;
            }
            operation->Parameters[i] = gnaUserAllocator(paramSize);
            readNBytes(operation->Parameters[i], paramSize, is);

            if (GnaParamSize.at(operation->Type).size() <= i) {
                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
            }
            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
            }
        }
    }

    // reading memory state information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

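// For GNA_LIB_VER == 2 the group (batch) size is inferred from the first
// operand of the first operation, as the smaller of its first two dimensions;
// Export stores the result in the header's nGroup field.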
uint32_t guessGrouping(Gna2Model const& model) {
    if (model.NumberOfOperations == 0 ||
        model.Operations == nullptr ||
        model.Operations[0].Operands == nullptr ||
        model.Operations[0].NumberOfOperands == 0 ||
        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
        THROW_GNA_EXCEPTION << "Cannot guess grouping";
    }
    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
}

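// Serialized stream layout, in order: ModelHeader, nInputs RuntimeEndPoint
// records, nOutputs RuntimeEndPoint records, one record per operation (type,
// operands, parameters), the memory-state table, and finally the raw bytes of
// the GNA memory segment.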
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    const std::vector<Gna2Operation>
        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);

    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range of segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
        Gna2Tensor out = tensor;
        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
        return out;
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.element_size = ep.element_size;
        out.orientation = ep.orientation;
        return out;
    };

    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.headerSize = sizeof(ModelHeader);
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = guessGrouping(*gna2Model);
    header.nInputs = inputs.size();
    header.nOutputs = outputs.size();
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;

    writeBits(header, os);

    for (const auto &input : inputs) {
        writeBits(convert_to_serial(input), os);
    }
    for (const auto &output : outputs) {
        writeBits(convert_to_serial(output), os);
    }

    for (const auto & layer : layers) {
        writeBits(static_cast<uint32_t>(layer.Type), os);
        writeBits(layer.NumberOfOperands, os);

        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
            if (layer.Operands[i] == nullptr)
                writeBits(Gna2Tensor{}, os);
            else
                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
        }

        writeBits(layer.NumberOfParameters, os);

        // writing parameters
        switch (layer.Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type (" << layer.Type << ") not supported";
        }
        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
            if (layer.Parameters[i] == nullptr) {
                writeBits(static_cast<uint32_t>(0), os);
                continue;
            }
            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
            writeBits(paramSize, os);
            writeNBytes(layer.Parameters[i], paramSize, os);
        }
    }
    // writing memory state information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
#else

void GNAModelSerial::Import(void *basePointer,
        size_t gnaGraphSize,
        std::istream & is,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::InputsDataMap& inputsDataMap,
        InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

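    // PWL (piecewise-linear activation) segments are stored as a segment
    // count followed, only when the count is non-zero, by the offset of the
    // segment array; a zero count denotes a layer with no activation.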
    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layer structs
        switch (layer->nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
            }

            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
            readBits(affine.affine.nBytesPerWeight, is);
            readBits(affine.affine.nBytesPerBias, is);
            readOffset(affine.affine.pWeights, basePointer, is);
            readOffset(affine.affine.pBiases, basePointer, is);
            readPwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
            }

            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
            readBits(convolution.nFilterCoefficients, is);
            readBits(convolution.nBytesFilterCoefficient, is);
            readBits(convolution.nBytesBias, is);
            readBits(convolution.nFilters, is);
            readBits(convolution.nFeatureMaps, is);
            readBits(convolution.nFeatureMapRows, is);
            readBits(convolution.nFeatureMapColumns, is);
            readBits(convolution.nFilterRows, is);
            readOffset(convolution.pFilters, basePointer, is);
            readOffset(convolution.pBiases, basePointer, is);
            readBits(convolution.nPoolSize, is);
            readBits(convolution.nPoolStride, is);
            readBits(convolution.poolType, is);
            readPwl(convolution.pwl);
            break;
        }

        case INTEL_COPY: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
            }

            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
            readBits(copy.nCopyRows, is);
            readBits(copy.nCopyCols, is);
            break;
        }

        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind (" << layer->nLayerKind << ") not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        if (layer->nLayerKind == INTEL_COPY) {
            layer->pOutputsIntermediate = nullptr;
        } else {
            readOffset(layer->pOutputsIntermediate, basePointer, is);
        }
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory state information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read the whole GNA graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

/**
 * @param basePointer base address of the memory segment returned from GNAAlloc
 * @param gnaGraphSize size of that segment; it could be calculated from the nnet,
 *        but that would overcomplicate export, while offsets from the base address are easy to compute
 * @param os output stream
 */
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);

    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                               << ") not in range of segment returned from GNAAlloc(0x" << basePointer << "-0x"
                               << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset; since an offset from base to nullptr cannot be
        // correct, no offset at all is stored for an empty PWL
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.orientation = ep.orientation;
        return out;
    };

    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.nInputs = 1;
    header.nOutputs = 1;
    header.headerSize = sizeof(ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;

    writeBits(header, os);
    writeBits(convert_to_serial(inputs[0]), os);
    writeBits(convert_to_serial(outputs[0]), os);

    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layer structs
        switch (layer.nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
                writeBits(affine.affine.nBytesPerWeight, os);
                writeBits(affine.affine.nBytesPerBias, os);
                writeBits(offsetFromBase(affine.affine.pWeights), os);
                writeBits(offsetFromBase(affine.affine.pBiases), os);
                writePwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
                writeBits(convolution.nFilterCoefficients, os);
                writeBits(convolution.nBytesFilterCoefficient, os);
                writeBits(convolution.nBytesBias, os);
                writeBits(convolution.nFilters, os);
                writeBits(convolution.nFeatureMaps, os);
                writeBits(convolution.nFeatureMapRows, os);
                writeBits(convolution.nFeatureMapColumns, os);
                writeBits(convolution.nFilterRows, os);
                writeBits(offsetFromBase(convolution.pFilters), os);
                writeBits(offsetFromBase(convolution.pBiases), os);
                writeBits(convolution.nPoolSize, os);
                writeBits(convolution.nPoolStride, os);
                writeBits(convolution.poolType, os);
                writePwl(convolution.pwl);
                break;
            }

            case INTEL_COPY: {
                auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
                writeBits(copy.nCopyRows, os);
                writeBits(copy.nCopyCols, os);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind (" << layer.nLayerKind << ") not supported";
        }

        // writing offsets from base
        writeBits(offsetFromBase(layer.pInputs), os);
        if (layer.nLayerKind != INTEL_COPY) {
            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        }
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory state information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole GNA graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

#endif

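// Helpers that flatten the plugin-side input/output descriptors into the
// RuntimeEndPoint records (scale factor, data pointer, element size, element
// count, orientation) that Export writes after the header.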
std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        auto outputName = output.first;
        auto outputDims = output.second->getTensorDesc().getDims();
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));

        GNAModelSerial::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                                 outputsDesc[outputIndex].ptrs[0],
                                                 outputsDesc[outputIndex].num_bytes_per_element,
                                                 elementsCount,
                                                 outputsDesc[outputIndex].orientation);
        endPoints.push_back(endPoint);
        outputIndex++;
    }
    return endPoints;
}

std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;

    std::size_t inputIndex = 0;
    for (auto const& input : inputsDataMap) {
        auto inputName = input.first;
        auto inputDims = input.second->getTensorDesc().getDims();

        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
        IE_ASSERT(descriptor_ptr.size() > 0);
        uint32_t element_size = 2u;  // inputs are serialized as 2 bytes per element
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);

        GNAModelSerial::RuntimeEndPoint endPoint(scaleFactor,
                                                 descriptor_ptr[0],
                                                 element_size,
                                                 elementsCount,
                                                 orientation);
        endPoints.push_back(endPoint);
        inputIndex++;
    }
    return endPoints;
}

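// Inputs and outputs are reconstructed from RuntimeEndPoint records: the
// stored descriptor offset is rebased onto the imported memory segment, and
// an FP32 NC tensor of shape {nGroup, elements_count / nGroup} is registered
// under a generated "input<N>"/"output<N>" name.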
void GNAModelSerial::ImportInputs(std::istream &is,
        void* basePtr,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        InferenceEngine::InputsDataMap& dataMap) {
    dataMap.clear();

    for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
        std::string name = "input" + std::to_string(inputIndex);
        RuntimeEndPoint input;
        is.read(reinterpret_cast<char *>(&input), sizeof(input));
        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + input.descriptor_offset));
        inputsDesc->orientation_in[name] = input.orientation;

        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
                                                            InferenceEngine::TensorDesc(
                                                                    InferenceEngine::Precision::FP32,
                                                                    inputDims,
                                                                    InferenceEngine::Layout::NC)));
        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
    }
}

void GNAModelSerial::ImportOutputs(std::istream &is,
        void* basePtr,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::OutputsDataMap& dataMap) {
    desc.clear();
    dataMap.clear();
    desc.resize(modelHeader.nOutputs);

    for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
        std::string name = "output" + std::to_string(outputIndex);
        RuntimeEndPoint output;
        is.read(reinterpret_cast<char *>(&output), sizeof(output));
        GNAPluginNS::OutputDesc description;
        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + output.descriptor_offset));
        description.orientation = output.orientation;
        description.num_bytes_per_element = output.element_size;
        description.scale_factor = output.scaleFactor;

        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
                                                 InferenceEngine::TensorDesc(
                                                         InferenceEngine::Precision::FP32,
                                                         outputDims,
                                                         InferenceEngine::Layout::NC));
        desc.at(outputIndex) = description;
    }
}

void GNAModelSerial::setHeader(ModelHeader header) {
    modelHeader = header;
}