inference-engine/src/gna_plugin/gna_model_serial.cpp
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <array>
#include <details/ie_exception.hpp>
#include <ios>
#include <iomanip>
#ifndef _WIN32
#include <mm_malloc.h>
#endif
#include <gna-api-types-xnn.h>
#include "gna_model_serial.hpp"
#include "gna_plugin_log.hpp"

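// Raw binary (de)serialization helpers: they write/read the in-memory byte
// representation of an object directly to/from the stream, and readOffset turns
// a stored 64-bit offset back into a pointer relative to the given base.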
template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);

    obj = *reinterpret_cast<T*>(&tmp.front());
}

template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(reinterpret_cast<uint8_t *>(base) + offset);
}

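// The file magic consists of the bytes 'G', 'N', 'A', 'M'. Read as a 32-bit integer
// they equal 0x4d414e47 on a little-endian host and 0x474e414d on a big-endian one,
// so the expected magic is chosen according to the host byte order detected below.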
union {
    uint16_t s;
    uint8_t  c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;

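// Reads and validates the serialized model header; for forward compatibility any
// extra bytes of a newer (larger) header are skipped.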
ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);

    ModelHeader header;
    readBits(header, is);
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                           << std::setfill('0') <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                           std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }
    if (header.version.major < 1) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: major version should be >= 1";
    }
    if (header.headerSize < sizeof(header)) {
        THROW_GNA_EXCEPTION << "Unsupported header size: minimal value is " << sizeof(header) << ", but read: " << header.headerSize;
    }
    /*
     * extra data needs to be added to a new header version, with this check updated accordingly
     */

    // forward compatibility: skip any extra bytes of a newer, larger header
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}

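// Restores the network description from the stream: per-layer descriptors are read
// back into ptr_nnet, serialized offsets are converted to pointers relative to
// basePointer, and finally the raw GNA graph of gnaGraphSize bytes is read into
// basePointer.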
void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
    is.exceptions(std::istream::failbit);

    auto readPwl = [&is, basePointer] (intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

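    // deserialize per-layer descriptors in the same order Export() writes them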
    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layers structs
        switch (layer->nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
                if (layer->pLayerStruct == nullptr) {
                    THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
                }

                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
                readBits(affine.affine.nBytesPerWeight, is);
                readBits(affine.affine.nBytesPerBias, is);
                readOffset(affine.affine.pWeights, basePointer, is);
                readOffset(affine.affine.pBiases, basePointer, is);
                readPwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
                if (layer->pLayerStruct == nullptr) {
                    THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
                }

                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
                readBits(convolution.nFilterCoefficients, is);
                readBits(convolution.nBytesFilterCoefficient, is);
                readBits(convolution.nBytesBias, is);
                readBits(convolution.nFilters, is);
                readBits(convolution.nFeatureMaps, is);
                readBits(convolution.nFeatureMapRows, is);
                readBits(convolution.nFeatureMapColumns, is);
                readBits(convolution.nFilterRows, is);
                readOffset(convolution.pFilters, basePointer, is);
                readOffset(convolution.pBiases, basePointer, is);
                readBits(convolution.nPoolSize, is);
                readBits(convolution.nPoolStride, is);
                readBits(convolution.poolType, is);
                readPwl(convolution.pwl);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
            case INTEL_COPY:
                THROW_GNA_EXCEPTION << "Importing of copy layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind (" << layer->nLayerKind << ") not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        readOffset(layer->pOutputsIntermediate, basePointer, is);
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory (state) information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
       void *pSegment;
       readOffset(pSegment, basePointer, is);
       uint32_t segmentSz;
       readBits(segmentSz, is);
       if (pstates) {
           (*pstates)[i] = {pSegment, segmentSz};
       }
    }


    // once the structure has been read, read the whole GNA graph blob
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

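// offsetFromBase(field) passes both the pointer and its stringified name to
// getOffsetFromBase(), e.g. offsetFromBase(layer.pInputs) expands to
// getOffsetFromBase(layer.pInputs, "layer.pInputs"), so range-check errors can
// name the offending field.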
#define offsetFromBase(field)\
getOffsetFromBase(field, #field)


/**
 * Serializes ptr_nnet and the associated GNA memory into the stream.
 * @param basePointer - base of the GNA memory segment; all pointers are exported as offsets from it
 * @param gnaGraphSize - it can be calculated based on the nnet, however that would overcomplicate export;
 *                       given the base address it is relatively easy to calculate
 * @param os
 */
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);


    // all offsets will be from this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                               << ") is not within the segment returned from GNAAlloc(0x" << basePointer << "-0x"
                               << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset; since an offset from base to nullptr cannot be correct, it is not stored at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        ModelHeader::EndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        return out;
    };
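    // convert_to_serial maps a runtime endpoint to its on-disk form: element count,
    // element size and scale factor are copied as-is, while the descriptor pointer is
    // stored as an offset from the GNA memory base.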
    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.input  = convert_to_serial(input);
    header.output = convert_to_serial(output);
    header.headerSize = sizeof(ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;


    writeBits(header, os);

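    // serialize per-layer descriptors; Import() reads them back in the same order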
    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layers structs
        switch (layer.nLayerKind) {
            case INTEL_AFFINE_DIAGONAL:
            case INTEL_AFFINE: {
                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
                writeBits(affine.affine.nBytesPerWeight, os);
                writeBits(affine.affine.nBytesPerBias, os);
                writeBits(offsetFromBase(affine.affine.pWeights), os);
                writeBits(offsetFromBase(affine.affine.pBiases), os);
                writePwl(affine.pwl);
                break;
            }
            case INTEL_CONVOLUTIONAL: {
                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
                writeBits(convolution.nFilterCoefficients, os);
                writeBits(convolution.nBytesFilterCoefficient, os);
                writeBits(convolution.nBytesBias, os);
                writeBits(convolution.nFilters, os);
                writeBits(convolution.nFeatureMaps, os);
                writeBits(convolution.nFeatureMapRows, os);
                writeBits(convolution.nFeatureMapColumns, os);
                writeBits(convolution.nFilterRows, os);
                writeBits(offsetFromBase(convolution.pFilters), os);
                writeBits(offsetFromBase(convolution.pBiases), os);
                writeBits(convolution.nPoolSize, os);
                writeBits(convolution.nPoolStride, os);
                writeBits(convolution.poolType, os);
                writePwl(convolution.pwl);
                break;
            }

            case INTEL_RECURRENT:
                THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
            case INTEL_INTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
            case INTEL_DEINTERLEAVE:
                THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
            case INTEL_COPY:
                THROW_GNA_EXCEPTION << "Exporting of copy layer not supported";
            default:
                THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind (" << layer.nLayerKind << ") not supported";
        }

        // writing offsets from base.
        writeBits(offsetFromBase(layer.pInputs), os);
        writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory (state) information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, append the whole GNA graph blob
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}