1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
#include <array>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <istream>
#include <iterator>
#include <ostream>
#include <vector>

#include <mm_malloc.h>
#include <gna-api-types-xnn.h>

#include <details/ie_exception.hpp>

#include "gna_model_serial.hpp"
#include "gna_plugin_log.hpp"
/**
 * @brief Writes the raw in-memory bytes of @p obj to the stream.
 * @param obj trivially-copyable value to serialize (POD structs/integers).
 * @param os destination stream; stream exceptions propagate to the caller.
 */
template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}
/**
 * @brief Reads sizeof(T) raw bytes from the stream into @p obj.
 * Mirror of writeBits; @p obj must be trivially copyable.
 * @param obj destination value, overwritten with the bytes read.
 * @param is source stream; stream exceptions propagate to the caller.
 */
template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}
/**
 * @brief Reads exactly nBits bits (nBits/8 bytes) from the stream into @p obj,
 * regardless of sizeof(T). Used where the wire format fixes the field width
 * (e.g. a layer kind stored as 32 bits even if the enum size differs).
 * @param obj destination; zero-filled first, then the low bytes are copied.
 * @param is source stream; stream exceptions propagate to the caller.
 */
template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    static_assert(nBits % 8 == 0, "readNBits requires a whole number of bytes");
    std::array<uint8_t, nBits / 8> tmp{};
    is.read(reinterpret_cast<char *>(tmp.data()), nBits / 8);

    // Bounded memcpy instead of `*reinterpret_cast<T*>(&tmp.front())`:
    // the cast read sizeof(T) bytes and could run past tmp when
    // sizeof(T) > nBits/8, besides being an alignment/aliasing hazard.
    obj = T{};
    std::memcpy(&obj, tmp.data(), sizeof(T) < tmp.size() ? sizeof(T) : tmp.size());
}
/**
 * @brief Reads a 64-bit offset from the stream and rebases it onto @p base,
 * producing a live pointer of type T.
 *
 * The visible source dropped the stream read, leaving the offset permanently
 * zero (every pointer would alias @p base) — restored here.
 *
 * @param ptr out: base + offset, cast to the pointer type T.
 * @param base start of the memory segment the offsets were serialized against.
 * @param is source stream positioned at a uint64_t offset field.
 */
template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    is.read(reinterpret_cast<char *>(&offset), sizeof(offset));
    ptr = reinterpret_cast<T>(reinterpret_cast<uint8_t *>(base) + offset);
}
// Byte-order probe: store 1 into the 16-bit member and inspect the first
// byte through the overlapping byte array.
union {
    uint16_t s;
    uint8_t c[2];
} constexpr static LECheck {1};

// True when the host stores the least-significant byte first.
bool is_little_endian() {
    return LECheck.c[0] == 1;
}

// The serialized file starts with the bytes "GNAM"; pick the 32-bit integer
// encoding that matches host endianness so a single int compare validates it.
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
53 ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
54 is.exceptions(std::istream::failbit);
58 if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
59 THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
60 << std::setfill('0') <<
61 std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
62 std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
63 std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
64 std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
66 if (header.version.major < 1) {
67 THROW_GNA_EXCEPTION << "Imported file unsupported: major version sould be > 1";
69 if (header.headerSize < sizeof(header)) {
70 THROW_GNA_EXCEPTION << "Unsupported header size minimal value is : " << sizeof (header) << ", but read: " << header.headerSize;
73 * extra data need to be added into new header and modify check as appropriate
77 if (header.headerSize > sizeof(header)) {
78 is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
83 void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
84 is.exceptions(std::istream::failbit);
86 auto readPwl = [&is, basePointer] (intel_pwl_func_t & value) {
87 readBits(value.nSegments, is);
88 if (value.nSegments != 0) {
89 readOffset(value.pSegments, basePointer, is);
91 value.pSegments = nullptr;
95 for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
96 readBits(layer->nInputColumns, is);
97 readBits(layer->nInputRows, is);
98 readBits(layer->nOutputColumns, is);
99 readBits(layer->nOutputRows, is);
100 readBits(layer->nBytesPerInput, is);
101 readBits(layer->nBytesPerOutput, is);
102 readBits(layer->nBytesPerIntermediateOutput, is);
103 readNBits<32>(layer->nLayerKind, is);
105 // reading layers structs
106 switch (layer->nLayerKind) {
107 case INTEL_AFFINE_DIAGONAL:
109 layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
110 if (layer->pLayerStruct == nullptr) {
111 THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
114 auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
115 readBits(affine.affine.nBytesPerWeight, is);
116 readBits(affine.affine.nBytesPerBias, is);
117 readOffset(affine.affine.pWeights, basePointer, is);
118 readOffset(affine.affine.pBiases, basePointer, is);
122 case INTEL_CONVOLUTIONAL: {
123 layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
124 if (layer->pLayerStruct == nullptr) {
125 THROW_GNA_EXCEPTION <<"could not allocate memory for intel_convolutional_layer_t structure.";
128 auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
129 readBits(convolution.nFilterCoefficients, is);
130 readBits(convolution.nBytesFilterCoefficient, is);
131 readBits(convolution.nBytesBias, is);
132 readBits(convolution.nFilters, is);
133 readBits(convolution.nFeatureMaps, is);
134 readBits(convolution.nFeatureMapRows, is);
135 readBits(convolution.nFeatureMapColumns, is);
136 readBits(convolution.nFilterRows, is);
137 readOffset(convolution.pFilters, basePointer, is);
138 readOffset(convolution.pBiases, basePointer, is);
139 readBits(convolution.nPoolSize, is);
140 readBits(convolution.nPoolStride, is);
141 readBits(convolution.poolType, is);
142 readPwl(convolution.pwl);
146 case INTEL_RECURRENT:
147 THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
148 case INTEL_INTERLEAVE:
149 THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
150 case INTEL_DEINTERLEAVE:
151 THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
153 THROW_GNA_EXCEPTION << "Importing of copy layer not supported";
155 THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ") not supported";
158 // reading offsets of inputs/outputs
159 readOffset(layer->pInputs, basePointer, is);
160 readOffset(layer->pOutputsIntermediate, basePointer, is);
161 readOffset(layer->pOutputs, basePointer, is);
164 // writing memory information
165 uint32_t nStates = 0;
166 readBits(nStates, is);
167 if (pstates != nullptr) {
168 pstates->resize(nStates);
171 for (int i = 0; i != nStates; i++) {
173 readOffset(pSegment, basePointer, is);
175 readBits(segmentSz, is);
177 (*pstates)[i] = {pSegment, segmentSz};
182 // once structure has been read lets read whole gna graph
183 is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
// Stringizing helper: forwards `field` to getOffsetFromBase together with its
// textual name, so out-of-range offsets are reported with the field that
// produced them.
#define offsetFromBase(field)\
 getOffsetFromBase(field, #field)
 * @param gnaAllocSize - it can be calculated based on nnet, however it will overcomplicate export;
 * relative to the base address it is easy to calculate
197 void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
198 os.exceptions(std::ostream::failbit);
200 std::vector<intel_nnet_layer_t>
201 layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);
204 // all offsets will be from this pointer
205 auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
206 auto offset = static_cast<uint64_t >(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
207 if (offset > gnaGraphSize) {
208 THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
209 << ") not in range segment retuned from GNAAlloc(0x" << basePointer << "-0x"
210 << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
215 auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
216 writeBits(value.nSegments, os);
217 // export require certain offset, since offset from base to nullptr cannot be correct, we are not store it at all
218 if (value.nSegments != 0) {
219 writeBits(offsetFromBase(value.pSegments), os);
223 auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep){
224 ModelHeader::EndPoint out;
225 out.elements_count = ep.elements_count;
226 out.element_size = ep.element_size;
227 out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
228 out.scaleFactor = ep.scaleFactor;
235 header.gnam[0] = 'G';
236 header.gnam[1] = 'N';
237 header.gnam[2] = 'A';
238 header.gnam[3] = 'M';
239 header.version.major = HEADER_MAJOR;
240 header.version.minor = HEADER_MINOR;
241 header.gnaMemSize = gnaGraphSize;
242 header.layersCount = layers.size();
243 header.nGroup = ptr_nnet->nGroup;
244 header.input = convert_to_serial(input);
245 header.output = convert_to_serial(output);
246 header.headerSize = sizeof(ModelHeader);
247 header.nRotateRows = nRotateRows;
248 header.nRotateColumns = nRotateColumns;
251 writeBits(header, os);
253 for (auto & layer : layers) {
254 writeBits(layer.nInputColumns, os);
255 writeBits(layer.nInputRows, os);
256 writeBits(layer.nOutputColumns, os);
257 writeBits(layer.nOutputRows, os);
258 writeBits(layer.nBytesPerInput, os);
259 writeBits(layer.nBytesPerOutput, os);
260 writeBits(layer.nBytesPerIntermediateOutput, os);
261 writeBits(static_cast<uint32_t>(layer.nLayerKind), os);
263 // writing layers structs
264 switch (layer.nLayerKind) {
265 case INTEL_AFFINE_DIAGONAL:
267 auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
268 writeBits(affine.affine.nBytesPerWeight, os);
269 writeBits(affine.affine.nBytesPerBias, os);
270 writeBits(offsetFromBase(affine.affine.pWeights), os);
271 writeBits(offsetFromBase(affine.affine.pBiases), os);
272 writePwl(affine.pwl);
275 case INTEL_CONVOLUTIONAL: {
276 auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
277 writeBits(convolution.nFilterCoefficients, os);
278 writeBits(convolution.nBytesFilterCoefficient, os);
279 writeBits(convolution.nBytesBias, os);
280 writeBits(convolution.nFilters, os);
281 writeBits(convolution.nFeatureMaps, os);
282 writeBits(convolution.nFeatureMapRows, os);
283 writeBits(convolution.nFeatureMapColumns, os);
284 writeBits(convolution.nFilterRows, os);
285 writeBits(offsetFromBase(convolution.pFilters), os);
286 writeBits(offsetFromBase(convolution.pBiases), os);
287 writeBits(convolution.nPoolSize, os);
288 writeBits(convolution.nPoolStride, os);
289 writeBits(convolution.poolType, os);
290 writePwl(convolution.pwl);
294 case INTEL_RECURRENT:
295 THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
296 case INTEL_INTERLEAVE:
297 THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
298 case INTEL_DEINTERLEAVE:
299 THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
301 THROW_GNA_EXCEPTION << "Exporting of copy layer not supported";
303 THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind(" << layer.nLayerKind << ") not supported";
306 // writing offsets from base.
307 writeBits(offsetFromBase(layer.pInputs), os);
308 writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
309 writeBits(offsetFromBase(layer.pOutputs), os);
311 // writing memory information
312 writeBits(static_cast<uint32_t>(states.size()), os);
313 for (auto && state : states) {
314 writeBits(offsetFromBase(state.first), os);
315 writeBits(state.second, os);
318 // once structure has been written lets push gna graph
319 os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);