// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <array>
#include <iomanip>
#include <map>
#include <string>
#include <vector>

#include <details/ie_exception.hpp>
#include <ie_algorithm.hpp>
#include <ie_common.h>
#include <ie_precision.hpp>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>  // MSVC and ICC declare _mm_malloc here
#else
#include <mm_malloc.h>
#endif

#include "gna_plugin.hpp"
#include "gna_model_serial.hpp"
#include "serial/headers/latest/gna_model_header.hpp"

using namespace GNAPluginNS;
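
// The helpers below (de)serialize trivially copyable objects by streaming their
// raw in-memory bytes, so a model must be imported with the same type layout
// (and endianness, checked further down) that was used to export it.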
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
    os.write(static_cast<const char*>(ptr), size);
}

template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
    is.read(reinterpret_cast<char *>(ptr), size);
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);
    obj = *reinterpret_cast<T*>(&tmp.front());
}

inline void * offsetToPointer(void * const base, uint64_t offset) {
    return reinterpret_cast<uint8_t *>(base) + offset;
}

template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
}

union {
    uint16_t i;
    uint8_t c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}
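
// 0x4d414e47 is "GNAM" read as little-endian bytes; the constant is chosen per
// host endianness so the bytes in memory always spell 'G','N','A','M'.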
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
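
// Reads and validates the serialized model header: checks the "GNAM" magic,
// dispatches on the header version, and skips any trailing header bytes that a
// newer (forward-compatible) writer may have appended.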
GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);
    auto stream_len = is.tellg();
    if (stream_len == -1) {
        THROW_GNA_EXCEPTION << "Can't open file to import";
    }

    HeaderLatest::ModelHeader header;
    header.version.major = 0u;
    header.version.minor = 0u;
    auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
                                  + sizeof(HeaderLatest::ModelHeader::Version);
    if (stream_len > size_of_headers_header) {
        readNBytes(&header, size_of_headers_header, is);
    } else {
        readNBytes(&header, stream_len, is);
    }
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
            << std::setfill('0') <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }

    Header2dot1::ModelHeader tempHeader2dot1;
    switch (header.version.major) {
    case 2:
        switch (header.version.minor) {
        case 1:
            readBits(tempHeader2dot1, is);
            header = Header2dot3::ModelHeader(tempHeader2dot1);
            break;
        case 2:
        case 3:
            readBits(header, is);
            break;
        default:
            THROW_GNA_EXCEPTION << "Imported file unsupported. Minor version should be equal to 1, 2, or 3, but is: " << header.version.minor;
        }
        break;
    default:
        THROW_GNA_EXCEPTION << "Imported file unsupported. Import of files with major version " << header.version.major << " is not implemented";
    }

    /*
     * Since the header may grow in future versions, any extra data needs to be
     * added into the new header, and this check modified as appropriate.
     */
    // forward compatibility: skip extra header bytes written by newer versions
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}
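
// Convenience wrapper: passes the expression's own text (#field) as the name
// used in out-of-range diagnostics by the getOffsetFromBase lambdas below.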
#define offsetFromBase(field)\
getOffsetFromBase(field, #field)
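
// An all-default Gna2Tensor is written on export as the placeholder for a
// missing operand, so an imported tensor matching it maps back to a null operand.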
bool IsEmptyTensor(const Gna2Tensor& t) {
    return t.Type == Gna2DataTypeNone &&
           t.Data == nullptr &&
           t.Layout[0] == '\0' &&
           t.Mode == Gna2TensorModeDefault &&
           t.Shape.NumberOfDimensions == 0;
}
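
// Expected byte size of each serialized parameter, per operation type; used to
// validate parameter payloads on import and to size them on export.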
const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
    {Gna2OperationTypeConvolution, {
        sizeof(Gna2Shape),
        sizeof(Gna2BiasMode),
        sizeof(Gna2PoolingMode),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape)}},
    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
};
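
// Imports a model serialized with the GNA 2.x layout: optional tensor names
// (header versions >= 2.3), input/output descriptors, per-operation operands
// and parameters, memory-state segments, and finally the raw graph blob itself.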
void GNAModelSerial::Import(void *basePointer,
    size_t gnaGraphSize,
    std::istream & is,
    std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
    std::vector<GNAPluginNS::OutputDesc> &desc,
    InferenceEngine::InputsDataMap& inputsDataMap,
    InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string inName(nameSize, '\0');
                readNBytes(&inName[0], nameSize, is);
                inputNames.push_back(inName.substr(0, nameSize - 1));
            }
        }
    }
    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);

    if (modelHeader.version.major == 2) {
        if (modelHeader.version.minor >= 3) {
            for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
                uint32_t nameSize = 0;
                readNBits<32>(nameSize, is);
                std::string outName(nameSize, '\0');
                readNBytes(&outName[0], nameSize, is);
                outputNames.push_back(outName.substr(0, nameSize - 1));
            }
        }
    }
    ImportOutputs(is, basePointer, desc, outputsDataMap);

    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
        readNBits<32>(operation->Type, is);
        readBits(operation->NumberOfOperands, is);
        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
        IE_ASSERT(operation->Operands != nullptr);
        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
            Gna2Tensor t{};
            readBits(t, is);
            if (IsEmptyTensor(t)) {
                operation->Operands[i] = nullptr;
            } else {
                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
            }
        }
        readBits(operation->NumberOfParameters, is);
        switch (operation->Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type (" << operation->Type << ") not supported";
        }
        if (operation->NumberOfParameters > 0)
            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
        else
            operation->Parameters = nullptr;
        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
            uint32_t paramSize = 0;
            readBits(paramSize, is);
            IE_ASSERT(operation->Parameters != nullptr);
            if (paramSize == 0) {
                operation->Parameters[i] = nullptr;
                continue;
            }
            operation->Parameters[i] = gnaUserAllocator(paramSize);
            readNBytes(operation->Parameters[i], paramSize, is);

            if (GnaParamSize.at(operation->Type).size() <= i) {
                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
            }
            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
            }
        }
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read the whole GNA graph blob
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
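
// Derives the grouping (batch size) from the shape of the very first operand of
// the first operation: the smaller of its first two dimensions is taken as the
// group count.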
uint32_t guessGrouping(Gna2Model const& model) {
    if (model.NumberOfOperations == 0 ||
        model.Operations == nullptr ||
        model.Operations[0].Operands == nullptr ||
        model.Operations[0].NumberOfOperands == 0 ||
        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
        THROW_GNA_EXCEPTION << "Cannot guess grouping";
    }
    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
}
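
// Exports the GNA 2.x model: header, tensor names, input/output descriptors,
// per-operation operands and parameters (all pointers rewritten as offsets from
// basePointer), memory-state segments, and the raw graph blob.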
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    const std::vector<Gna2Operation>
        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);

    // all offsets are computed relative to this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
        Gna2Tensor out = tensor;
        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
        return out;
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.element_size = ep.element_size;
        out.orientation = ep.orientation;
        return out;
    };

    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = guessGrouping(*gna2Model);
    header.nInputs = inputs.size();
    header.nOutputs = outputs.size();
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;
    header.doRotateInput = doRotateInput;

    writeBits(header, os);

    for (auto &name : inputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &input : inputs) {
        writeBits(convert_to_serial(input), os);
    }
    for (auto &name : outputNames) {
        const auto nameSize = strlen(name.c_str()) + 1;
        writeBits(static_cast<uint32_t>(nameSize), os);
        writeNBytes(name.c_str(), nameSize, os);
    }
    for (const auto &output : outputs) {
        writeBits(convert_to_serial(output), os);
    }

    for (const auto & layer : layers) {
        writeBits(static_cast<uint32_t>(layer.Type), os);
        writeBits(layer.NumberOfOperands, os);

        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
            if (layer.Operands[i] == nullptr)
                writeBits(Gna2Tensor{}, os);
            else
                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
        }

        writeBits(layer.NumberOfParameters, os);

        // writing parameters
        switch (layer.Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type (" << layer.Type << ") not supported";
        }
        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
            if (layer.Parameters[i] == nullptr) {
                writeBits(static_cast<uint32_t>(0), os);
                continue;
            }
            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
            writeBits(paramSize, os);
            writeNBytes(layer.Parameters[i], paramSize, os);
        }
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the GNA graph blob
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
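
// Imports a model serialized with the legacy (GNA 1.0, intel_nnet_type_t)
// layout: fixed input/output descriptors, per-layer structs selected by
// nLayerKind, memory-state segments, and the raw graph blob.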
void GNAModelSerial::Import(void *basePointer,
    size_t gnaGraphSize,
    std::istream & is,
    std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
    std::vector<GNAPluginNS::OutputDesc> &desc,
    InferenceEngine::InputsDataMap& inputsDataMap,
    InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layer structs
        switch (layer->nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
            }

            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
            readBits(affine.affine.nBytesPerWeight, is);
            readBits(affine.affine.nBytesPerBias, is);
            readOffset(affine.affine.pWeights, basePointer, is);
            readOffset(affine.affine.pBiases, basePointer, is);
            readPwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
            }

            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
            readBits(convolution.nFilterCoefficients, is);
            readBits(convolution.nBytesFilterCoefficient, is);
            readBits(convolution.nBytesBias, is);
            readBits(convolution.nFilters, is);
            readBits(convolution.nFeatureMaps, is);
            readBits(convolution.nFeatureMapRows, is);
            readBits(convolution.nFeatureMapColumns, is);
            readBits(convolution.nFilterRows, is);
            readOffset(convolution.pFilters, basePointer, is);
            readOffset(convolution.pBiases, basePointer, is);
            readBits(convolution.nPoolSize, is);
            readBits(convolution.nPoolStride, is);
            readBits(convolution.poolType, is);
            readPwl(convolution.pwl);
            break;
        }
        case INTEL_COPY: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
            }

            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
            readBits(copy.nCopyRows, is);
            readBits(copy.nCopyCols, is);
            break;
        }
        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind (" << layer->nLayerKind << ") not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        if (layer->nLayerKind == INTEL_COPY) {
            layer->pOutputsIntermediate = nullptr;
        } else {
            readOffset(layer->pOutputsIntermediate, basePointer, is);
        }
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read the whole GNA graph blob
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

/**
 * @param gnaAllocSize - it could be calculated from the nnet, however that would overcomplicate the export;
 * relative to the base address it is easy enough to compute
 */
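// Exports the legacy (GNA 1.0) layout: a version 1.1 header, fixed single
// input/output descriptors, per-layer structs, and the raw graph blob.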
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);

    // all offsets are computed relative to this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // the export needs a valid offset, and an offset from base to nullptr cannot
        // be correct, so for an empty PWL the segment offset is not stored at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
        HeaderLatest::RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.orientation = ep.orientation;
        return out;
    };

    HeaderLatest::ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = 1u;
    header.version.minor = 1u;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    header.headerSize = sizeof(HeaderLatest::ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;

    writeBits(header, os);
    writeBits(convert_to_serial(inputs[0]), os);
    writeBits(convert_to_serial(outputs[0]), os);

    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layer structs
        switch (layer.nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
            writeBits(affine.affine.nBytesPerWeight, os);
            writeBits(affine.affine.nBytesPerBias, os);
            writeBits(offsetFromBase(affine.affine.pWeights), os);
            writeBits(offsetFromBase(affine.affine.pBiases), os);
            writePwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
            writeBits(convolution.nFilterCoefficients, os);
            writeBits(convolution.nBytesFilterCoefficient, os);
            writeBits(convolution.nBytesBias, os);
            writeBits(convolution.nFilters, os);
            writeBits(convolution.nFeatureMaps, os);
            writeBits(convolution.nFeatureMapRows, os);
            writeBits(convolution.nFeatureMapColumns, os);
            writeBits(convolution.nFilterRows, os);
            writeBits(offsetFromBase(convolution.pFilters), os);
            writeBits(offsetFromBase(convolution.pBiases), os);
            writeBits(convolution.nPoolSize, os);
            writeBits(convolution.nPoolStride, os);
            writeBits(convolution.poolType, os);
            writePwl(convolution.pwl);
            break;
        }
        case INTEL_COPY: {
            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
            writeBits(copy.nCopyRows, os);
            writeBits(copy.nCopyCols, os);
            break;
        }
        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind (" << layer.nLayerKind << ") not supported";
        }

        // writing offsets from base
        writeBits(offsetFromBase(layer.pInputs), os);
        if (layer.nLayerKind != INTEL_COPY) {
            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        }
        writeBits(offsetFromBase(layer.pOutputs), os);
    }
    // writing memory information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the GNA graph blob
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
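
// Builds the serializable endpoint records for every network output, pairing
// each OutputsDataMap entry (for the element count) with the matching
// OutputDesc (for scale factor, data pointer, element size, and orientation).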
std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        auto outputName = output.first;
        auto outputDims = output.second->getTensorDesc().getDims();
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));

        HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                               outputsDesc[outputIndex].ptrs[0],
                                               outputsDesc[outputIndex].num_bytes_per_element,
                                               elementsCount,
                                               outputsDesc[outputIndex].orientation);
        endPoints.push_back(endPoint);
        outputIndex++;
    }
    return endPoints;
}
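
// Builds the serializable endpoint records for every network input, taking the
// scale factor, data pointer, and orientation from the InputDesc by input name.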
std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
        std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;

    std::size_t inputIndex = 0;
    for (auto const& input : inputsDataMap) {
        auto inputName = input.first;
        auto inputDims = input.second->getTensorDesc().getDims();

        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
        IE_ASSERT(descriptor_ptr.size() > 0);
        uint32_t element_size = 2u;
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);

        HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                               descriptor_ptr[0],
                                               element_size,
                                               elementsCount,
                                               orientation);
        endPoints.push_back(endPoint);
        inputIndex++;
    }
    return endPoints;
}
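
// Reads one RuntimeEndPoint per input from the stream and rebuilds the
// plugin-side input descriptors plus the InputsDataMap (FP32, NC layout) for
// the imported network.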
void GNAModelSerial::ImportInputs(std::istream &is,
        void* basePtr,
        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
        InferenceEngine::InputsDataMap& dataMap) {
    for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
            ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
        HeaderLatest::RuntimeEndPoint input;
        is.read(reinterpret_cast<char *>(&input), sizeof(input));
        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + input.descriptor_offset));
        inputsDesc->orientation_in[name] = input.orientation;
        inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;

        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
                                                                            InferenceEngine::TensorDesc(
                                                                                    InferenceEngine::Precision::FP32,
                                                                                    inputDims,
                                                                                    InferenceEngine::Layout::NC)));
        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
    }
}
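
// Reads one RuntimeEndPoint per output and rebuilds the output descriptors plus
// the OutputsDataMap (FP32, NC layout) for the imported network.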
void GNAModelSerial::ImportOutputs(std::istream &is,
        void* basePtr,
        std::vector<GNAPluginNS::OutputDesc> &desc,
        InferenceEngine::OutputsDataMap& dataMap) {
    desc.resize(modelHeader.nOutputs);

    for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
            ? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
        HeaderLatest::RuntimeEndPoint output;
        is.read(reinterpret_cast<char *>(&output), sizeof(output));
        OutputDesc description;
        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + output.descriptor_offset));
        description.orientation = output.orientation;
        description.num_bytes_per_element = output.element_size;
        description.scale_factor = output.scaleFactor;

        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
                                                                InferenceEngine::TensorDesc(
                                                                        InferenceEngine::Precision::FP32,
                                                                        outputDims,
                                                                        InferenceEngine::Layout::NC));
        desc.at(outputIndex) = description;
    }
}
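
// Stores the header returned by ReadHeader so subsequent Import calls can honor
// the serialized version and the input/output counts it declares.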
void GNAModelSerial::setHeader(HeaderLatest::ModelHeader header) {
    modelHeader = header;
}