// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <array>
#include <iomanip>
#include <ios>
#include <map>
#include <vector>

#include <details/ie_exception.hpp>
#include <ie_algorithm.hpp>
#include <ie_common.h>
#include <ie_precision.hpp>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>
#else
#include <mm_malloc.h>
#endif

#include "gna_plugin.hpp"
#include "gna_model_serial.hpp"

inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
    os.write(static_cast<const char*>(ptr), size);
}

template <class T>
inline void writeBits(const T & obj, std::ostream & os) {
    os.write(reinterpret_cast<const char *>(&obj), sizeof(T));
}

template <class T>
inline void readBits(T & obj, std::istream & is) {
    is.read(reinterpret_cast<char *>(&obj), sizeof(T));
}

inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
    is.read(reinterpret_cast<char *>(ptr), size);
}

template <int nBits, class T>
inline void readNBits(T & obj, std::istream & is) {
    std::array<uint8_t, nBits / 8> tmp;
    is.read(reinterpret_cast<char *>(&tmp), nBits / 8);

    obj = *reinterpret_cast<T*>(&tmp.front());
}

inline void * offsetToPointer(void * const base, uint64_t offset) {
    return reinterpret_cast<uint8_t *>(base) + offset;
}

template <class T>
inline void readOffset(T & ptr, void *base, std::istream & is) {
    uint64_t offset = 0ull;
    readBits(offset, is);
    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
}

union {
    uint16_t s;
    uint8_t c[2];
} constexpr static LECheck {1};

bool is_little_endian() {
    return LECheck.c[0] == 1;
}

const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
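
// ReadHeader consumes sizeof(ModelHeader) bytes (plus any forward-compatibility
// padding declared via headerSize), validates the magic and the major version, and
// leaves the stream positioned at the first endpoint record. The header is read with
// plain readBits, so ModelHeader must stay trivially copyable and its layout must
// match what Export writes below.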
ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
    is.exceptions(std::istream::failbit);

    ModelHeader header;
    readBits(header, is);
    if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                            << std::setfill('0') <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[0]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[1]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
            std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
    }
    if (header.version.major != HEADER_MAJOR) {
        THROW_GNA_EXCEPTION << "Imported file unsupported: major version should be == " << HEADER_MAJOR;
    }
    if (header.headerSize < sizeof(header)) {
        THROW_GNA_EXCEPTION << "Unsupported header size: minimal value is " << sizeof(header) << ", but read: " << header.headerSize;
    }
    /*
     * extra data needs to be added into the new header; modify this check as appropriate
     */

    // skip any extra header bytes for forward compatibility
    if (header.headerSize > sizeof(header)) {
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}

#define offsetFromBase(field)\
getOffsetFromBase(field, #field)
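
// offsetFromBase (above) forwards the field expression to the local getOffsetFromBase
// lambdas together with its stringized name, so out-of-range errors can report exactly
// which pointer fell outside the GNAAlloc segment.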

#if GNA_LIB_VER == 2

bool IsEmptyTensor(const Gna2Tensor& t) {
    return t.Type == Gna2DataTypeNone &&
        t.Data == nullptr &&
        t.Layout[0] == '\0' &&
        t.Mode == Gna2TensorModeDefault &&
        t.Shape.NumberOfDimensions == 0;
}

const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
    {Gna2OperationTypeConvolution, {
        sizeof(Gna2Shape),
        sizeof(Gna2BiasMode),
        sizeof(Gna2PoolingMode),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape),
        sizeof(Gna2Shape)}},
    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
};
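
// Serialized stream layout, as produced by Export and consumed by Import below:
//   1. ModelHeader
//   2. nInputs + nOutputs RuntimeEndPoint records, inputs first (pointers stored as offsets from base)
//   3. per operation: type, operand count, operand tensors, parameter count, parameters
//   4. memory state segments: count, then (offset, size) pairs
//   5. the raw gna graph blob of gnaMemSize bytes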
void GNAModelSerial::Import(void *basePointer,
    size_t gnaGraphSize,
    std::istream & is,
    std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
    std::vector<GNAPluginNS::OutputDesc> &desc,
    InferenceEngine::InputsDataMap& inputsDataMap,
    InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
        readNBits<32>(operation->Type, is);
        readBits(operation->NumberOfOperands, is);
        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
            Gna2Tensor t{};
            readBits(t, is);
            if (IsEmptyTensor(t)) {
                operation->Operands[i] = nullptr;
            } else {
                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
                // the serialized Data field holds an offset from the graph base; turn it back into a pointer
                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
            }
        }
        readBits(operation->NumberOfParameters, is);
        switch (operation->Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ") not supported";
        }
        if (operation->NumberOfParameters > 0)
            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
        else
            operation->Parameters = nullptr;
        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
            uint32_t paramSize = 0;
            readBits(paramSize, is);
            if (paramSize == 0) {
                operation->Parameters[i] = nullptr;
                continue;
            }
            operation->Parameters[i] = gnaUserAllocator(paramSize);
            readNBytes(operation->Parameters[i], paramSize, is);

            if (GnaParamSize.at(operation->Type).size() <= i) {
                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
            }
            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
            }
        }
    }

    // reading memory (state) information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read in the whole gna graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

uint32_t guessGrouping(Gna2Model const& model) {
    if (model.NumberOfOperations == 0 ||
        model.Operations == nullptr ||
        model.Operations[0].Operands == nullptr ||
        model.Operations[0].NumberOfOperands == 0 ||
        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
        THROW_GNA_EXCEPTION << "Cannot guess grouping";
    }
    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
}
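
// Export never stores raw pointers: every pointer into the GNAAlloc segment is
// rewritten as a byte offset from basePointer, which keeps the serialized blob
// position independent. Import reverses the mapping via readOffset/offsetToPointer.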
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    const std::vector<Gna2Operation>
        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);

    // all offsets will be computed relative to this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
        Gna2Tensor out = tensor;
        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
        return out;
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.element_size = ep.element_size;
        out.orientation = ep.orientation;
        return out;
    };

    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.headerSize = sizeof(ModelHeader);
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = guessGrouping(*gna2Model);
    header.nInputs = inputs.size();
    header.nOutputs = outputs.size();
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;

    writeBits(header, os);

    for (const auto &input : inputs) {
        writeBits(convert_to_serial(input), os);
    }
    for (const auto &output : outputs) {
        writeBits(convert_to_serial(output), os);
    }

    for (const auto & layer : layers) {
        writeBits(static_cast<uint32_t>(layer.Type), os);
        writeBits(layer.NumberOfOperands, os);

        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
            if (layer.Operands[i] == nullptr)
                writeBits(Gna2Tensor{}, os);
            else
                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
        }

        writeBits(layer.NumberOfParameters, os);

        // writing parameters
        switch (layer.Type) {
        case Gna2OperationTypeElementWiseAffine:
        case Gna2OperationTypeFullyConnectedAffine:
        case Gna2OperationTypeConvolution:
        case Gna2OperationTypeCopy:
        case Gna2OperationTypeTransposition:
            break;
        case Gna2OperationTypeRecurrent:
            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ") not supported";
        }
        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
            if (layer.Parameters[i] == nullptr) {
                writeBits(static_cast<uint32_t>(0), os);
                continue;
            }
            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
            writeBits(paramSize, os);
            writeNBytes(layer.Parameters[i], paramSize, os);
        }
    }

    // writing memory (state) information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole gna graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

#else
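
// Legacy path for GNA library v1: the stream layout is the same, but layers are
// serialized as intel_nnet_layer_t records with kind-specific structs
// (affine, convolutional, copy) instead of generic Gna2 operations.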
void GNAModelSerial::Import(void *basePointer,
    size_t gnaGraphSize,
    std::istream & is,
    std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
    std::vector<GNAPluginNS::OutputDesc> &desc,
    InferenceEngine::InputsDataMap& inputsDataMap,
    InferenceEngine::OutputsDataMap& outputsDataMap) {
    is.exceptions(std::istream::failbit);

    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
    ImportOutputs(is, basePointer, desc, outputsDataMap);

    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
        readBits(value.nSegments, is);
        if (value.nSegments != 0) {
            readOffset(value.pSegments, basePointer, is);
        } else {
            value.pSegments = nullptr;
        }
    };

    for (auto layer = ptr_nnet->pLayers; layer != ptr_nnet->pLayers + ptr_nnet->nLayers; ++layer) {
        readBits(layer->nInputColumns, is);
        readBits(layer->nInputRows, is);
        readBits(layer->nOutputColumns, is);
        readBits(layer->nOutputRows, is);
        readBits(layer->nBytesPerInput, is);
        readBits(layer->nBytesPerOutput, is);
        readBits(layer->nBytesPerIntermediateOutput, is);
        readNBits<32>(layer->nLayerKind, is);

        // reading layer structs
        switch (layer->nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
            }

            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
            readBits(affine.affine.nBytesPerWeight, is);
            readBits(affine.affine.nBytesPerBias, is);
            readOffset(affine.affine.pWeights, basePointer, is);
            readOffset(affine.affine.pBiases, basePointer, is);
            readPwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
            }

            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
            readBits(convolution.nFilterCoefficients, is);
            readBits(convolution.nBytesFilterCoefficient, is);
            readBits(convolution.nBytesBias, is);
            readBits(convolution.nFilters, is);
            readBits(convolution.nFeatureMaps, is);
            readBits(convolution.nFeatureMapRows, is);
            readBits(convolution.nFeatureMapColumns, is);
            readBits(convolution.nFilterRows, is);
            readOffset(convolution.pFilters, basePointer, is);
            readOffset(convolution.pBiases, basePointer, is);
            readBits(convolution.nPoolSize, is);
            readBits(convolution.nPoolStride, is);
            readBits(convolution.poolType, is);
            readPwl(convolution.pwl);
            break;
        }
        case INTEL_COPY: {
            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
            if (layer->pLayerStruct == nullptr) {
                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
            }

            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
            readBits(copy.nCopyRows, is);
            readBits(copy.nCopyCols, is);
            break;
        }
        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ") not supported";
        }

        // reading offsets of inputs/outputs
        readOffset(layer->pInputs, basePointer, is);
        if (layer->nLayerKind == INTEL_COPY) {
            layer->pOutputsIntermediate = nullptr;
        } else {
            readOffset(layer->pOutputsIntermediate, basePointer, is);
        }
        readOffset(layer->pOutputs, basePointer, is);
    }

    // reading memory (state) information
    uint32_t nStates = 0;
    readBits(nStates, is);
    if (pstates != nullptr) {
        pstates->resize(nStates);
    }

    for (uint32_t i = 0; i != nStates; i++) {
        void *pSegment = nullptr;
        readOffset(pSegment, basePointer, is);
        uint32_t segmentSz = 0;
        readBits(segmentSz, is);
        if (pstates) {
            (*pstates)[i] = { pSegment, segmentSz };
        }
    }

    // once the structure has been read, read in the whole gna graph
    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

/**
 * Exports the GNA1 model.
 * @param basePointer - base of the memory segment returned by GNAAlloc
 * @param gnaGraphSize - it could be calculated from the nnet, but that would overcomplicate the export,
 *                       while from the base address it is relatively easy to calculate
 * @param os - output stream
 */
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
    os.exceptions(std::ostream::failbit);

    std::vector<intel_nnet_layer_t>
        layers(ptr_nnet->pLayers, ptr_nnet->pLayers + ptr_nnet->nLayers);

    // all offsets will be computed relative to this pointer
    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
        if (offset > gnaGraphSize) {
            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
        }
        return offset;
    };

    auto writePwl = [&os, getOffsetFromBase] (intel_pwl_func_t & value) {
        writeBits(value.nSegments, os);
        // export requires a valid offset, and an offset from base to nullptr cannot be correct, so we do not store it at all
        if (value.nSegments != 0) {
            writeBits(offsetFromBase(value.pSegments), os);
        }
    };

    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
        RuntimeEndPoint out;
        out.elements_count = ep.elements_count;
        out.element_size = ep.element_size;
        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
        out.scaleFactor = ep.scaleFactor;
        out.orientation = ep.orientation;
        return out;
    };

    /**
     * writing header
     */
    ModelHeader header;
    header.gnam[0] = 'G';
    header.gnam[1] = 'N';
    header.gnam[2] = 'A';
    header.gnam[3] = 'M';
    header.version.major = HEADER_MAJOR;
    header.version.minor = HEADER_MINOR;
    header.gnaMemSize = gnaGraphSize;
    header.layersCount = layers.size();
    header.nGroup = ptr_nnet->nGroup;
    // the GNA1 path serializes exactly one input and one output endpoint (see below)
    header.nInputs = 1;
    header.nOutputs = 1;
    header.headerSize = sizeof(ModelHeader);
    header.nRotateRows = nRotateRows;
    header.nRotateColumns = nRotateColumns;

    writeBits(header, os);
    writeBits(convert_to_serial(inputs[0]), os);
    writeBits(convert_to_serial(outputs[0]), os);

    for (auto & layer : layers) {
        writeBits(layer.nInputColumns, os);
        writeBits(layer.nInputRows, os);
        writeBits(layer.nOutputColumns, os);
        writeBits(layer.nOutputRows, os);
        writeBits(layer.nBytesPerInput, os);
        writeBits(layer.nBytesPerOutput, os);
        writeBits(layer.nBytesPerIntermediateOutput, os);
        writeBits(static_cast<uint32_t>(layer.nLayerKind), os);

        // writing layer structs
        switch (layer.nLayerKind) {
        case INTEL_AFFINE_DIAGONAL:
        case INTEL_AFFINE: {
            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer.pLayerStruct);
            writeBits(affine.affine.nBytesPerWeight, os);
            writeBits(affine.affine.nBytesPerBias, os);
            writeBits(offsetFromBase(affine.affine.pWeights), os);
            writeBits(offsetFromBase(affine.affine.pBiases), os);
            writePwl(affine.pwl);
            break;
        }
        case INTEL_CONVOLUTIONAL: {
            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer.pLayerStruct);
            writeBits(convolution.nFilterCoefficients, os);
            writeBits(convolution.nBytesFilterCoefficient, os);
            writeBits(convolution.nBytesBias, os);
            writeBits(convolution.nFilters, os);
            writeBits(convolution.nFeatureMaps, os);
            writeBits(convolution.nFeatureMapRows, os);
            writeBits(convolution.nFeatureMapColumns, os);
            writeBits(convolution.nFilterRows, os);
            writeBits(offsetFromBase(convolution.pFilters), os);
            writeBits(offsetFromBase(convolution.pBiases), os);
            writeBits(convolution.nPoolSize, os);
            writeBits(convolution.nPoolStride, os);
            writeBits(convolution.poolType, os);
            writePwl(convolution.pwl);
            break;
        }
        case INTEL_COPY: {
            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
            writeBits(copy.nCopyRows, os);
            writeBits(copy.nCopyCols, os);
            break;
        }
        case INTEL_RECURRENT:
            THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
        case INTEL_INTERLEAVE:
            THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
        case INTEL_DEINTERLEAVE:
            THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
        default:
            THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind(" << layer.nLayerKind << ") not supported";
        }

        // writing offsets from base
        writeBits(offsetFromBase(layer.pInputs), os);
        if (layer.nLayerKind != INTEL_COPY) {
            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
        }
        writeBits(offsetFromBase(layer.pOutputs), os);
    }

    // writing memory (state) information
    writeBits(static_cast<uint32_t>(states.size()), os);
    for (auto && state : states) {
        writeBits(offsetFromBase(state.first), os);
        writeBits(state.second, os);
    }

    // once the structure has been written, push the whole gna graph
    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}

#endif
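
// A sketch of the intended round trip, assuming a GNAAlloc-style allocator on the
// import side (stream and allocator names are illustrative, not part of this file):
//
//   std::ofstream out("model.gna", std::ios::binary);
//   serial.Export(gnaAllocBase, gnaGraphSize, out);
//
//   std::ifstream in("model.gna", std::ios::binary);
//   auto header = GNAModelSerial::ReadHeader(in);
//   void *base = GNAAlloc(header.gnaMemSize);  // hypothetical allocation call
//   serial.setHeader(header);
//   serial.Import(base, header.gnaMemSize, in, inputsDesc, outputsDesc, inMap, outMap);
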
std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        auto outputName = output.first;
        auto outputDims = output.second->getTensorDesc().getDims();
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));

        GNAModelSerial::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
            outputsDesc[outputIndex].ptrs[0],
            outputsDesc[outputIndex].num_bytes_per_element,
            elementsCount,
            outputsDesc[outputIndex].orientation);
        endPoints.push_back(endPoint);
        outputIndex++;
    }
    return endPoints;
}

std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
        std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;

    std::size_t inputIndex = 0;
    for (auto const& input : inputsDataMap) {
        auto inputName = input.first;
        auto inputDims = input.second->getTensorDesc().getDims();

        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
        IE_ASSERT(descriptor_ptr.size() > 0);
        uint32_t element_size = 2u;  // fixed 2-byte (16-bit) input elements
        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);

        GNAModelSerial::RuntimeEndPoint endPoint(scaleFactor,
            descriptor_ptr[0],
            element_size,
            elementsCount,
            orientation);
        endPoints.push_back(endPoint);
        inputIndex++;
    }
    return endPoints;
}
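
// Note: Import reconstructs the I/O maps with synthetic names ("input0", "input1", ...,
// "output0", ...); the original network names are not serialized, so imported networks
// must be addressed by endpoint position rather than by the names used at export time.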
void GNAModelSerial::ImportInputs(std::istream &is,
    void* basePtr,
    std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
    InferenceEngine::InputsDataMap& dataMap) {
    dataMap = InferenceEngine::InputsDataMap();

    for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
        std::string name = "input" + std::to_string(inputIndex);
        RuntimeEndPoint input;
        is.read(reinterpret_cast<char *>(&input), sizeof(input));
        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + input.descriptor_offset));
        inputsDesc->orientation_in[name] = input.orientation;

        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
            InferenceEngine::TensorDesc(
                InferenceEngine::Precision::FP32,
                inputDims,
                InferenceEngine::Layout::NC)));
        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
    }
}

void GNAModelSerial::ImportOutputs(std::istream &is,
    void* basePtr,
    std::vector<GNAPluginNS::OutputDesc> &desc,
    InferenceEngine::OutputsDataMap& dataMap) {
    desc.clear();
    dataMap = InferenceEngine::OutputsDataMap();
    desc.resize(modelHeader.nOutputs);

    for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
        std::string name = "output" + std::to_string(outputIndex);
        RuntimeEndPoint output;
        is.read(reinterpret_cast<char *>(&output), sizeof(output));
        GNAPluginNS::OutputDesc description;
        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *>(basePtr) + output.descriptor_offset));
        description.orientation = output.orientation;
        description.num_bytes_per_element = output.element_size;
        description.scale_factor = output.scaleFactor;

        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
            InferenceEngine::TensorDesc(
                InferenceEngine::Precision::FP32,
                outputDims,
                InferenceEngine::Layout::NC));
        desc.at(outputIndex) = description;
    }
}

void GNAModelSerial::setHeader(ModelHeader header) {
    modelHeader = header;
}