[GNA] Fix a global buffer overflow in GNAModelSerial::Import (#3290) (#3327)
inference-engine/src/gna_plugin/gna_model_serial.cpp
index 84c7d3c..4ed6511 100644
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2019 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,12 +7,26 @@
 #include <details/ie_exception.hpp>
 #include <ios>
 #include <iomanip>
-#ifndef _WIN32
+#include <map>
+#include <ie_algorithm.hpp>
+#include <ie_common.h>
+#include <ie_precision.hpp>
+
+#if defined __INTEL_COMPILER || defined _MSC_VER
+#include <malloc.h>
+#else
 #include <mm_malloc.h>
 #endif
-#include <gna-api-types-xnn.h>
+
+#include "gna_plugin.hpp"
 #include "gna_model_serial.hpp"
-#include "gna_plugin_log.hpp"
+#include "serial/headers/latest/gna_model_header.hpp"
+
+using namespace GNAPluginNS;
+
+inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
+    os.write(static_cast<const char*>(ptr), size);
+}
 
 template <class T>
 inline void writeBits(const T & obj, std::ostream & os) {
@@ -24,6 +38,10 @@ inline void readBits(T & obj, std::istream & is) {
     is.read(reinterpret_cast<char *>(&obj), sizeof(T));
 }
 
+inline void readNBytes(void * ptr, uint32_t size, std::istream & is) {
+    is.read(reinterpret_cast<char *>(ptr), size);
+}
+
 template <int nBits, class T>
 inline void readNBits(T & obj, std::istream & is) {
     std::array<uint8_t, nBits / 8> tmp;
@@ -32,11 +50,15 @@ inline void readNBits(T & obj, std::istream & is) {
     obj = * reinterpret_cast<T*>(&tmp.front());
 }
 
+inline void * offsetToPointer(void * const base, uint64_t offset) {
+    return reinterpret_cast<uint8_t *>(base) + offset;
+}
+
 template <class T>
 inline void readOffset(T & ptr, void *base,  std::istream & is) {
     uint64_t offset = 0ull;
     readBits(offset, is);
-    ptr = reinterpret_cast<T>(reinterpret_cast<uint8_t *>(base) + offset);
+    ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
 }
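
readOffset trusts whatever offset the stream contains, so a corrupted or hostile file can still produce a pointer outside the region returned by GNAAlloc; this is the same class of problem the header changes below guard against. A minimal bounds-checked sketch (readOffsetChecked and the limit parameter are illustrative, not part of the plugin):

    template <class T>
    inline void readOffsetChecked(T & ptr, void *base, uint64_t limit, std::istream & is) {
        uint64_t offset = 0ull;
        readBits(offset, is);
        // mirror the Export-side getOffsetFromBase() range check
        if (offset > limit) {
            THROW_GNA_EXCEPTION << "serialized offset " << offset
                                << " is outside the GNAAlloc region of size " << limit;
        }
        ptr = reinterpret_cast<T>(offsetToPointer(base, offset));
    }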
 
 union {
@@ -50,11 +72,25 @@ bool is_little_endian() {
 
 const int gna_header_magic = is_little_endian() ?  0x4d414e47 : 0x474e414d;
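
On a little-endian host the magic constant is simply the bytes 'G' 'N' 'A' 'M' read back as a 32-bit integer, which is what the gnam check in ReadHeader below relies on. A quick sanity check (hypothetical, not in the source):

    // 0x4d414e47 is 'M','A','N','G' from high byte to low, i.e. the
    // little-endian in-memory layout 0x47 0x4e 0x41 0x4d == "GNAM"
    static_assert((('M' << 24) | ('A' << 16) | ('N' << 8) | 'G') == 0x4d414e47, "GNAM magic");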
 
-ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
+GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
     is.exceptions(std::istream::failbit);
-
-    ModelHeader header;
-    readBits(header, is);
+    is.seekg(0, is.end);
+    auto stream_len = is.tellg();
+    if (stream_len == -1) {
+        THROW_GNA_EXCEPTION << "Can't open file to import";
+    }
+    is.seekg(0, is.beg);
+
+    HeaderLatest::ModelHeader header;
+    header.version.major = 0u;
+    header.version.minor = 0u;
+    auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
+                                + sizeof(HeaderLatest::ModelHeader::Version);
+    if (stream_len > size_of_headers_header) {
+        readNBytes(&header, size_of_headers_header, is);
+    } else {
+        readNBytes(&header, stream_len, is);
+    }
     if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
         THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                            << std::setfill('0') <<
@@ -63,12 +99,28 @@ ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
                            std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                            std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
     }
-    if (header.version.major < 1) {
-        THROW_GNA_EXCEPTION << "Imported file unsupported: major version sould be > 1";
-    }
-    if (header.headerSize < sizeof(header)) {
-        THROW_GNA_EXCEPTION << "Unsupported header size minimal value is : " << sizeof (header) << ", but read: " << header.headerSize;
+
+    is.seekg(0, is.beg);
+    Header2dot1::ModelHeader tempHeader2dot1;
+    switch (header.version.major) {
+        case 2:
+            switch (header.version.minor) {
+                case 1:
+                    readBits(tempHeader2dot1, is);
+                    header = Header2dot3::ModelHeader(tempHeader2dot1);
+                    break;
+                case 2:
+                case 3:
+                    readBits(header, is);
+                    break;
+                default:
+                    THROW_GNA_EXCEPTION << "Imported file unsupported. Minor version should be 1, 2 or 3, but is: " << header.version.minor;
+            }
+            break;
+        default:
+            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " << header.version.major << " is not implemented";
     }
+
     /*
      * extra data need to be added into new header and modify check as appropriate
      */
@@ -80,10 +132,287 @@ ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
     return header;
 }
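
The staged read above is the heart of the hardening: only the fixed prefix (gnam, headerSize, Version) is read first, clamped to the actual stream length, and the full header is read again from offset 0 only once the version is known. The old code read a complete latest-version ModelHeader unconditionally, regardless of how much data the file really contained or which header layout it used.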
 
-void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize,  std::istream & is) {
+#define offsetFromBase(field)\
+getOffsetFromBase(field, #field)
+
+#if GNA_LIB_VER == 2
+
+bool IsEmptyTensor(const Gna2Tensor& t) {
+    return t.Type == Gna2DataTypeNone &&
+        t.Data == nullptr &&
+        t.Layout[0] == '\0' &&
+        t.Mode == Gna2TensorModeDefault &&
+        t.Shape.NumberOfDimensions == 0;
+}
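
This is the Import-side half of a convention visible in Export below: an absent operand is serialized as a value-initialized Gna2Tensor{} and restored as a nullptr entry in Operands. A sketch of the invariant (assuming the default GNA2 enum values are zero, as the zero-comparisons above suggest):

    // Export:  operand == nullptr  ->  writeBits(Gna2Tensor{}, os);
    // Import:  IsEmptyTensor(t)    ->  operation->Operands[i] = nullptr;
    Gna2Tensor empty{};
    assert(IsEmptyTensor(empty));  // holds if value-initialization zeroes every field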
+
+const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
+    {Gna2OperationTypeFullyConnectedAffine, {sizeof(Gna2BiasMode), sizeof(uint32_t)}},
+    {Gna2OperationTypeConvolution, {
+        sizeof(Gna2Shape),
+        sizeof(Gna2BiasMode),
+        sizeof(Gna2PoolingMode),
+        sizeof(Gna2Shape),
+        sizeof(Gna2Shape),
+        sizeof(Gna2Shape)}},
+    {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
+    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
+};
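
Read together with the Import loop below, this map pins down the on-disk record for a single operation. A sketch of the layout (field names are illustrative):

    // Serialized Gna2Operation record, as consumed by Import:
    //   uint32_t   type;                        // Gna2OperationType
    //   uint32_t   numberOfOperands;
    //   Gna2Tensor operands[numberOfOperands];  // Data holds an offset from basePointer;
    //                                           // an all-zero tensor means "absent"
    //   uint32_t   numberOfParameters;
    //   then, numberOfParameters times:
    //     uint32_t paramSize;                   // 0 means "absent parameter"
    //     uint8_t  param[paramSize];            // must equal GnaParamSize.at(type).at(i)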
+
+void GNAModelSerial::Import(void *basePointer,
+        size_t gnaGraphSize,
+        std::istream & is,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::InputsDataMap& inputsDataMap,
+        InferenceEngine::OutputsDataMap& outputsDataMap) {
+    is.exceptions(std::istream::failbit);
+
+    if (modelHeader.version.major == 2) {
+        if (modelHeader.version.minor >= 3) {
+            for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
+                uint32_t nameSize = 0;
+                readNBits<32>(nameSize, is);
+                std::string inName(nameSize, '\0');
+                readNBytes(&inName[0], nameSize, is);
+                inputNames.push_back(inName.substr(0, nameSize - 1));
+            }
+        }
+    }
+    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
+
+    if (modelHeader.version.major == 2) {
+        if (modelHeader.version.minor >= 3) {
+            for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
+                uint32_t nameSize = 0;
+                readNBits<32>(nameSize, is);
+                std::string outName(nameSize, '\0');
+                readNBytes(&outName[0], nameSize, is);
+                outputNames.push_back(outName.substr(0, nameSize - 1));
+            }
+        }
+    }
+    ImportOutputs(is, basePointer, desc, outputsDataMap);
+
+    for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
+        readNBits<32>(operation->Type, is);
+        readBits(operation->NumberOfOperands, is);
+        operation->Operands = static_cast<Gna2Tensor const **>(gnaUserAllocator(sizeof(Gna2Tensor*) * operation->NumberOfOperands));
+        IE_ASSERT(operation->Operands != nullptr);
+        for (uint32_t i = 0; i < operation->NumberOfOperands; i++) {
+            Gna2Tensor t{};
+            readBits(t, is);
+            if (IsEmptyTensor(t)) {
+                operation->Operands[i] = nullptr;
+            } else {
+                operation->Operands[i] = static_cast<Gna2Tensor const *>(gnaUserAllocator(sizeof(Gna2Tensor)));
+                t.Data = offsetToPointer(basePointer, reinterpret_cast<uint64_t>(t.Data));
+                const_cast<Gna2Tensor&>(*operation->Operands[i]) = t;
+            }
+        }
+        readBits(operation->NumberOfParameters, is);
+        switch (operation->Type) {
+        case Gna2OperationTypeElementWiseAffine:
+        case Gna2OperationTypeFullyConnectedAffine:
+        case Gna2OperationTypeConvolution:
+        case Gna2OperationTypeCopy:
+        case Gna2OperationTypeTransposition:
+            break;
+        case Gna2OperationTypeRecurrent:
+            THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
+        default:
+            THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ") not supported";
+        }
+        if (operation->NumberOfParameters > 0)
+            operation->Parameters = static_cast<void **>(gnaUserAllocator(sizeof(void*) * operation->NumberOfParameters));
+        else
+            operation->Parameters = nullptr;
+        for (uint32_t i = 0; i < operation->NumberOfParameters; i++) {
+            uint32_t paramSize = 0;
+            readBits(paramSize, is);
+            IE_ASSERT(operation->Parameters != nullptr);
+            if (paramSize == 0) {
+                IE_ASSERT(operation->Parameters != nullptr);
+                operation->Parameters[i] = nullptr;
+                continue;
+            }
+            operation->Parameters[i] = gnaUserAllocator(paramSize);
+            readNBytes(operation->Parameters[i], paramSize, is);
+
+            if (GnaParamSize.at(operation->Type).size() <= i) {
+                THROW_GNA_EXCEPTION << "Cannot import parameter of index: " << i;
+            }
+            if (paramSize != GnaParamSize.at(operation->Type).at(i)) {
+                THROW_GNA_EXCEPTION << "Parameter size mismatch on import: " << i;
+            }
+        }
+    }
+
+    // reading memory information
+    uint32_t nStates = 0;
+    readBits(nStates, is);
+    if (pstates != nullptr) {
+        pstates->resize(nStates);
+    }
+
+    for (int i = 0; i != nStates; i++) {
+        void *pSegment;
+        readOffset(pSegment, basePointer, is);
+        uint32_t segmentSz;
+        readBits(segmentSz, is);
+        if (pstates) {
+            (*pstates)[i] = { pSegment, segmentSz };
+        }
+    }
+
+
+    // once the structure has been read, load the whole gna graph
+    is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
+}
+
+
+uint32_t guessGrouping(Gna2Model const& model) {
+    if (model.NumberOfOperations == 0 ||
+        model.Operations == nullptr ||
+        model.Operations[0].Operands == nullptr ||
+        model.Operations[0].NumberOfOperands == 0 ||
+        model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
+        THROW_GNA_EXCEPTION << "Can not guess grouping";
+    }
+    return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
+}
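
For example, a first operand of shape [8, 16] gives a guessed grouping of min(8, 16) = 8. The parentheses around (std::min) keep the call safe from the min macro that <windows.h> can inject.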
+
+void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
+    os.exceptions(std::ostream::failbit);
+
+    const std::vector<Gna2Operation>
+        layers(gna2Model->Operations, gna2Model->Operations + gna2Model->NumberOfOperations);
+
+
+    // all offsets will be from this pointer
+    auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) {
+        auto offset = static_cast<uint64_t>(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
+        if (offset > gnaGraphSize) {
+            THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
+                << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
+                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
+        }
+        return offset;
+    };
+
+    auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) {
+        Gna2Tensor out = tensor;
+        out.Data = reinterpret_cast<void*>(getOffsetFromBase(tensor.Data));
+        return out;
+    };
+
+    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
+        HeaderLatest::RuntimeEndPoint out;
+        out.elements_count = ep.elements_count;
+        out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
+        out.scaleFactor = ep.scaleFactor;
+        out.element_size = ep.element_size;
+        out.orientation = ep.orientation;
+        return out;
+    };
+    /**
+     * writing header
+     */
+    HeaderLatest::ModelHeader header;
+    header.gnam[0] = 'G';
+    header.gnam[1] = 'N';
+    header.gnam[2] = 'A';
+    header.gnam[3] = 'M';
+    header.headerSize = sizeof(HeaderLatest::ModelHeader);
+    header.gnaMemSize = gnaGraphSize;
+    header.layersCount = layers.size();
+    header.nGroup = guessGrouping(*gna2Model);
+    header.nInputs = inputs.size();
+    header.nOutputs = outputs.size();
+    header.nRotateRows = nRotateRows;
+    header.nRotateColumns = nRotateColumns;
+    header.doRotateInput = doRotateInput;
+
+
+    writeBits(header, os);
+
+    for (auto &name : inputNames) {
+        const auto nameSize = strlen(name.c_str()) + 1;
+        writeBits(static_cast<uint32_t>(nameSize), os);
+        writeNBytes(name.c_str(), nameSize , os);
+    }
+    for (const auto &input : inputs) {
+        writeBits(convert_to_serial(input), os);
+    }
+    for (auto &name : outputNames) {
+        const auto nameSize = strlen(name.c_str()) + 1;
+        writeBits(static_cast<uint32_t>(nameSize), os);
+        writeNBytes(name.c_str(), nameSize, os);
+    }
+    for (const auto &output : outputs) {
+        writeBits(convert_to_serial(output), os);
+    }
+
+    for (const auto & layer : layers) {
+        writeBits(static_cast<uint32_t>(layer.Type), os);
+        writeBits(layer.NumberOfOperands, os);
+
+        for (uint32_t i = 0; i < layer.NumberOfOperands; i++) {
+            if (layer.Operands[i] == nullptr)
+                writeBits(Gna2Tensor{}, os);
+            else
+                writeBits(getTensorWithProperOffset(*layer.Operands[i]), os);
+        }
+
+        writeBits(layer.NumberOfParameters, os);
+
+        // writing parameters
+        switch (layer.Type) {
+        case Gna2OperationTypeElementWiseAffine:
+        case Gna2OperationTypeFullyConnectedAffine:
+        case Gna2OperationTypeConvolution:
+        case Gna2OperationTypeCopy:
+        case Gna2OperationTypeTransposition:
+            break;
+        case Gna2OperationTypeRecurrent:
+            THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
+        default:
+            THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ") not supported";
+        }
+        for (uint32_t i = 0; i < layer.NumberOfParameters; i++) {
+            if (layer.Parameters[i] == nullptr) {
+                writeBits(static_cast<uint32_t>(0), os);
+                continue;
+            }
+            const auto paramSize = GnaParamSize.at(layer.Type).at(i);
+            writeBits(paramSize, os);
+            writeNBytes(layer.Parameters[i], paramSize, os);
+        }
+    }
+    // writing memory information
+    writeBits(static_cast<uint32_t>(states.size()), os);
+    for (auto && state : states) {
+        writeBits(offsetFromBase(state.first), os);
+        writeBits(state.second, os);
+    }
+
+    // once the structure has been written, push the whole gna graph
+    os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
+}
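
A hypothetical round trip through this GNA_LIB_VER == 2 path, assuming serial and serial2 are already-constructed GNAModelSerial instances and basePointer/gnaGraphSize describe the GNAAlloc region (construction and the descriptor containers are elided):

    std::stringstream buf;
    serial.Export(basePointer, gnaGraphSize, buf);

    auto header = GNAModelSerial::ReadHeader(buf);  // validates magic, version, stream length
    serial2.setHeader(header);
    serial2.Import(basePointer, header.gnaMemSize, buf,
                   inputsDesc, outputsDesc, inputsDataMap, outputsDataMap);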
+#else
+
+void GNAModelSerial::Import(void *basePointer,
+        size_t gnaGraphSize,
+        std::istream & is,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::InputsDataMap& inputsDataMap,
+        InferenceEngine::OutputsDataMap& outputsDataMap) {
     is.exceptions(std::istream::failbit);
 
-    auto readPwl = [&is, basePointer] (intel_pwl_func_t & value) {
+    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
+    ImportOutputs(is, basePointer, desc, outputsDataMap);
+
+    auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
         readBits(value.nSegments, is);
         if (value.nSegments != 0) {
             readOffset(value.pSegments, basePointer, is);
@@ -104,60 +433,74 @@ void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize,  std::istrea
 
         // reading layers structs
         switch (layer->nLayerKind) {
-            case INTEL_AFFINE_DIAGONAL:
-            case INTEL_AFFINE: {
-                layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
-                if (layer->pLayerStruct == nullptr) {
-                    THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
-                }
-
-                auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
-                readBits(affine.affine.nBytesPerWeight, is);
-                readBits(affine.affine.nBytesPerBias, is);
-                readOffset(affine.affine.pWeights, basePointer, is);
-                readOffset(affine.affine.pBiases, basePointer, is);
-                readPwl(affine.pwl);
-                break;
+        case INTEL_AFFINE_DIAGONAL:
+        case INTEL_AFFINE: {
+            layer->pLayerStruct = _mm_malloc(sizeof(intel_affine_layer_t), 64);
+            if (layer->pLayerStruct == nullptr) {
+                THROW_GNA_EXCEPTION << "could not allocate memory for intel_affine_layer_t structure.";
             }
-            case INTEL_CONVOLUTIONAL: {
-                layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
-                if (layer->pLayerStruct == nullptr) {
-                    THROW_GNA_EXCEPTION <<"could not allocate memory for intel_convolutional_layer_t structure.";
-                }
-
-                auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
-                readBits(convolution.nFilterCoefficients, is);
-                readBits(convolution.nBytesFilterCoefficient, is);
-                readBits(convolution.nBytesBias, is);
-                readBits(convolution.nFilters, is);
-                readBits(convolution.nFeatureMaps, is);
-                readBits(convolution.nFeatureMapRows, is);
-                readBits(convolution.nFeatureMapColumns, is);
-                readBits(convolution.nFilterRows, is);
-                readOffset(convolution.pFilters, basePointer, is);
-                readOffset(convolution.pBiases, basePointer, is);
-                readBits(convolution.nPoolSize, is);
-                readBits(convolution.nPoolStride, is);
-                readBits(convolution.poolType, is);
-                readPwl(convolution.pwl);
-                break;
+
+            auto &affine = *reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
+            readBits(affine.affine.nBytesPerWeight, is);
+            readBits(affine.affine.nBytesPerBias, is);
+            readOffset(affine.affine.pWeights, basePointer, is);
+            readOffset(affine.affine.pBiases, basePointer, is);
+            readPwl(affine.pwl);
+            break;
+        }
+        case INTEL_CONVOLUTIONAL: {
+            layer->pLayerStruct = _mm_malloc(sizeof(intel_convolutional_layer_t), 64);
+            if (layer->pLayerStruct == nullptr) {
+                THROW_GNA_EXCEPTION << "could not allocate memory for intel_convolutional_layer_t structure.";
             }
 
-            case INTEL_RECURRENT:
-                THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
-            case INTEL_INTERLEAVE:
-                THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
-            case INTEL_DEINTERLEAVE:
-                THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
-            case INTEL_COPY:
-                THROW_GNA_EXCEPTION << "Importing of copy layer not supported";
-            default:
-                THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ")  not supported";
+            auto &convolution = *reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
+            readBits(convolution.nFilterCoefficients, is);
+            readBits(convolution.nBytesFilterCoefficient, is);
+            readBits(convolution.nBytesBias, is);
+            readBits(convolution.nFilters, is);
+            readBits(convolution.nFeatureMaps, is);
+            readBits(convolution.nFeatureMapRows, is);
+            readBits(convolution.nFeatureMapColumns, is);
+            readBits(convolution.nFilterRows, is);
+            readOffset(convolution.pFilters, basePointer, is);
+            readOffset(convolution.pBiases, basePointer, is);
+            readBits(convolution.nPoolSize, is);
+            readBits(convolution.nPoolStride, is);
+            readBits(convolution.poolType, is);
+            readPwl(convolution.pwl);
+            break;
+        }
+
+        case INTEL_COPY: {
+            layer->pLayerStruct = _mm_malloc(sizeof(intel_copy_layer_t), 64);
+            if (layer->pLayerStruct == nullptr) {
+                THROW_GNA_EXCEPTION << "could not allocate memory for intel_copy_layer_t structure.";
+            }
+
+            auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer->pLayerStruct);
+            readBits(copy.nCopyRows, is);
+            readBits(copy.nCopyCols, is);
+            break;
+        }
+
+        case INTEL_RECURRENT:
+            THROW_GNA_EXCEPTION << "Importing of recurrent layer not supported";
+        case INTEL_INTERLEAVE:
+            THROW_GNA_EXCEPTION << "Importing of interleave layer not supported";
+        case INTEL_DEINTERLEAVE:
+            THROW_GNA_EXCEPTION << "Importing of deinterleave layer not supported";
+        default:
+            THROW_GNA_EXCEPTION << "Importing of unknown GNA layer kind(" << layer->nLayerKind << ") not supported";
         }
 
         // reading offsets of inputs/outputs
         readOffset(layer->pInputs, basePointer, is);
-        readOffset(layer->pOutputsIntermediate, basePointer, is);
+        if (layer->nLayerKind == INTEL_COPY) {
+            layer->pOutputsIntermediate = nullptr;
+        } else {
+            readOffset(layer->pOutputsIntermediate, basePointer, is);
+        }
         readOffset(layer->pOutputs, basePointer, is);
     }
 
@@ -169,13 +512,13 @@ void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize,  std::istrea
     }
 
     for (int i = 0; i != nStates; i++) {
-       void *pSegment;
-       readOffset(pSegment, basePointer, is);
-       uint32_t segmentSz;
-       readBits(segmentSz, is);
-       if (pstates) {
-           (*pstates)[i] = {pSegment, segmentSz};
-       }
+        void *pSegment;
+        readOffset(pSegment, basePointer, is);
+        uint32_t segmentSz;
+        readBits(segmentSz, is);
+        if (pstates) {
+            (*pstates)[i] = { pSegment, segmentSz };
+        }
     }
 
 
@@ -183,10 +526,6 @@ void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize,  std::istrea
     is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
 }
 
-#define offsetFromBase(field)\
-getOffsetFromBase(field, #field)
-
-
 /**
  *
  * @param ptr_nnet
@@ -194,6 +533,7 @@ getOffsetFromBase(field, #field)
  * about base adress it is relatively easy to calculate
  * @param os
  */
+
 void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
     os.exceptions(std::ostream::failbit);
 
@@ -206,7 +546,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
         auto offset = static_cast<uint64_t >(std::distance(reinterpret_cast<uint8_t*>(basePointer), reinterpret_cast<uint8_t*>(pointer)));
         if (offset > gnaGraphSize) {
             THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? "" : name) << "(0x" << pointer
-                               << ") not in range segment retuned from GNAAlloc(0x" << basePointer << "-0x"
+                               << ") not in range segment returned from GNAAlloc(0x" << basePointer << "-0x"
                                << reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(basePointer) + gnaGraphSize) << ")";
         }
         return offset;
@@ -220,35 +560,38 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
         }
     };
 
-    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep){
-        ModelHeader::EndPoint out;
+    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep){
+        HeaderLatest::RuntimeEndPoint out;
         out.elements_count = ep.elements_count;
         out.element_size = ep.element_size;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
         out.scaleFactor = ep.scaleFactor;
+        out.orientation = ep.orientation;
         return out;
     };
     /**
      * writing header
      */
-    ModelHeader header;
+    HeaderLatest::ModelHeader header;
     header.gnam[0] = 'G';
     header.gnam[1] = 'N';
     header.gnam[2] = 'A';
     header.gnam[3] = 'M';
-    header.version.major = HEADER_MAJOR;
-    header.version.minor = HEADER_MINOR;
+    header.version.major = 1u;
+    header.version.minor = 1u;
     header.gnaMemSize = gnaGraphSize;
     header.layersCount = layers.size();
     header.nGroup = ptr_nnet->nGroup;
-    header.input  = convert_to_serial(input);
-    header.output = convert_to_serial(output);
-    header.headerSize = sizeof(ModelHeader);
+    header.nInputs = 1;
+    header.nOutputs = 1;
+    header.headerSize = sizeof(HeaderLatest::ModelHeader);
     header.nRotateRows = nRotateRows;
     header.nRotateColumns = nRotateColumns;
 
 
     writeBits(header, os);
+    writeBits(convert_to_serial(inputs[0]), os);
+    writeBits(convert_to_serial(outputs[0]), os);
 
     for (auto & layer : layers) {
         writeBits(layer.nInputColumns, os);
@@ -291,21 +634,28 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
                 break;
             }
 
+            case INTEL_COPY: {
+                auto &copy = *reinterpret_cast<intel_copy_layer_t *>(layer.pLayerStruct);
+                writeBits(copy.nCopyRows, os);
+                writeBits(copy.nCopyCols, os);
+                break;
+            }
+
             case INTEL_RECURRENT:
                 THROW_GNA_EXCEPTION << "Exporting of recurrent layer not supported";
             case INTEL_INTERLEAVE:
                 THROW_GNA_EXCEPTION << "Exporting of interleave layer not supported";
             case INTEL_DEINTERLEAVE:
                 THROW_GNA_EXCEPTION << "Exporting of deinterleave layer not supported";
-            case INTEL_COPY:
-                THROW_GNA_EXCEPTION << "Exporting of copy layer not supported";
             default:
                 THROW_GNA_EXCEPTION << "Exporting of unknown GNA layer kind(" << layer.nLayerKind << ")  not supported";
         }
 
         // writing offsets from base.
         writeBits(offsetFromBase(layer.pInputs), os);
-        writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
+        if (layer.nLayerKind != INTEL_COPY) {
+            writeBits(offsetFromBase(layer.pOutputsIntermediate), os);
+        }
         writeBits(offsetFromBase(layer.pOutputs), os);
     }
     // writing memory information
@@ -318,3 +668,113 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     // once structure has been written lets push gna graph
     os.write(reinterpret_cast<char*>(basePointer), gnaGraphSize);
 }
+
+#endif
+
+std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
+        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
+    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
+    std::size_t outputIndex = 0;
+    for (auto const &output : outputsDataMap) {
+        auto outputName = output.first;
+        auto inputDims = output.second->getTensorDesc().getDims();
+        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
+
+        HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
+                                                 outputsDesc[outputIndex].ptrs[0],
+                                                 outputsDesc[outputIndex].num_bytes_per_element,
+                                                 elementsCount,
+                                                 outputsDesc[outputIndex].orientation);
+        endPoints.push_back(endPoint);
+        outputIndex++;
+    }
+    return endPoints;
+}
+
+std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
+                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
+    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
+
+    std::size_t inputIndex = 0;
+    for (auto const& input : inputsDataMap) {
+        auto inputName = input.first;
+        auto inputDims = input.second->getTensorDesc().getDims();
+
+        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
+        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
+        IE_ASSERT(descriptor_ptr.size() > 0);
+        uint32_t element_size = 2u;
+        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
+        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
+
+        HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
+                                                 descriptor_ptr[0],
+                                                 element_size,
+                                                 elementsCount,
+                                                 orientation);
+        endPoints.push_back(endPoint);
+        inputIndex++;
+    }
+    return endPoints;
+}
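
Note the hard-coded element_size of 2 bytes: serializeInputs apparently assumes the plugin's 16-bit input representation. An input with 100 elements therefore comes back through ImportInputs below with bytes_allocated_for_input of 2 * 100 = 200 bytes, independent of the FP32 precision advertised in the TensorDesc.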
+
+void GNAModelSerial::ImportInputs(std::istream &is,
+        void* basePtr,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        InferenceEngine::InputsDataMap& dataMap) {
+    dataMap.clear();
+
+    for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
+        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
+                ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
+        HeaderLatest::RuntimeEndPoint input;
+        is.read(reinterpret_cast<char *>(&input), sizeof(input));
+        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
+        inputsDesc->orientation_in[name] = input.orientation;
+        inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
+
+        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
+
+        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
+        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
+                                                            InferenceEngine::TensorDesc(
+                                                                    InferenceEngine::Precision::FP32,
+                                                                    inputDims,
+                                                                    InferenceEngine::Layout::NC)));
+        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
+    }
+}
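
For instance, a header with nGroup = 4 and an endpoint with elements_count = 40 is reconstructed as an NC input of dims {4, 10}; the integer division silently assumes elements_count is a multiple of nGroup.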
+
+void GNAModelSerial::ImportOutputs(std::istream &is,
+        void* basePtr,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::OutputsDataMap& dataMap) {
+    desc.clear();
+    dataMap.clear();
+    desc.resize(modelHeader.nOutputs);
+
+    for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
+        const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
+                                  ? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
+        HeaderLatest::RuntimeEndPoint output;
+        is.read(reinterpret_cast<char *>(&output), sizeof(output));
+        OutputDesc description;
+        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
+        description.orientation = output.orientation;
+        description.num_bytes_per_element = output.element_size;
+        description.scale_factor = output.scaleFactor;
+
+        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
+        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
+                                                 InferenceEngine::TensorDesc(
+                                                         InferenceEngine::Precision::FP32,
+                                                         outputDims,
+                                                         InferenceEngine::Layout::NC));
+        desc.at(outputIndex) = description;
+    }
+}
+
+void GNAModelSerial::setHeader(HeaderLatest::ModelHeader header) {
+    modelHeader = header;
+}