From 445fd6842356c576322cdcc1c94201c2170b0cfc Mon Sep 17 00:00:00 2001
From: Pavel Rodionov <pavel.rodionov@intel.com>
Date: Tue, 1 Sep 2020 19:36:56 +0300
Subject: [PATCH] [GNA] Improve ReadHeader (#1843)

* [GNA] Improve ReadHeader

Issue-34205

* [GNA] Fix stack corruption issue in ReadHeader

* [GNA] fix code style

Co-authored-by: Kamil Magierski <kamil.magierski@intel.com>
---
 .../src/gna_plugin/gna_model_serial.cpp            |  92 ++++++++++-----
 .../src/gna_plugin/gna_model_serial.hpp            | 125 ++-------------------
 .../serial/headers/2dot1/gna_model_header.hpp      | 110 ++++++++++++++++++
 .../serial/headers/2dot2/gna_model_header.hpp      | 122 ++++++++++++++++++++
 .../serial/headers/latest/gna_model_header.hpp     |  14 +++
 .../unit/engines/gna/gna_graph_aot_test.cpp        |   2 +-
 6 files changed, 316 insertions(+), 149 deletions(-)
 create mode 100644 inference-engine/src/gna_plugin/serial/headers/2dot1/gna_model_header.hpp
 create mode 100644 inference-engine/src/gna_plugin/serial/headers/2dot2/gna_model_header.hpp
 create mode 100644 inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp
index 61a023b..977610d 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.cpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp
@@ -20,6 +20,9 @@
 
 #include "gna_plugin.hpp"
 #include "gna_model_serial.hpp"
+#include "serial/headers/latest/gna_model_header.hpp"
+
+using namespace GNAPluginNS;
 
 inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
     os.write(static_cast<const char*>(ptr), size);
@@ -69,11 +72,25 @@ bool is_little_endian() {
 
 const int gna_header_magic = is_little_endian() ?  0x4d414e47 : 0x474e414d;
 
-ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
+GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
     is.exceptions(std::istream::failbit);
-
-    ModelHeader header;
-    readBits(header, is);
+    is.seekg(0, is.end);
+    auto stream_len = is.tellg();
+    if (stream_len == -1) {
+        THROW_GNA_EXCEPTION << "Can't open file to import";
+    }
+    is.seekg(0, is.beg);
+
+    HeaderLatest::ModelHeader header;
+    header.version.major = 0u;
+    header.version.minor = 0u;
+    auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
+                                + sizeof(HeaderLatest::ModelHeader::Version);
+    if (stream_len > size_of_headers_header) {
+        readNBytes(&header, size_of_headers_header, is);
+    } else {
+        readNBytes(&header, stream_len, is);
+    }
     if (*reinterpret_cast<int*>(header.gnam) != gna_header_magic) {
         THROW_GNA_EXCEPTION << "Imported file unsupported: magic number should be GNAM(0x474e414d), but was 0x"
                            << std::setfill('0') <<
@@ -82,12 +99,27 @@ ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
                            std::hex << std::setw(2) << static_cast<short>(header.gnam[2]) <<
                            std::hex << std::setw(2) << static_cast<short>(header.gnam[3]);
     }
-    if (header.version.major != HEADER_MAJOR) {
-        THROW_GNA_EXCEPTION << "Imported file unsupported: major version should be == " << HEADER_MAJOR;
-    }
-    if (header.headerSize < sizeof(header)) {
-        THROW_GNA_EXCEPTION << "Unsupported header size minimal value is : " << sizeof (header) << ", but read: " << header.headerSize;
+
+    is.seekg(0, is.beg);
+    Header2dot1::ModelHeader tempHeader2dot1;
+    switch (header.version.major) {
+        case 2:
+            switch (header.version.minor) {
+                case 1:
+                    readBits(tempHeader2dot1, is);
+                    header = Header2dot2::ModelHeader(tempHeader2dot1);
+                    break;
+                case 2:
+                    readBits(header, is);
+                    break;
+                default:
+                    THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should be equal to 1 or 2 and is: " << header.version.minor;
+            }
+            break;
+        default:
+            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " << header.version.major << " is not implemented";
     }
+
     /*
      * extra data need to be added into new header and modify check as appropriate
      */
@@ -249,8 +281,8 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
         return out;
     };
 
-    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
-        RuntimeEndPoint out;
+    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep) {
+        HeaderLatest::RuntimeEndPoint out;
         out.elements_count = ep.elements_count;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
         out.scaleFactor = ep.scaleFactor;
@@ -261,14 +293,12 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     /**
      * writing header
      */
-    ModelHeader header;
+    HeaderLatest::ModelHeader header;
     header.gnam[0] = 'G';
     header.gnam[1] = 'N';
     header.gnam[2] = 'A';
     header.gnam[3] = 'M';
-    header.headerSize = sizeof(ModelHeader);
-    header.version.major = HEADER_MAJOR;
-    header.version.minor = HEADER_MINOR;
+    header.headerSize = sizeof(HeaderLatest::ModelHeader);
     header.gnaMemSize = gnaGraphSize;
     header.layersCount = layers.size();
     header.nGroup = guessGrouping(*gna2Model);
@@ -496,8 +526,8 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
         }
     };
 
-    auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep){
-        RuntimeEndPoint out;
+    auto convert_to_serial = [getOffsetFromBase](const HeaderLatest::RuntimeEndPoint& ep){
+        HeaderLatest::RuntimeEndPoint out;
         out.elements_count = ep.elements_count;
         out.element_size = ep.element_size;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
@@ -508,19 +538,19 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     /**
      * writing header
      */
-    ModelHeader header;
+    HeaderLatest::ModelHeader header;
     header.gnam[0] = 'G';
     header.gnam[1] = 'N';
     header.gnam[2] = 'A';
     header.gnam[3] = 'M';
-    header.version.major = HEADER_MAJOR;
-    header.version.minor = HEADER_MINOR;
+    header.version.major = 1u;
+    header.version.minor = 1u;
     header.gnaMemSize = gnaGraphSize;
     header.layersCount = layers.size();
     header.nGroup = ptr_nnet->nGroup;
     header.nInputs = 1;
     header.nOutputs = 1;
-    header.headerSize = sizeof(ModelHeader);
+    header.headerSize = sizeof(HeaderLatest::ModelHeader);
     header.nRotateRows = nRotateRows;
     header.nRotateColumns = nRotateColumns;
 
@@ -607,16 +637,16 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
 
 #endif
 
-std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
+std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
         const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
-    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
+    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
     std::size_t outputIndex = 0;
     for (auto const &output : outputsDataMap) {
         auto outputName = output.first;
         auto inputDims = output.second->getTensorDesc().getDims();
         uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
 
-        GNAModelSerial::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
+        HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
                                                  outputsDesc[outputIndex].ptrs[0],
                                                  outputsDesc[outputIndex].num_bytes_per_element,
                                                  elementsCount,
@@ -627,9 +657,9 @@ std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(co
     return endPoints;
 }
 
-std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
+std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                              std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
-    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
+    std::vector<HeaderLatest::RuntimeEndPoint> endPoints;
 
     std::size_t inputIndex = 0;
     for (auto const& input : inputsDataMap) {
@@ -643,7 +673,7 @@ std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(con
         uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
         intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
 
-        GNAModelSerial::RuntimeEndPoint endPoint(scaleFactor,
+        HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
                                                  descriptor_ptr[0],
                                                  element_size,
                                                  elementsCount,
@@ -662,7 +692,7 @@ void GNAModelSerial::ImportInputs(std::istream &is,
 
     for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
         std::string name = "input" + std::to_string(inputIndex);
-        RuntimeEndPoint input;
+        HeaderLatest::RuntimeEndPoint input;
         is.read(reinterpret_cast<char *>(&input), sizeof(input));
         inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
         inputsDesc->orientation_in[name] = input.orientation;
@@ -689,9 +719,9 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
 
     for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
         std::string name = "output" + std::to_string(outputIndex);
-        RuntimeEndPoint output;
+        HeaderLatest::RuntimeEndPoint output;
         is.read(reinterpret_cast<char *>(&output), sizeof(output));
-        GNAPluginNS::OutputDesc description;
+        OutputDesc description;
         description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
         description.orientation = kDnnInterleavedOrientation;
         description.orientation = output.orientation;
@@ -708,6 +738,6 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
     }
 }
 
-void GNAModelSerial::setHeader(ModelHeader header) {
+void GNAModelSerial::setHeader(HeaderLatest::ModelHeader header) {
     modelHeader = header;
 }
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp
index 065d682..6c295a7 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.hpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp
@@ -12,126 +12,17 @@
 #include "descriptions/gna_input_desc.hpp"
 #include "descriptions/gna_output_desc.hpp"
 #include "gna_plugin_log.hpp"
+#include "serial/headers/latest/gna_model_header.hpp"
 #if GNA_LIB_VER == 2
 #include "gna2-model-api.h"
 #endif
 
-#pragma pack(push, 1)
-
-/**
- * version history
- * 1.0 - basic support
- * 1.1 - added memory information
- * 2.0 - for use with GNA2 library
- * 2.1 - multiple i/o support
- */
-#if GNA_LIB_VER == 2
-#define HEADER_MAJOR 2
-#define HEADER_MINOR 1
-#else
-#define HEADER_MAJOR 1
-#define HEADER_MINOR 2
-#endif
-
-
-/**
- * @brief Header version 2.1
- */
-struct ModelHeader {
-    /**
-     *@brief MagicNumber â GNAM in ascii table, equals to hex 0x474e414d
-     */
-    char gnam[4];
-    /**
-     * @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header
-     * usually it is an indicator of working with version of model different that is current export function produce
-     */
-    uint32_t headerSize = 0u;
-    struct Version {
-        /**
-         * @details Version of format Major â unsigned int, ex: 0x0001
-         * every change in the header or in the layers definition should be reflected in version change
-         * for backward compatibility new parsers can read old versions of model with certain restrictions
-         */
-        uint16_t major = 0u;
-        /**
-         * @details Version of Format Minor â unsigned int,  corresponding to build revision for example
-         * changes in minor version are not affected layout of model
-         */
-        uint32_t minor = 0u;
-    } version;
-    /**
-     * @brief Memory required to be allocated using GNAAlloc()
-     */
-    uint64_t gnaMemSize = 0ull;
-    /**
-     * @brief Number of GNA Layers
-     */
-    uint64_t layersCount = 0ull;
-    /**
-     * @brief Grouping level
-     */
-    uint32_t nGroup = 0u;
-    /**
-     * Convolution related setting - they are affecting input transformation
-     */
-    uint32_t nRotateRows = 0u;
-    uint32_t nRotateColumns = 0u;
-    bool doRotateInput = false;
-
-    uint32_t nInputs = 0u;
-    uint32_t nOutputs = 0u;
-
-    /**
-     * Reserved Data might be here
-     */
-};
-#pragma pack(pop)
 
 /**
  * @brief implements serialisation tasks for GNAGraph
  */
 class GNAModelSerial {
  public:
-    /*
-     * In runtime endpoint mostly same as in serial version, except of descriptor field
-     */
-    struct RuntimeEndPoint {
-        /**
-         * if scale factor is different then pased into infer , network might need to be requantized
-         */
-        float scaleFactor = 0;
-        /**
-         * Pointer descriptor
-         */
-        void* descriptor_ptr = nullptr;
-        /**
-         * Endpoint resolution in bytes.
-         */
-        uint32_t element_size = 0;
-        /**
-         * Number of elements
-         */
-        uint32_t elements_count = 0;
-        /**
-         * Offset in bytes of pointer descriptor
-        */
-        uint64_t descriptor_offset = 0ull;
-
-        intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
-
-        RuntimeEndPoint() = default;
-        RuntimeEndPoint(double scaleFactor,
-                    void* descriptor_ptr,
-                    uint32_t element_size,
-                    uint32_t elements_count,
-                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
-                                    descriptor_ptr(descriptor_ptr),
-                                    element_size(element_size),
-                                    elements_count(elements_count),
-                                    orientation(orientation) {
-        }
-    };
     using MemoryType = std::vector<std::pair<void*, uint32_t>>;
 
 private:
@@ -140,14 +31,14 @@ private:
 #else
     intel_nnet_type_t *ptr_nnet;
 #endif
-    std::vector<RuntimeEndPoint> inputs;
-    std::vector<RuntimeEndPoint> outputs;
+    std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs;
+    std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs;
     uint32_t nRotateRows = 0;
     uint32_t nRotateColumns = 0;
     bool doRotateInput = false;
 
     MemoryType states, *pstates = nullptr;
-    ModelHeader modelHeader;
+    GNAPluginNS::HeaderLatest::ModelHeader modelHeader;
 
     void ImportInputs(std::istream &is,
             void* basePtr,
@@ -226,7 +117,7 @@ private:
      * @param is - opened input stream
      * @return
      */
-    static ModelHeader ReadHeader(std::istream &is);
+    static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
 
     /**
      * @brief Import model from FS into preallocated buffer,
@@ -254,12 +145,12 @@ private:
                 size_t gnaGraphSize,
                 std::ostream &os) const;
 
-    static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
+    static std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
             const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
 
 
-    static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
+    static std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                         const std::shared_ptr<GNAPluginNS::InputDesc>);
 
-    void setHeader(ModelHeader header);
+    void setHeader(GNAPluginNS::HeaderLatest::ModelHeader header);
 };
diff --git a/inference-engine/src/gna_plugin/serial/headers/2dot1/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/2dot1/gna_model_header.hpp
new file mode 100644
index 0000000..3a1b384
--- /dev/null
+++ b/inference-engine/src/gna_plugin/serial/headers/2dot1/gna_model_header.hpp
@@ -0,0 +1,110 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstdint>
+#include "backend/dnn_types.h"
+
+#pragma pack(push, 1)
+
+
+namespace GNAPluginNS {
+namespace Header2dot1 {
+
+
+/**
+ * @brief Byte-packed (pack 1) model header, format version 2.1
+ */
+struct ModelHeader {
+    /**
+     * @brief Magic number - "GNAM" in ASCII, i.e. the byte sequence 0x47 0x4e 0x41 0x4d
+     */
+    char gnam[4] = {};
+    /**
+     * @brief Header size; if it is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header,
+     * which usually indicates a model written by a different format version than the current export function produces
+     */
+    uint32_t headerSize = 0u;
+    struct Version {
+        /**
+         * @details Major format version - unsigned int, e.g. 0x0001.
+         * Every change in the header or in the layer definitions should be reflected in a version change;
+         * for backward compatibility, new parsers can read old model versions with certain restrictions
+         */
+        uint16_t major = 2u;
+        /**
+         * @details Minor format version - unsigned int, corresponding for example to a build revision.
+         * Changes in the minor version should not affect the layout of the model
+         */
+        uint32_t minor = 1u;
+    } version;
+    /**
+     * @brief Memory that has to be allocated using GNAAlloc()
+     */
+    uint64_t gnaMemSize = 0ull;
+    /**
+     * @brief Number of GNA layers
+     */
+    uint64_t layersCount = 0ull;
+    /**
+     * @brief Grouping level
+     */
+    uint32_t nGroup = 0u;
+    /**
+     * Convolution-related settings - they affect the input transformation
+     */
+    uint32_t nRotateRows = 0u;
+    uint32_t nRotateColumns = 0u;
+
+    uint32_t nInputs = 0u;   // number of input endpoints read on import
+    uint32_t nOutputs = 0u;  // number of output endpoints read on import
+
+    /**
+     * Reserved data might follow here
+     */
+};
+#pragma pack(pop)
+
+/*
+ * At runtime the endpoint is mostly the same as in its serialized version, except for the descriptor field
+ */
+struct RuntimeEndPoint {
+    /**
+     * If this scale factor differs from the one passed to infer, the network might need to be requantized
+     */
+    float scaleFactor = 0;
+    /**
+     * Pointer descriptor
+     */
+    void* descriptor_ptr = nullptr;
+    /**
+     * Endpoint resolution in bytes.
+     */
+    uint32_t element_size = 0;
+    /**
+     * Number of elements
+     */
+    uint32_t elements_count = 0;
+    /**
+     * Offset in bytes of the pointer descriptor
+    */
+    uint64_t descriptor_offset = 0ull;
+
+    intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
+
+    RuntimeEndPoint() = default;
+    RuntimeEndPoint(double scaleFactor,  // stored into the float member, so it is narrowed
+                    void* descriptor_ptr,
+                    uint32_t element_size,
+                    uint32_t elements_count,
+                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
+                                                           descriptor_ptr(descriptor_ptr),
+                                                           element_size(element_size),
+                                                           elements_count(elements_count),
+                                                           orientation(orientation) {
+    }  // descriptor_offset is not set here and keeps its default of 0
+};
+} // namespace Header2dot1
+} // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/serial/headers/2dot2/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/2dot2/gna_model_header.hpp
new file mode 100644
index 0000000..14b5550
--- /dev/null
+++ b/inference-engine/src/gna_plugin/serial/headers/2dot2/gna_model_header.hpp
@@ -0,0 +1,122 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstdint>
+#include "backend/dnn_types.h"
+#include "serial/headers/2dot1/gna_model_header.hpp"
+
+#pragma pack(push, 1)
+
+
+namespace GNAPluginNS {
+namespace Header2dot2 {
+
+
+/**
+ * @brief Byte-packed (pack 1) model header, format version 2.2
+ */
+struct ModelHeader {
+    /**
+     * @brief Magic number - "GNAM" in ASCII, i.e. the byte sequence 0x47 0x4e 0x41 0x4d
+     */
+    char gnam[4] = {};
+    /**
+     * @brief Header size; if it is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header,
+     * which usually indicates a model written by a different format version than the current export function produces
+     */
+    uint32_t headerSize = 0u;
+    struct Version {
+        /**
+         * @details Major format version - unsigned int, e.g. 0x0001.
+         * Every change in the header or in the layer definitions should be reflected in a version change;
+         * for backward compatibility, new parsers can read old model versions with certain restrictions
+         */
+        uint16_t major = 2u;
+        /**
+         * @details Minor format version - unsigned int, corresponding for example to a build revision.
+         * Changes in the minor version should not affect the layout of the model
+         */
+        uint32_t minor = 2u;
+    } version;
+    /**
+     * @brief Memory that has to be allocated using GNAAlloc()
+     */
+    uint64_t gnaMemSize = 0ull;
+    /**
+     * @brief Number of GNA layers
+     */
+    uint64_t layersCount = 0ull;
+    /**
+     * @brief Grouping level
+     */
+    uint32_t nGroup = 0u;
+    /**
+     * Convolution-related settings - they affect the input transformation
+     */
+    uint32_t nRotateRows = 0u;
+    uint32_t nRotateColumns = 0u;
+    bool doRotateInput = false;
+
+    uint32_t nInputs = 0u;   // number of input endpoints read on import
+    uint32_t nOutputs = 0u;  // number of output endpoints read on import
+
+    /**
+     * Reserved data might follow the fields above
+     */
+    ModelHeader() = default;
+    ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {  // builds a 2.2 header from a 2.1 header
+        gnaMemSize = old.gnaMemSize;
+        layersCount = old.layersCount;
+        nGroup = old.nGroup;
+        nRotateRows = old.nRotateRows;
+        nRotateColumns = old.nRotateColumns;
+        nInputs = old.nInputs;
+        nOutputs = old.nOutputs;
+    }  // gnam, headerSize and version are not copied from old; they and doRotateInput keep the defaults above
+};
+#pragma pack(pop)
+
+/*
+ * At runtime the endpoint is mostly the same as in its serialized version, except for the descriptor field
+ */
+struct RuntimeEndPoint {
+    /**
+     * If this scale factor differs from the one passed to infer, the network might need to be requantized
+     */
+    float scaleFactor = 0;
+    /**
+     * Pointer descriptor
+     */
+    void* descriptor_ptr = nullptr;
+    /**
+     * Endpoint resolution in bytes.
+     */
+    uint32_t element_size = 0;
+    /**
+     * Number of elements
+     */
+    uint32_t elements_count = 0;
+    /**
+     * Offset in bytes of the pointer descriptor
+    */
+    uint64_t descriptor_offset = 0ull;
+
+    intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
+
+    RuntimeEndPoint() = default;
+    RuntimeEndPoint(double scaleFactor,  // stored into the float member, so it is narrowed
+                    void* descriptor_ptr,
+                    uint32_t element_size,
+                    uint32_t elements_count,
+                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
+                                                           descriptor_ptr(descriptor_ptr),
+                                                           element_size(element_size),
+                                                           elements_count(elements_count),
+                                                           orientation(orientation) {
+    }  // descriptor_offset is not set here and keeps its default of 0
+};
+} // namespace Header2dot2
+} // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp
new file mode 100644
index 0000000..c5fc125
--- /dev/null
+++ b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "serial/headers/2dot2/gna_model_header.hpp"
+
+namespace GNAPluginNS {
+namespace HeaderLatest {
+using ModelHeader = GNAPluginNS::Header2dot2::ModelHeader;
+using RuntimeEndPoint = GNAPluginNS::Header2dot2::RuntimeEndPoint;
+}
+}
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
index eef17f3..798f49e 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
@@ -41,7 +41,7 @@ TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_export_imported) {
         .inNotCompactMode().gna().propagate_forward().called().once();
 }
 
-TEST_F(GNAAOTTests, AffineWith2AffineOutputs_canbe_imported_verify_structure) {
+TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_structure) {
 // Disabled because of random fails: Issue-23611
 #if GNA_LIB_VER == 1
     GTEST_SKIP();
-- 
2.7.4