From 166ec394bd09ad8618d598165ad6b9800748c4b7 Mon Sep 17 00:00:00 2001
From: Pavel Rodionov <pavel.rodionov@intel.com>
Date: Thu, 3 Sep 2020 14:42:30 +0300
Subject: [PATCH] [GNA] Move old headers (#2002)

---
 .../src/gna_plugin/backend/am_intel_dnn.cpp        |  29 ++--
 .../src/gna_plugin/backend/am_intel_dnn.hpp        |   5 +-
 .../src/gna_plugin/backend/dnn_types.h             |   4 +-
 .../src/gna_plugin/backend/gna_types.h             | 164 +++++++++++++++++++++
 .../src/gna_plugin/backend/make_pwl.cpp            |   3 +-
 .../src/gna_plugin/backend/make_pwl.hpp            |   3 +-
 .../src/gna_plugin/frontend/layer_quantizer.hpp    |   6 +-
 .../src/gna_plugin/frontend/quantization.cpp       |   3 +-
 .../src/gna_plugin/frontend/quantization.h         |  10 +-
 .../src/gna_plugin/gna_api_wrapper.hpp             |   4 +-
 .../src/gna_plugin/gna_graph_compiler.cpp          |  19 +--
 inference-engine/src/gna_plugin/gna_helper.cpp     |  27 +---
 .../src/gna_plugin/gna_lib_ver_selector.hpp        |  32 +++-
 .../src/gna_plugin/gna_model_serial.hpp            |   1 -
 .../src/gna_plugin/layers/gna_layer_info.hpp       |   3 +-
 .../src/gna_plugin/memory/gna_memory.hpp           |   3 +-
 .../src/gna_plugin/optimizer/gna_pass_manager.cpp  |   1 -
 inference-engine/src/gna_plugin/runtime/pwl.cpp    |   7 +-
 inference-engine/src/gna_plugin/runtime/pwl.h      |   5 +-
 .../unit/engines/gna/gna_graph_aot_test.cpp        |   4 +-
 .../engines/gna/gna_hardware_precision_test.cpp    |   2 +-
 .../unit/engines/gna/gna_matcher.cpp               |   2 +-
 .../unit/engines/gna/gna_matcher.hpp               |  10 +-
 .../unit/engines/gna/i16_quantisation_test.cpp     |   2 +-
 .../unit/engines/gna/matchers/conv_matcher.hpp     |   8 +-
 .../unit/engines/gna/matchers/copy_matcher.hpp     |   9 +-
 .../unit/engines/gna/matchers/diag_matcher.hpp     |  11 +-
 .../unit/engines/gna/matchers/fill_with_data.hpp   |   5 +-
 .../engines/gna/matchers/input_data_matcher.hpp    |   5 +-
 .../engines/gna/matchers/nnet_base_matcher.hpp     |  14 +-
 .../unit/engines/gna/matchers/pool_matcher.hpp     |   8 +-
 .../engines/gna/matchers/precision_matcher.hpp     |   9 +-
 .../unit/engines/gna/matchers/pwl_matcher.hpp      |  12 +-
 .../matchers/pwl_quantization_metrics_matcher.hpp  |  11 +-
 .../unit/engines/gna/matchers/weights_matcher.hpp  |  18 +--
 35 files changed, 324 insertions(+), 135 deletions(-)
 create mode 100644 inference-engine/src/gna_plugin/backend/gna_types.h

diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index 21bf3f5..b8f77e3 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -22,6 +22,7 @@
 #include "dnn.hpp"
 #include "am_intel_dnn.hpp"
 #include "dnn_types.h"
+#include "gna_types.h"
 
 #if GNA_LIB_VER == 2
 #include <gna2-model-api.h>
@@ -288,7 +289,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
                                                      float input_scale_factor,
                                                      void *&ptr_inputs,
                                                      void *&ptr_outputs,
-                                                     intel_pwl_segment_t *ptr_segments,
+                                                     gna_pwl_segment_t *ptr_segments,
                                                      bool postInitMem) {
     comp.num_rows_in = num_rows;
     comp.num_columns_in = num_columns;
@@ -313,8 +314,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
         ptr_inputs = &comp.ptr_inputs;
         ptr_outputs = &comp.ptr_outputs;
         if (ptr_segments != nullptr) {
-            *reinterpret_cast<intel_pwl_segment_t **>(ptr_segments) =
-                    reinterpret_cast<intel_pwl_segment_t *>(& comp.op.pwl.ptr_segments);
+            *reinterpret_cast<gna_pwl_segment_t **>(ptr_segments) =
+                    reinterpret_cast<gna_pwl_segment_t *>(& comp.op.pwl.ptr_segments);
         }
     }
 }
@@ -823,7 +824,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 
                     if (num_bytes_per_weight == 1) {
                         int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.affine.ptr_weights);
-                        intel_compound_bias_t *ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
+                        gna_compound_bias_t *ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.affine.ptr_biases);
 #ifdef DUMP_WB
                         for (uint32_t row = 0; row < num_weight_rows; row++) {
                             for (uint32_t col = 0; col < num_weight_columns; col++) {
@@ -871,8 +872,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
                     }
                     if (compute_precision_ == kDnnInt) {
                         if (num_bytes_per_weight == 1) {
-                            intel_compound_bias_t
-                                *ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
+                            gna_compound_bias_t
+                                *ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.affine.ptr_biases);
 #ifdef DUMP_WB
                             for (uint32_t row = 0; row < num_rows_out; row++) {
                                 if (logging_precision == kDnnInt) {
@@ -952,7 +953,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 
                     if (num_bytes_per_weight == 1) {
                         int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.conv1D.ptr_filters);
-                        intel_compound_bias_t *ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
+                        gna_compound_bias_t *ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
 #ifdef DUMP_WB
                         for (uint32_t row = 0; row < num_filters; row++) {
                             for (uint32_t col = 0; col < num_filter_coefficients; col++) {
@@ -1001,8 +1002,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
                     if (compute_precision_ == kDnnInt) {
                         if (logging_precision == kDnnInt) {
                             if (num_bytes_per_weight == 1) {
-                                intel_compound_bias_t
-                                        *ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
+                                gna_compound_bias_t
+                                        *ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
 #ifdef DUMP_WB
                                 for (uint32_t row = 0; row < num_filters; row++) {
                                     out_bfile << "0x" << std::setfill('0') << std::setw(8) << std::hex
@@ -1073,8 +1074,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
                              << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n";
                     if (num_bytes_per_weight == 1) {
                         int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.recurrent.ptr_weights);
-                        intel_compound_bias_t
-                                *ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
+                        gna_compound_bias_t
+                                *ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
 #ifdef DUMP_WB
                         for (uint32_t row = 0; row < num_weight_rows; row++) {
                             out_file << "<weight_row> ";
@@ -1128,8 +1129,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
                     if (compute_precision_ == kDnnInt) {
                         if (logging_precision == kDnnInt) {
                             if (num_bytes_per_weight == 1) {
-                                intel_compound_bias_t
-                                        *ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
+                                gna_compound_bias_t
+                                        *ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
                                 out_file << "<compound_bias>" << " ";
 #ifdef DUMP_WB
                                 for (uint32_t col = 0; col < num_columns_out; col++) {
@@ -1182,7 +1183,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
                 }
                     break;
                 case kDnnPiecewiselinearOp: {
-                    intel_pwl_segment_t *ptr_segment = component[i].op.pwl.ptr_segments;
+                    gna_pwl_segment_t *ptr_segment = component[i].op.pwl.ptr_segments;
                     DnnActivationType func_id = component[i].op.pwl.func_id.type;
                     uint32_t num_segments = component[i].op.pwl.num_segments;
                     float output_scale_factor = component[i].output_scale_factor;
diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
index df49d78..036381a 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "dnn_types.h"
+#include "gna_types.h"
 
 #include "gna_plugin_log.hpp"
 
@@ -176,7 +177,7 @@ public:
                                              float input_scale_factor,
                                              A *&ptr_inputs,
                                              B *&ptr_outputs,
-                                             intel_pwl_segment_t *ptr_segments) {
+                                             gna_pwl_segment_t *ptr_segments) {
         InitPiecewiseLinearComponentPrivate(cmp,
                                             function_id,
                                             orientation,
@@ -381,7 +382,7 @@ private:
                                                     float input_scale_factor,
                                                     void *&ptr_inputs,
                                                     void *&ptr_outputs,
-                                                    intel_pwl_segment_t *ptr_segments,
+                                                    gna_pwl_segment_t *ptr_segments,
                                                     bool postInitMem);
 
     static void InitInterleaveComponentPrivate(intel_dnn_component_t &cmp,
diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h
index bf8b193..73a2d06 100644
--- a/inference-engine/src/gna_plugin/backend/dnn_types.h
+++ b/inference-engine/src/gna_plugin/backend/dnn_types.h
@@ -6,7 +6,7 @@
 
 #include <cstdint>
 #include <type_traits>
-#include <gna-api-types-xnn.h>
+#include "gna_types.h"
 
 #include "gna_plugin_log.hpp"
 
@@ -183,7 +183,7 @@ typedef struct {
 typedef struct {
     DnnActivation func_id;       // identifies function being approximated
     uint32_t num_segments;
-    intel_pwl_segment_t *ptr_segments;
+    gna_pwl_segment_t *ptr_segments;
 } intel_piecewiselinear_t;
 
 typedef struct {
diff --git a/inference-engine/src/gna_plugin/backend/gna_types.h b/inference-engine/src/gna_plugin/backend/gna_types.h
new file mode 100644
index 0000000..4a3c50e
--- /dev/null
+++ b/inference-engine/src/gna_plugin/backend/gna_types.h
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if GNA_LIB_VER == 1
+#include "gna_lib_ver_selector.hpp"
+#else
+#include <cstdint>
+
+/** PWL Segment - as read directly by the accelerator */
+typedef struct _pwl_segment_t {
+    int32_t xBase;                  // X Component of segment starting point, with scaling encoded if needed.
+    int16_t yBase;                  // Y Component of segment starting point.
+    int16_t slope;                  // Slope of linear function.
+} gna_pwl_segment_t;
+static_assert(8 == sizeof(gna_pwl_segment_t), "Invalid size of gna_pwl_segment_t");
+
+/** Piecewise-linear activation function (PWL) details */
+typedef struct _pwl_func_t {
+    uint32_t nSegments;             // Number of segments, set to 0 to disable activation function.
+    gna_pwl_segment_t* pSegments; // Activation function segments data or NULL if disabled.
+} gna_pwl_func_t;
+
+/**
+ * Compound bias
+ * Used for nBytesPerWeight=GNA_INT8 and nBytesPerBias=GNA_INT16 only.
+ * As read directly by the accelerator.
+ */
+typedef struct _compound_bias_t {
+    int32_t bias;              // 4B Signed integer bias (constant) value.
+    uint8_t multiplier;             // Scaling factor that weight elements are multiplied by.
+    uint8_t reserved[3];            // Not used.
+} gna_compound_bias_t;
+static_assert(8 == sizeof(gna_compound_bias_t), "Invalid size of gna_compound_bias_t");
+
+/**
+ * Layer operation type.
+ * Defines type of layer "core" operation.
+ * All nodes/cells within a layer are of the same type,
+ * e.g. affine transform cell, convolutional cell, recurrent cell.
+ * Affine, convolutional and recurrent layers are in fact "fused operation" layers
+ * and "core" operation is fused with activation and/or pooling functions.
+ * NOTE: Operation types are exclusive.
+ */
+typedef enum _layer_operation {
+    // Fully connected affine transform (deep feed forward) with activation function. Cast pLayerStruct to intel_affine_layer_t.
+    INTEL_AFFINE,
+    // Fully connected affine transform (matrix x vector) (deep feed forward) with activation function. Cast pLayerStruct to intel_affine_layer_t.
+    INTEL_AFFINE_DIAGONAL,
+    /*
+     * Fully connected affine transform (with grouped bias vectors) (deep feed forward) with activation function.
+     * Cast pLayerStruct to intel_affine_multibias_layer_t.
+     */
+    INTEL_AFFINE_MULTIBIAS,
+    INTEL_CONVOLUTIONAL,            // Convolutional transform with activation function and pooling. Cast pLayerStruct to intel_convolutional_layer_t.
+    INTEL_CONVOLUTIONAL_2D,         // Convolutional transform with activation function and pooling. Cast pLayerStruct to nn_layer_cnn2d.
+    INTEL_COPY,                     // Auxiliary data copy operation. Cast pLayerStruct to intel_copy_layer_t.
+    INTEL_DEINTERLEAVE,             // Auxiliary 2D tensor transpose operation (interleave to flat). No casting, always set pLayerStruct to null.
+    INTEL_GMM,                      // Gaussian Mixture Model operation. Cast pLayerStruct to intel_gmm_layer_t.
+    INTEL_INTERLEAVE,               // Auxiliary 2D tensor transpose operation (flat to interleave). No casting, always set pLayerStruct to null.
+    INTEL_RECURRENT,                // Fully connected affine transform with recurrence and activation function. Cast pLayerStruct to intel_recurrent_layer_t.
+    GNA_LAYER_CNN_2D_POOLING,
+    LAYER_OPERATION_TYPE_COUT,      // NOTE(review): presumably the number of operation types ("COUT" looks like a typo of "COUNT") - confirm before renaming.
+} gna_layer_operation;
+
+typedef enum _layer_mode {
+    INTEL_INPUT,            // Layer serves as model input layer (usually first layer)
+    INTEL_OUTPUT,           // Layer serves as model output layer (usually last layer)
+    INTEL_INPUT_OUTPUT,     // Layer serves as model input and output layer (usually in single layer topology)
+    INTEL_HIDDEN,           // Layer serves as model hidden layer (layers between input and output layers)
+    LAYER_MODE_COUNT        // Number of Layer modes.
+} gna_layer_mode;
+
+/** Layer common configuration descriptor */
+typedef struct _nnet_layer_t {
+    gna_layer_operation operation;  // Layer operation type.
+    gna_layer_mode mode;            // Layer connection mode.
+    uint32_t nInputColumns;         // Number of input columns.
+    uint32_t nInputRows;            // Number of input rows.
+    uint32_t nOutputColumns;        // Number of output columns.
+    uint32_t nOutputRows;           // Number of output rows.
+    uint32_t nBytesPerInput;        // Precision/mode of input node, use a value from gna_data_mode. Valid values {GNA_INT8, GNA_INT16, GNA_DATA_DISABLED}
+    // Precision/ activation mode of output node, use a value from gna_data_mode. Valid values {GNA_INT8, GNA_INT16, GNA_INT32, GNA_DATA_ACTIVATION_DISABLED}
+    uint32_t nBytesPerOutput;
+    uint32_t nBytesPerIntermediateOutput;// Number of bytes per intermediate output node, always set to GNA_INT32.
+    void* pLayerStruct;             // Layer detailed configuration, cast to intel_[LAYER_KIND]_layer_t.
+    void* pInputs;                  // Signed integer NN or GMM input buffer.
+    void* pOutputsIntermediate;     // 4B Signed integer Auxiliary output buffer.
+    void* pOutputs;                 // Signed integer output buffer.
+} gna_nnet_layer_t;
+
+/** GNA Network descriptor */
+typedef struct _nnet_type_t {
+    uint32_t nLayers;               // The number of layers in the network.
+    uint32_t nGroup;                // Input vector grouping level.
+    gna_nnet_layer_t *pLayers;    // Layer configurations.
+} gna_nnet_type_t;
+
+/** Affine function details */
+typedef struct _affine_func_t {
+    uint32_t nBytesPerWeight;       // Precision/mode of weight element, use a value from gna_data_mode.
+    uint32_t nBytesPerBias;         // Precision/mode of bias (constant) element, use a value from gna_data_mode.
+    void* pWeights;                 // Signed integer weights data buffer.
+    void* pBiases;                  // Biases (constants) data buffer. Signed integer biases or gna_compound_bias_t
+} gna_affine_func_t;
+
+/** Fully connected affine layer detailed descriptor */
+typedef struct _affine_layer_t {
+    gna_affine_func_t affine;     // Affine function details.
+    gna_pwl_func_t pwl;           // Activation function details.
+} gna_affine_layer_t;
+
+/** Pooling function types */
+typedef enum _pool_type_t {
+    INTEL_NO_POOLING = 0,           // Pooling function disabled.
+    INTEL_MAX_POOLING = 1,          // Max Pooling function.
+    INTEL_SUM_POOLING = 2,          // Sum Pooling function.
+    NUM_POOLING_TYPES               // Number of Pooling function types.
+} gna_pool_type_t;
+
+/** Convolutional Layer detailed descriptor */
+typedef struct _convolutional_layer_t {
+    uint32_t nFilters;              // Number of filters.
+    uint32_t nFilterCoefficients;   // Number of filter elements, including 0-padding if necessary.
+    uint32_t nFilterRows;           // Number of rows in each filter.
+    uint32_t nBytesFilterCoefficient;// Precision/mode of filter coefficient element, use a value from gna_data_mode.
+    uint32_t nBytesBias;            // Precision/mode of bias (constant) element, use a value from gna_data_mode.
+    uint32_t nFeatureMaps;          // Number of feature maps.
+    uint32_t nFeatureMapRows;       // Number of rows in each feature map.
+    uint32_t nFeatureMapColumns;    // Number of columns in each feature map.
+    void* pFilters;                 // Signed integer Filters data buffer, filters stored one after the other.
+    void* pBiases;                  // Signed integer Biases (constants) data buffer, biases are specified per kernel/filter.
+    gna_pool_type_t poolType;     // Pooling function type.
+    uint32_t nPoolSize;             // Pool size, set 1 to disable pooling.
+    uint32_t nPoolStride;           // Pool stride.
+    gna_pwl_func_t pwl;           // Activation function details.
+} gna_convolutional_layer_t;
+
+/**
+ The list of processing acceleration modes.
+ The availability of acceleration modes depends on the CPU type.
+ Available modes are detected by GNA.
+
+ NOTE:
+ - GNA_HARDWARE: in some GNA hardware generations, model components unsupported
+   by hardware will be processed using software acceleration.
+ When software inference is used, the "fast" algorithm is used by default
+ and results may not be bit-exact with those produced by the hardware device.
+ */
+typedef enum  _acceleration {
+    GNA_HARDWARE = static_cast<int>(0xFFFFFFFE), // GNA Hardware acceleration enforcement
+    GNA_AUTO     = 0x3,             // GNA selects the best available acceleration
+    GNA_SOFTWARE = 0x5,             // GNA selects the best available software acceleration
+    GNA_GENERIC  = 0x7,             // Enforce the usage of generic software mode
+    GNA_SSE4_2   = 0x9,             // Enforce the usage of SSE 4.2 CPU instruction set
+    GNA_AVX1     = 0xB,             // Enforce the usage of AVX1 CPU instruction set
+    GNA_AVX2     = 0xD              // Enforce the usage of AVX2 CPU instruction set
+} gna_acceleration;
+
+static_assert(4 == sizeof(gna_acceleration), "Invalid size of gna_acceleration");
+
+#endif
diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp
index 9b0ac3e..762ea97 100644
--- a/inference-engine/src/gna_plugin/backend/make_pwl.cpp
+++ b/inference-engine/src/gna_plugin/backend/make_pwl.cpp
@@ -8,6 +8,7 @@
 #include <runtime/pwl.h>
 #include <gna_slope_scale.h>
 #include "dnn_types.h"
+#include "backend/gna_types.h"
 #include "round_float_define.hpp"
 
 void make_gna_pwl(const DnnActivation  fun,
@@ -16,7 +17,7 @@ void make_gna_pwl(const DnnActivation  fun,
                   const double u_bound,
                   const double in_scale,
                   const double out_scale,
-                  std::vector<intel_pwl_segment_t> &gna_pwl) {
+                  std::vector<gna_pwl_segment_t> &gna_pwl) {
     pwl_gna_slope_scale_t s;
     uint32_t pwl_size = static_cast<int32_t>(pwl.size());
     gnalog() << "make_gna_pwl\n";
diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.hpp b/inference-engine/src/gna_plugin/backend/make_pwl.hpp
index c2be616..68a42f6 100644
--- a/inference-engine/src/gna_plugin/backend/make_pwl.hpp
+++ b/inference-engine/src/gna_plugin/backend/make_pwl.hpp
@@ -6,6 +6,7 @@
 
 #include <vector>
 #include <runtime/pwl.h>
+#include "backend/gna_types.h"
 
 
 void make_gna_pwl(const DnnActivation  fun,
@@ -14,4 +15,4 @@ void make_gna_pwl(const DnnActivation  fun,
                   const double u_bound,
                   const double in_scale,
                   const double out_scale,
-                  std::vector<intel_pwl_segment_t> &gna_pwl);
+                  std::vector<gna_pwl_segment_t> &gna_pwl);
diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index fe64b3c..01a72e5 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -8,7 +8,7 @@
 #include <utility>
 #include <cmath>
 
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 #include "quantized_layer_params.hpp"
 #include "quantization.h"
@@ -77,7 +77,7 @@ struct QuantI16 : public QuantDescTmpl<PRECISION_TYPE(I16, I32, I16, I32, MIXED)
         _Np = InferenceEngine::Precision::MIXED;
     }
 };
-struct QuantI8  : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(I8), intel_compound_bias_t, P_TYPE(MIXED)> {
+struct QuantI8  : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(I8), gna_compound_bias_t, P_TYPE(MIXED)> {
     QuantI8() {
         _Np = InferenceEngine::Precision::MIXED;
     }
@@ -102,7 +102,7 @@ inline bool shouldAlwaysAllocate() {
 }
 
 template <>
-inline bool shouldAlwaysAllocate<intel_compound_bias_t>() {
+inline bool shouldAlwaysAllocate<gna_compound_bias_t>() {
     return true;
 }
 
diff --git a/inference-engine/src/gna_plugin/frontend/quantization.cpp b/inference-engine/src/gna_plugin/frontend/quantization.cpp
index 6dffb19..c0e24a7 100644
--- a/inference-engine/src/gna_plugin/frontend/quantization.cpp
+++ b/inference-engine/src/gna_plugin/frontend/quantization.cpp
@@ -5,6 +5,7 @@
 #include <cstring>
 #include <iostream>
 #include <details/ie_exception.hpp>
+#include "backend/gna_types.h"
 #include "quantization.h"
 
 void QuantizeAffine16(float *ptr_float_weights,
@@ -149,7 +150,7 @@ void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t
 }
 
 void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
-                     int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
+                     int8_t *ptr_int_weights, gna_compound_bias_t *ptr_int_biases,
                      float input_scale_factor, float *ptr_weight_scale_factor,
                      float *ptr_output_scale_factor, uint32_t num_rows, uint32_t num_columns,
                      uint32_t num_rows_padded, uint32_t num_columns_padded) {
diff --git a/inference-engine/src/gna_plugin/frontend/quantization.h b/inference-engine/src/gna_plugin/frontend/quantization.h
index 66d6349..d931673 100644
--- a/inference-engine/src/gna_plugin/frontend/quantization.h
+++ b/inference-engine/src/gna_plugin/frontend/quantization.h
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
 #include <cstdint>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 
 #define MAX_OUT_MULTIPLIER 230
 #define MAX_VAL_1B_WEIGHT 127
@@ -35,6 +35,6 @@ void QuantizeAffine16(float *ptr_float_weights,
                       uint32_t num_columns_padded);
 float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements);
 void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor);
-void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
+void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, gna_compound_bias_t *ptr_int_biases,
                      float input_scale_factor, float *ptr_weight_scale_factor, float *ptr_output_scale_factor,
                      uint32_t num_rows, uint32_t num_columns, uint32_t num_rows_padded, uint32_t num_columns_padded);
diff --git a/inference-engine/src/gna_plugin/gna_api_wrapper.hpp b/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
index bce210b..b648667 100644
--- a/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
+++ b/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
@@ -9,7 +9,7 @@
 #else
 #include <mm_malloc.h>
 #endif
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 
 #if GNA_LIB_VER == 2
@@ -81,7 +81,7 @@ class CPPWrapper<Gna2Model> {
 };
 #else
 template <>
-class CPPWrapper<intel_nnet_type_t> {
+class CPPWrapper<gna_nnet_type_t> {
 public:
     intel_nnet_type_t obj;
 
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 5b745b0..7a77253 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -15,7 +15,6 @@
 #include <limits>
 
 #include <legacy/ie_layers.h>
-#include <gna-api-types-xnn.h>
 #include <ie_algorithm.hpp>
 #include <debug.h>
 
@@ -25,7 +24,6 @@
 #include "layers/gna_layer_info.hpp"
 #include "ie_memcpy.h"
 #include "caseless.hpp"
-#include "gna-api.h"
 #include "backend/am_intel_dnn.hpp"
 #include "runtime/pwl.h"
 #include "gna_graph_tools.hpp"
@@ -539,7 +537,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
         }
     } else {
         //use PWL to calculate power
-        std::vector<intel_pwl_segment_t> ptr_pwl_segments;
+        std::vector<gna_pwl_segment_t> ptr_pwl_segments;
 
         auto orientation = kDnnInterleavedOrientation;
 
@@ -550,7 +548,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
         auto& pwlComponent = dnnComponents.addComponent(layer->name, "power");
 
-        intel_pwl_segment_t* ptr_pwl_segments_target = nullptr;
+        gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
         float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
         float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.scale : 1.0f;
@@ -576,7 +574,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
             }
         }
 
-        ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
+        ptr_pwl_segments_target = reinterpret_cast<gna_pwl_segment_t*>(&ptr_pwl_segments_target);
 
         void* ptr_pwl_input = nullptr;
         void* ptr_pwl_outputs = nullptr;
@@ -600,7 +598,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
         if (ptr_pwl_segments_target != nullptr) {
             gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
                 &ptr_pwl_segments.front(),
-                ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
+                ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
                 64);
         }
     }
@@ -1466,7 +1464,7 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
 void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto* generic = dynamic_cast<GenericLayer*>(layer.get());
     std::string type;
-    std::vector<intel_pwl_segment_t> ptr_pwl_segments;
+    std::vector<gna_pwl_segment_t> ptr_pwl_segments;
     uint32_t num_rows;
     uint32_t num_columns;
     void* ptr_inputs = nullptr;
@@ -1582,8 +1580,7 @@ case name:\
 #endif
 
     auto& currentComponent = dnnComponents.addComponent(layer->name, actName);
-
-    intel_pwl_segment_t* ptr_pwl_segments_target = nullptr;
+    gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
     if (!gnaFlags->sw_fp32) {
         // TODO: generalize activation function code
@@ -1618,7 +1615,7 @@ case name:\
                 input_pwl_scale_factor,
                 output_pwl_scale_factor);
         }
-        ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
+        ptr_pwl_segments_target = reinterpret_cast<gna_pwl_segment_t*>(&ptr_pwl_segments_target);
     }
 
     dnn->InitPiecewiseLinearComponent(currentComponent,
@@ -1641,7 +1638,7 @@ case name:\
     if (ptr_pwl_segments_target != nullptr) {
         gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
             &ptr_pwl_segments.front(),
-            ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
+            ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
             64);
     }
 }
diff --git a/inference-engine/src/gna_plugin/gna_helper.cpp b/inference-engine/src/gna_plugin/gna_helper.cpp
index fba743c..2e9090b 100644
--- a/inference-engine/src/gna_plugin/gna_helper.cpp
+++ b/inference-engine/src/gna_plugin/gna_helper.cpp
@@ -4,15 +4,13 @@
 //  gna_helper.cpp : various GNA-related utility functions
 //
 
-#define PROFILE
-
 #include <cstdint>
 #include <cstdio>
 #include <fstream>
 #include <vector>
 #include <sstream>
 #include <string>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 
 #include "gna_lib_ver_selector.hpp"
@@ -48,21 +46,6 @@ void PrintMatrixFloat32(char *ptr_name, float *ptr_matrix, int num_rows, int num
     }
 }
 
-void PrintGnaNetwork(intel_nnet_type_t *ptr_nnet) {
-    PrintMatrixInt16("input", reinterpret_cast<int16_t*>(ptr_nnet->pLayers[0].pInputs),
-                     ptr_nnet->pLayers[0].nInputRows, ptr_nnet->pLayers[0].nInputColumns, ptr_nnet->pLayers[0].nInputColumns, 1.0);
-    for (uint32_t i = 0; i < ptr_nnet->nLayers; i++) {
-        char name[256];
-        snprintf(name, sizeof(name), "output %d", i);
-        if (ptr_nnet->pLayers[i].nBytesPerOutput == 2) {
-            PrintMatrixInt16(name, reinterpret_cast<int16_t*>(ptr_nnet->pLayers[i].pOutputs),
-                             ptr_nnet->pLayers[i].nOutputRows, ptr_nnet->pLayers[i].nOutputColumns, ptr_nnet->pLayers[i].nOutputColumns, 1.0);
-        } else {
-            PrintMatrixInt32(name, reinterpret_cast<int32_t*>(ptr_nnet->pLayers[i].pOutputs),
-                             ptr_nnet->pLayers[i].nOutputRows, ptr_nnet->pLayers[i].nOutputColumns, ptr_nnet->pLayers[i].nOutputColumns, 1.0);
-        }
-    }
-}
 
 typedef struct {
     std::string sName;
@@ -146,7 +129,7 @@ uint32_t BufferOffsetFromAddress(std::vector<intel_memory_region_t> &vBuffer, vo
     return (nOffsetBytes);
 }
 
-std::string LayerName(intel_nnet_layer_t *pLayer) {
+std::string LayerName(gna_nnet_layer_t *pLayer) {
     const auto nKind = pLayer->nLayerKind;
     std::string sKind;
     if (nKind == INTEL_AFFINE) {
@@ -164,7 +147,7 @@ std::string LayerName(intel_nnet_layer_t *pLayer) {
     return (sKind);
 }
 
-uint32_t NumInputs(intel_nnet_layer_t *pLayer) {
+uint32_t NumInputs(gna_nnet_layer_t *pLayer) {
     const auto nKind = pLayer->nLayerKind;
     uint32_t nInputs;
     if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
@@ -180,7 +163,7 @@ uint32_t NumInputs(intel_nnet_layer_t *pLayer) {
     return (nInputs);
 }
 
-uint32_t NumOutputs(intel_nnet_layer_t *pLayer) {
+uint32_t NumOutputs(gna_nnet_layer_t *pLayer) {
     const auto nKind = pLayer->nLayerKind;
     uint32_t nOutputs;
     if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
@@ -196,7 +179,7 @@ uint32_t NumOutputs(intel_nnet_layer_t *pLayer) {
     return (nOutputs);
 }
 
-uint32_t NumGroupSize(intel_nnet_layer_t *pLayer) {
+uint32_t NumGroupSize(gna_nnet_layer_t *pLayer) {
     const auto nKind = pLayer->nLayerKind;
     uint32_t nGroupSize;
     if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
diff --git a/inference-engine/src/gna_plugin/gna_lib_ver_selector.hpp b/inference-engine/src/gna_plugin/gna_lib_ver_selector.hpp
index 6366d24..0455b72 100644
--- a/inference-engine/src/gna_plugin/gna_lib_ver_selector.hpp
+++ b/inference-engine/src/gna_plugin/gna_lib_ver_selector.hpp
@@ -7,9 +7,39 @@
 #if GNA_LIB_VER == 2
 
 #include <cstdint>
-#include <gna-api-types-xnn.h>
 
 #define nLayerKind operation
 #define intel_layer_kind_t gna_layer_operation
 #define intel_gna_proc_t uint32_t
+
+
+/**
+ * Rounds a number up, to the nearest multiple of significance
+ * Used for calculating the memory sizes of GNA data buffers
+ *
+ * @param number        Memory size or a number to round up.
+ * @param significance  Informs the function how to round up. The function "ceils"
+ *                      the number to the lowest possible value divisible by "significance".
+ * @return Rounded integer value.
+ */
+#define ALIGN(number, significance) ((((number) + (significance) - 1) / (significance)) * (significance))
+
+/**
+ * Rounds a number up, to the nearest multiple of 64
+ * Used for calculating memory sizes of GNA data arrays
+ */
+#define ALIGN64(number) ALIGN(number, 64)
+
+#else
+
+#include <gna-api.h>
+#include <gna-api-types-xnn.h>
+
+#define gna_pwl_segment_t intel_pwl_segment_t
+#define gna_compound_bias_t intel_compound_bias_t
+#define gna_nnet_layer_t intel_nnet_layer_t
+#define gna_nnet_type_t intel_nnet_type_t
+#define gna_affine_func_t intel_affine_func_t
+#define gna_affine_layer_t intel_affine_layer_t
+#define gna_convolutional_layer_t intel_convolutional_layer_t
 #endif
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp
index 6c295a7..1db971d 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.hpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp
@@ -8,7 +8,6 @@
 #include <vector>
 #include <utility>
 
-#include <gna-api.h>
 #include "descriptions/gna_input_desc.hpp"
 #include "descriptions/gna_output_desc.hpp"
 #include "gna_plugin_log.hpp"
diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
index f048a12..8137fba 100644
--- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
+++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -10,8 +10,9 @@
 #include "ie_layers.h"
 #include "caseless.hpp"
 #include "ie_algorithm.hpp"
-#include "gna-api.h"
+#include "backend/gna_types.h"
 #include "gna_permute.hpp"
+#include "gna_lib_ver_selector.hpp"
 
 
 namespace GNAPluginNS {
diff --git a/inference-engine/src/gna_plugin/memory/gna_memory.hpp b/inference-engine/src/gna_plugin/memory/gna_memory.hpp
index 7cf0bea..5916ab5 100644
--- a/inference-engine/src/gna_plugin/memory/gna_memory.hpp
+++ b/inference-engine/src/gna_plugin/memory/gna_memory.hpp
@@ -13,8 +13,7 @@
 #include <list>
 #include <algorithm>
 #include <functional>
-
-#include <gna-api.h>
+#include "gna_lib_ver_selector.hpp"
 
 namespace GNAPluginNS {
 namespace memory {
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 708ad70..ca6e6a1 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -16,7 +16,6 @@
 #include <iomanip>
 
 #include <legacy/graph_transformer.h>
-#include <gna-api.h>
 #include <blob_factory.hpp>
 #include <ie_memcpy.h>
 #include <ie_algorithm.hpp>
diff --git a/inference-engine/src/gna_plugin/runtime/pwl.cpp b/inference-engine/src/gna_plugin/runtime/pwl.cpp
index d89fe2c..d3d8c77 100644
--- a/inference-engine/src/gna_plugin/runtime/pwl.cpp
+++ b/inference-engine/src/gna_plugin/runtime/pwl.cpp
@@ -9,6 +9,7 @@
 #include <limits>
 #include <cstdint>
 #include <algorithm>
+#include "backend/gna_types.h"
 
 #ifdef _NO_MKL_
 #include <cmath>
@@ -497,7 +498,7 @@ std::vector<pwl_t> pwl_search(const DnnActivation& activation_type,
 
 
 void PwlDesignOpt16(const DnnActivation activation_type,
-                    std::vector<intel_pwl_segment_t> &ptr_segment,
+                    std::vector<gna_pwl_segment_t> &ptr_segment,
                     const float scale_in,
                     const float scale_out) {
     std::vector<pwl_t> pwl;
@@ -588,7 +589,7 @@ void PwlDesignOpt16(const DnnActivation activation_type,
 }
 
 void PwlDesign16(const DnnActivation activation_type,
-                 intel_pwl_segment_t *ptr_segment,
+                 gna_pwl_segment_t *ptr_segment,
                  const uint32_t num_segments,
                  const float scale_in,
                  const float scale_out) {
@@ -869,7 +870,7 @@ void PwlApply16(intel_dnn_component_t *component,
     uint32_t num_saturate = 0;
     uint32_t num_segments = component->op.pwl.num_segments;
     if (num_segments > 0) {
-        intel_pwl_segment_t *ptr_segment = component->op.pwl.ptr_segments;
+        gna_pwl_segment_t *ptr_segment = component->op.pwl.ptr_segments;
         for (int i = num_row_start; i <= num_row_end; i++) {
             int32_t *ptr_input = reinterpret_cast<int32_t *>(component->ptr_inputs) + i * component->num_columns_in;
             int16_t *ptr_output = reinterpret_cast<int16_t *>(component->ptr_outputs) + i * component->num_columns_in;
diff --git a/inference-engine/src/gna_plugin/runtime/pwl.h b/inference-engine/src/gna_plugin/runtime/pwl.h
index 3b8030c..86b3cfb 100644
--- a/inference-engine/src/gna_plugin/runtime/pwl.h
+++ b/inference-engine/src/gna_plugin/runtime/pwl.h
@@ -8,6 +8,7 @@
 #include <cstdint>
 
 #include "backend/dnn_types.h"
+#include "backend/gna_types.h"
 
 #define SIGMOID_NUM_SEGMENTS 65
 #define SIGMOID_DOMAIN 10.0f  // portion of input to be approximated (-10,10)
@@ -95,11 +96,11 @@ void PwlApply32(intel_dnn_component_t *component,
                 const uint32_t num_col_start,
                 const uint32_t num_col_end);
 void PwlDesign16(const DnnActivation activation_type,
-                 intel_pwl_segment_t *ptr_segment,
+                 gna_pwl_segment_t *ptr_segment,
                  const uint32_t num_segments,
                  const float scale_in,
                  const float scale_out);
 void PwlDesignOpt16(const DnnActivation activation_type,
-                std::vector<intel_pwl_segment_t> &ptr_segment,
+                std::vector<gna_pwl_segment_t> &ptr_segment,
                 const float scale_in,
                 const float scale_out);
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
index 798f49e..efb1464 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
@@ -46,7 +46,7 @@ TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_stru
 #if GNA_LIB_VER == 1
     GTEST_SKIP();
 #endif
-    auto & nnet_type = storage<intel_nnet_type_t>();
+    auto & nnet_type = storage<gna_nnet_type_t>();
 
     // saving pointer to nnet - todo probably deep copy required
     save_args().onInferModel(AffineWith2AffineOutputsModel())
@@ -120,7 +120,7 @@ TEST_F(GNAAOTTests, PoolingModel_canbe_export_imported) {
 
 TEST_F(GNAAOTTests, DISABLED_CanConvertFromAOTtoSueModel) {
 
-    auto & nnet_type = storage<intel_nnet_type_t>();
+    auto & nnet_type = storage<gna_nnet_type_t>();
 
     // saving pointer to nnet - todo probably deep copy required
     save_args().onInferModel(AffineWith2AffineOutputsModel())
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_hardware_precision_test.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_hardware_precision_test.cpp
index b977866..b57c931 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_hardware_precision_test.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_hardware_precision_test.cpp
@@ -29,7 +29,7 @@ TEST_F(GNAHWPrecisionTest, canPassInt8Precision) {
             nnet_input_precision(Precision::I16).
             nnet_ouput_precision(Precision::I32).
             nnet_weights_precision(Precision::I8).
-            nnet_biases_precision(Precision::fromType<intel_compound_bias_t>());
+            nnet_biases_precision(Precision::fromType<gna_compound_bias_t>());
 }
 
 TEST_F(GNAHWPrecisionTest, canPassInt16Precision) {
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
index 6a71cb4..fb63c3f 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
@@ -4,7 +4,7 @@
 
 #include "gna_matcher.hpp"
 #include <gna/gna_config.hpp>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include <gna_executable_network.hpp>
 #include "gna_plugin.hpp"
 #include "gna_mock_api.hpp"
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp
index 0470a61..bf9d460 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp
@@ -25,7 +25,7 @@
 
 #include <backend/dnn_types.h>
 #include <gna_plugin_policy.hpp>
-#include <gna-api.h>
+#include <backend/gna_types.h>
 #include <gna/gna_config.hpp>
 #include <gna_plugin.hpp>
 #include <gna_lib_ver_selector.hpp>
@@ -292,7 +292,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
-    GNAPropagateMatcher & exact_nnet_structure(intel_nnet_type_t * pNet) {
+    GNAPropagateMatcher & exact_nnet_structure(gna_nnet_type_t * pNet) {
 
         getMatcher().type = GnaPluginTestEnvironment::exactNNetStructure;
         original_nnet = pNet;
@@ -415,7 +415,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return * this;
     }
 
-    GNAPropagateMatcher & to(intel_nnet_type_t *savedNet) {
+    GNAPropagateMatcher & to(gna_nnet_type_t *savedNet) {
         this->savedNet = savedNet;
         return *this;
     }
@@ -427,8 +427,8 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 
  protected:
     void match();
-    intel_nnet_type_t * original_nnet = nullptr;
-    intel_nnet_type_t * savedNet = nullptr;
+    gna_nnet_type_t * original_nnet = nullptr;
+    gna_nnet_type_t * savedNet = nullptr;
 };
 
 
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp b/inference-engine/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp
index 2734796..afbcc0e 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp
@@ -5,7 +5,7 @@
 #include <vector>
 #include <gtest/gtest.h>
 #include <legacy/layer_transform.hpp>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "frontend/model_quantizer.hpp"
 #include "frontend/layer_quantizer.hpp"
 #include "gna_matcher.hpp"
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/conv_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/conv_matcher.hpp
index d1e3afc..ef9b892 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/conv_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/conv_matcher.hpp
@@ -4,22 +4,22 @@
 
 #pragma once
 
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class ConvoluionLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class ConvoluionLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     bool matchInserted;
     int matchQuantity;
  public:
     ConvoluionLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         for(int i = 0; i < foo->nLayers; i++) {
             if (foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-            auto conv = (intel_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
+            auto conv = (gna_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
 
             return matchInserted;
         }
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/copy_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/copy_matcher.hpp
index d75fe1c..2bc6c05 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/copy_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/copy_matcher.hpp
@@ -3,14 +3,17 @@
 //
 
 #pragma once
+
 #include "nnet_base_matcher.hpp"
-class CopyLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+#include "backend/gna_types.h"
+
+class CopyLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     bool matchInserted;
     const int matchQuantity;
     mutable int actualNumberOfCopyLayers;
  public:
     CopyLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         actualNumberOfCopyLayers = 0;
@@ -40,7 +43,7 @@ class CopyLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_typ
     }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> HasCopyLayer(bool matchInserted = false, int matchQuantity = -1) {
+inline ::testing::Matcher<const gna_nnet_type_t*> HasCopyLayer(bool matchInserted = false, int matchQuantity = -1) {
     std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
     c->add(new CopyLayerMatcher(matchInserted, matchQuantity));
     return ::testing::MakeMatcher(c.release());
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/diag_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/diag_matcher.hpp
index 83f7cd9..f2e1bfb 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/diag_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/diag_matcher.hpp
@@ -3,17 +3,18 @@
 //
 
 #pragma once
-#include"gna-api.h"
+
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class DiagLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class DiagLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     bool matchInserted;
     int  matchQuantity;
     mutable int  actualQuantity;
 public:
     DiagLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         actualQuantity = 0;
@@ -21,7 +22,7 @@ public:
             if (foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
             // diagonal layer has to have 1 for weights and 0 for biases
 
-            auto diag = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+            auto diag = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
             bool bWeightsOK = true;
 
             int beforePadding = 0;
@@ -69,7 +70,7 @@ public:
     }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> HasDiagonalLayer(bool matchInserted = false, int matchQuantity = -1) {
+inline ::testing::Matcher<const gna_nnet_type_t*> HasDiagonalLayer(bool matchInserted = false, int matchQuantity = -1) {
     std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
     c->add(new DiagLayerMatcher(matchInserted, matchQuantity));
     return ::testing::MakeMatcher(c.release());
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/fill_with_data.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/fill_with_data.hpp
index d5eb3eb..091b866 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/fill_with_data.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/fill_with_data.hpp
@@ -4,8 +4,9 @@
 
 #pragma once
 
+#include "backend/gna_types.h"
 
-class OutputFiller : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class OutputFiller : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     mutable std::stringstream reason;
     int32_t fill32BValue;
     int16_t fill16BValue;
@@ -14,7 +15,7 @@ class OutputFiller : public ::testing::MatcherInterface<const intel_nnet_type_t*
     OutputFiller(int32_t fill32BValue, int16_t fill16BValue) : fill32BValue(fill32BValue), fill16BValue(fill16BValue) {}
 
 
-    bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
         if (foo == nullptr)
             return false;
         reason.str("");
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/input_data_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/input_data_matcher.hpp
index 0da6fd5..1e57f19 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/input_data_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/input_data_matcher.hpp
@@ -6,15 +6,16 @@
 #pragma once
 
 #include <gmock/gmock-matchers.h>
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 
-class InputDataMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t *> {
+class InputDataMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t *> {
     std::vector<int16_t> refInput;
 public:
 
     explicit InputDataMatcher(const std::vector<int16_t> &_refInput) : refInput(_refInput) {}
 
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo->pLayers == nullptr) {
             *listener << "Address of the first layer descriptor is NULL";
             return false;
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/nnet_base_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/nnet_base_matcher.hpp
index 0dbe279..c500bb9 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/nnet_base_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/nnet_base_matcher.hpp
@@ -3,23 +3,25 @@
 //
 
 #pragma once
+
+#include "backend/gna_types.h"
 #include "gna_lib_ver_selector.hpp"
 
-class NNetComponentMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
-    std::vector<std::shared_ptr<::testing::MatcherInterface<const intel_nnet_type_t*>>> matchers;
+class NNetComponentMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
+    std::vector<std::shared_ptr<::testing::MatcherInterface<const gna_nnet_type_t*>>> matchers;
     mutable int failIdx = -1;
     mutable std::stringstream reason;
     int bitness;
  public:
     NNetComponentMatcher(int bitness  = 16) : bitness(bitness) {}
-    NNetComponentMatcher& add(::testing::MatcherInterface<const intel_nnet_type_t*> * p) {
-        matchers.push_back(std::shared_ptr<::testing::MatcherInterface<const intel_nnet_type_t*>>(p));
+    NNetComponentMatcher& add(::testing::MatcherInterface<const gna_nnet_type_t*> * p) {
+        matchers.push_back(std::shared_ptr<::testing::MatcherInterface<const gna_nnet_type_t*>>(p));
         return *this;
     }
     bool empty() const {
         return matchers.empty();
     }
-    bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
         if (foo == nullptr)
             return false;
         reason.str("");
@@ -48,7 +50,7 @@ class NNetComponentMatcher : public ::testing::MatcherInterface<const intel_nnet
                     }
                     if (foo->pLayers[j].nLayerKind == INTEL_AFFINE ||
                         foo->pLayers[j].nLayerKind == INTEL_AFFINE_DIAGONAL) {
-                        auto pAffine = reinterpret_cast<intel_affine_func_t*>(foo->pLayers[j].pLayerStruct);
+                        auto pAffine = reinterpret_cast<gna_affine_func_t*>(foo->pLayers[j].pLayerStruct);
 
                         if (pAffine->pWeights == foo->pLayers[i].pOutputs) {
                             reason << "numberOfBytes per output int pLayers[" << i << "] should be " << (bitness/8) << ", but was "
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pool_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pool_matcher.hpp
index c4b2b7b..701ecd7 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pool_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pool_matcher.hpp
@@ -4,24 +4,24 @@
 
 #pragma once
 
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class PoolingLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PoolingLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     bool matchInserted;
     int matchQuantity;
     bool bMaxPool;
  public:
     PoolingLayerMatcher(bool matchInserted, int matchQuantity, bool bMaxPool)
         : matchInserted(matchInserted), matchQuantity(matchQuantity), bMaxPool(bMaxPool) {}
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         for(int i = 0; i < foo->nLayers; i++) {
             if (foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-            auto conv = (intel_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
+            auto conv = (gna_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
             if (conv->poolType != INTEL_MAX_POOLING) continue;
 
             return matchInserted;
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/precision_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/precision_matcher.hpp
index c02ded0..7376b32 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/precision_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/precision_matcher.hpp
@@ -4,15 +4,16 @@
 
 #pragma once
 #include "nnet_base_matcher.hpp"
+#include "backend/gna_types.h"
 #include "gna_lib_ver_selector.hpp"
 
-class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class NNetPrecisionMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     GnaPluginTestEnvironment::NnetPrecision nnetPrecision;
     intel_layer_kind_t layerKind = (intel_layer_kind_t)-1;
  public:
     explicit  NNetPrecisionMatcher(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
                                    intel_layer_kind_t layerKind = (intel_layer_kind_t)-1) : nnetPrecision(nnetPrecision), layerKind(layerKind) {}
-    bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
 
         auto ioPrecision = (foo->pLayers->nBytesPerInput == nnetPrecision.input_precision.size()) &&
             (foo->pLayers->nBytesPerOutput== nnetPrecision.output_precision.size());
@@ -25,7 +26,7 @@ class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet
             }
             switch (layerKind) {
                 case INTEL_AFFINE : {
-                    auto affine = (intel_affine_layer_t *) (foo->pLayers->pLayerStruct);
+                    auto affine = (gna_affine_layer_t *) (foo->pLayers->pLayerStruct);
 
                     return affine->affine.nBytesPerBias == nnetPrecision.biases_precision.size() &&
                         affine->affine.nBytesPerWeight == nnetPrecision.weights_precision.size();
@@ -47,7 +48,7 @@ class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet
     }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> BitnessOfNNetEq(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
+inline ::testing::Matcher<const gna_nnet_type_t*> BitnessOfNNetEq(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
                                                          intel_layer_kind_t component) {
     std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
     c->add(new NNetPrecisionMatcher(nnetPrecision, component));
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_matcher.hpp
index 6f7589a..41ac315 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_matcher.hpp
@@ -10,7 +10,7 @@
 
 extern void PwlApply16(intel_dnn_component_t *component, uint32_t num_subset_size);
 
-class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PWLMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     bool matchInserted;
     int matchQuantity;
     mutable int timesInserted = 0;
@@ -25,7 +25,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
         : matchInserted(inserted), matchQuantity(matchQuantity), activationsToLookFor(particularActivations) {
     }
 
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         timesInserted = 0;
@@ -35,7 +35,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
             if (foo->pLayers[i].nLayerKind != INTEL_AFFINE &&
                 foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL &&
                 foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
-            auto affine = reinterpret_cast<intel_affine_layer_t*>(foo->pLayers[i].pLayerStruct);
+            auto affine = reinterpret_cast<gna_affine_layer_t*>(foo->pLayers[i].pLayerStruct);
             if (affine == nullptr) continue;
 
             bool hasPwl = affine->pwl.nSegments != 0 && affine->pwl.pSegments != nullptr;
@@ -73,7 +73,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
         return timesInserted == 0;
     };
 
-    DnnActivationType detectPwlType(intel_nnet_layer_t *layer) const {
+    DnnActivationType detectPwlType(gna_nnet_layer_t *layer) const {
 
         intel_dnn_component_t comp;
         comp.ptr_outputs = layer->pOutputs;
@@ -82,11 +82,11 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
 
         if (layer->nLayerKind == INTEL_AFFINE ||
             layer->nLayerKind == INTEL_AFFINE_DIAGONAL) {
-            auto pAffineLayer = reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
+            auto pAffineLayer = reinterpret_cast<gna_affine_layer_t *>(layer->pLayerStruct);
             comp.op.pwl.num_segments = pAffineLayer->pwl.nSegments;
             comp.op.pwl.ptr_segments = pAffineLayer->pwl.pSegments;
         } else if (layer->nLayerKind == INTEL_CONVOLUTIONAL) {
-            auto pConvolutionalLayer = reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
+            auto pConvolutionalLayer = reinterpret_cast<gna_convolutional_layer_t *>(layer->pLayerStruct);
             comp.op.pwl.num_segments = pConvolutionalLayer->pwl.nSegments;
             comp.op.pwl.ptr_segments = pConvolutionalLayer->pwl.pSegments;
         } else {
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
index 5606d51..d11725f 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
@@ -10,10 +10,11 @@
 #include <iostream>
 
 #include <runtime/pwl.h>
+#include <backend/gna_types.h>
 
 #include "nnet_base_matcher.hpp"
 
-class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     const float rmse_threshold;
     const uint32_t activation_type;
     const uint16_t segment_threshold;
@@ -23,7 +24,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
                                                             rmse_threshold(precision_threshold),
                                                             segment_threshold(segments) {}
 
-    bool MatchAndExplain(const intel_nnet_type_t *nnet, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *nnet, ::testing::MatchResultListener *listener) const override {
         float rmse = 0.f;
         const float test_arg_scale_factor = 16384;
 
@@ -35,7 +36,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
                 nnet->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL &&
                 nnet->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-            auto affine = reinterpret_cast<intel_affine_layer_t*>(nnet->pLayers[i].pLayerStruct);
+            auto affine = reinterpret_cast<gna_affine_layer_t*>(nnet->pLayers[i].pLayerStruct);
 
             if (affine == nullptr ||
                 affine->pwl.nSegments == 0 ||
@@ -85,7 +86,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
             }
 
             std::vector<double> y_diviation(2*domain);
-            std::vector<intel_pwl_segment_t*> segments_vector(affine->pwl.nSegments);
+            std::vector<gna_pwl_segment_t*> segments_vector(affine->pwl.nSegments);
             std::iota(segments_vector.begin(), segments_vector.begin()+affine->pwl.nSegments,
                                                                                 affine->pwl.pSegments);
 
@@ -132,7 +133,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
     }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> PrecisionOfQuantizedPwlMetrics(uint32_t type,
+inline ::testing::Matcher<const gna_nnet_type_t*> PrecisionOfQuantizedPwlMetrics(uint32_t type,
                                                                                     float threshold,
                                                                                     uint16_t segments) {
     std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/matchers/weights_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/matchers/weights_matcher.hpp
index 2b18587..9bdb8e2 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/matchers/weights_matcher.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/matchers/weights_matcher.hpp
@@ -3,7 +3,7 @@
 //
 
 #pragma once
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
@@ -72,7 +72,7 @@ class TranspozeIterator {
     }
 };
 
-class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     enum HowMatch{
         eNone,
         eEq,
@@ -92,7 +92,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
             eMatchKind = eEq;
         }
     }
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         iterator.reset();
@@ -101,7 +101,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
             if (foo->pLayers[i].nLayerKind != INTEL_AFFINE &&
                 foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
 
-            auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+            auto affine = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
 
             auto affineWeightsSize = foo->pLayers[i].nOutputRows *
                 (foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows);
@@ -136,7 +136,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
     }
 };
 
-class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsSizeMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     enum HowMatch{
         eNone,
         eEqAffine,
@@ -150,7 +150,7 @@ class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_t
         eMatchKind(eEqAffine),
         expected_weights_size(data_len){
     }
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
 
@@ -180,20 +180,20 @@ class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_t
 };
 
 
-class WeightsSaver: public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsSaver: public ::testing::MatcherInterface<const gna_nnet_type_t*> {
     mutable TranspozeIterator iterator;
     std::vector<uint16_t>* weights;
  public:
     explicit WeightsSaver(TranspozedData data) :
         weights(std::get<0>(data)), iterator(data) {
     }
-    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+    bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
         for(int i = 0; i < foo->nLayers; i++) {
             if (foo->pLayers[i].nLayerKind != INTEL_AFFINE) continue;
 
-            auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+            auto affine = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
 
             auto affineWeightsSize = foo->pLayers[i].nOutputRows * foo->pLayers[i].nInputRows;
             auto pWeights = reinterpret_cast<uint16_t *>(affine->pWeights);
-- 
2.7.4