[GNA] Support export model with multiple inputs/outputs and Permute layer (#775)
authorPavel Rodionov <pavel.rodionov@intel.com>
Mon, 22 Jun 2020 15:00:29 +0000 (18:00 +0300)
committerGitHub <noreply@github.com>
Mon, 22 Jun 2020 15:00:29 +0000 (18:00 +0300)
inference-engine/src/gna_plugin/backend/dnn_types.h
inference-engine/src/gna_plugin/gna_device.cpp
inference-engine/src/gna_plugin/gna_model_serial.cpp
inference-engine/src/gna_plugin/gna_model_serial.hpp
inference-engine/src/gna_plugin/gna_plugin.cpp
inference-engine/tests_deprecated/unit/engines/gna/gna_api_stub.cpp
inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp
inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
inference-engine/tests_deprecated/unit/engines/gna/test_irs.cpp
inference-engine/tests_deprecated/unit/engines/gna/test_irs.hpp

index 4e36ed9..8d5fa4e 100644 (file)
@@ -80,7 +80,7 @@ static const char *intel_dnn_softmax_name[kSoftmaxNumType] = {
 };
 
 typedef enum {
-    kDnnUnknownOrientation,
+    kDnnUnknownOrientation = 100,
     kDnnInterleavedOrientation,
     kDnnNonInterleavedOrientation,
     kDnnNumOrientation
index b92214b..bbd4891 100644 (file)
@@ -25,7 +25,7 @@
 #include "gna_plugin_log.hpp"
 
 uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) {
-    void * memPtr;
+    void * memPtr = nullptr;
 #if GNA_LIB_VER == 1
     memPtr = GNAAlloc(nGNAHandle, size_requested, size_granted);
 #else
index 74f3af1..0e03155 100644 (file)
@@ -8,6 +8,9 @@
 #include <ios>
 #include <iomanip>
 #include <map>
+#include <ie_algorithm.hpp>
+#include <ie_common.h>
+#include <ie_precision.hpp>
 
 #if defined __INTEL_COMPILER || defined _MSC_VER
 #include <malloc.h>
@@ -119,11 +122,21 @@ const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
         sizeof(Gna2Shape),
         sizeof(Gna2Shape)}},
     {Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
+    {Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
 };
 
-void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
+void GNAModelSerial::Import(void *basePointer,
+        size_t gnaGraphSize,
+        std::istream & is,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::InputsDataMap& inputsDataMap,
+        InferenceEngine::OutputsDataMap& outputsDataMap) {
     is.exceptions(std::istream::failbit);
 
+    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
+    ImportOutputs(is, basePointer, desc, outputsDataMap);
+
     for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
         readNBits<32>(operation->Type, is);
         readBits(operation->NumberOfOperands, is);
@@ -145,11 +158,10 @@ void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream
         case Gna2OperationTypeFullyConnectedAffine:
         case Gna2OperationTypeConvolution:
         case Gna2OperationTypeCopy:
+        case Gna2OperationTypeTransposition:
             break;
         case Gna2OperationTypeRecurrent:
             THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
-        case Gna2OperationTypeTransposition:
-            THROW_GNA_EXCEPTION << "Importing of transposition operation not supported";
         default:
             THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ")  not supported";
         }
@@ -235,11 +247,12 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     };
 
     auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
-        ModelHeader::EndPoint out;
+        RuntimeEndPoint out;
         out.elements_count = ep.elements_count;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
         out.scaleFactor = ep.scaleFactor;
         out.element_size = ep.element_size;
+        out.orientation = ep.orientation;
         return out;
     };
     /**
@@ -256,15 +269,21 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     header.gnaMemSize = gnaGraphSize;
     header.layersCount = layers.size();
     header.nGroup = guessGrouping(*gna2Model);
-    header.input = convert_to_serial(input);
-    header.output = convert_to_serial(output);
-
+    header.nInputs = inputs.size();
+    header.nOutputs = outputs.size();
     header.nRotateRows = nRotateRows;
     header.nRotateColumns = nRotateColumns;
 
 
     writeBits(header, os);
 
+    for (const auto &input : inputs) {
+        writeBits(convert_to_serial(input), os);
+    }
+    for (const auto &output : outputs) {
+        writeBits(convert_to_serial(output), os);
+    }
+
     for (const auto & layer : layers) {
         writeBits(static_cast<uint32_t>(layer.Type), os);
         writeBits(layer.NumberOfOperands, os);
@@ -284,11 +303,10 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
         case Gna2OperationTypeFullyConnectedAffine:
         case Gna2OperationTypeConvolution:
         case Gna2OperationTypeCopy:
+        case Gna2OperationTypeTransposition:
             break;
         case Gna2OperationTypeRecurrent:
             THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
-        case Gna2OperationTypeTransposition:
-            THROW_GNA_EXCEPTION << "Exporting of interleave operation not supported";
         default:
             THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ")  not supported";
         }
@@ -314,9 +332,18 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
 }
 #else
 
-void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
+void GNAModelSerial::Import(void *basePointer,
+        size_t gnaGraphSize,
+        std::istream & is,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::InputsDataMap& inputsDataMap,
+        InferenceEngine::OutputsDataMap& outputsDataMap) {
     is.exceptions(std::istream::failbit);
 
+    ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
+    ImportOutputs(is, basePointer, desc, outputsDataMap);
+
     auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
         readBits(value.nSegments, is);
         if (value.nSegments != 0) {
@@ -466,11 +493,12 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     };
 
     auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep){
-        ModelHeader::EndPoint out;
+        RuntimeEndPoint out;
         out.elements_count = ep.elements_count;
         out.element_size = ep.element_size;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
         out.scaleFactor = ep.scaleFactor;
+        out.orientation = ep.orientation;
         return out;
     };
     /**
@@ -486,14 +514,16 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     header.gnaMemSize = gnaGraphSize;
     header.layersCount = layers.size();
     header.nGroup = ptr_nnet->nGroup;
-    header.input  = convert_to_serial(input);
-    header.output = convert_to_serial(output);
+    header.nInputs = 1;
+    header.nOutputs = 1;
     header.headerSize = sizeof(ModelHeader);
     header.nRotateRows = nRotateRows;
     header.nRotateColumns = nRotateColumns;
 
 
     writeBits(header, os);
+    writeBits(convert_to_serial(inputs[0]), os);
+    writeBits(convert_to_serial(outputs[0]), os);
 
     for (auto & layer : layers) {
         writeBits(layer.nInputColumns, os);
@@ -572,3 +602,108 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
 }
 
 #endif
+
+std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
+        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
+    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
+    std::size_t outputIndex = 0;
+    for (auto const &output : outputsDataMap) {
+        auto outputName = output.first;
+        auto outputDims = output.second->getTensorDesc().getDims();
+        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
+
+        GNAModelSerial::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
+                                                 outputsDesc[outputIndex].ptrs[0],
+                                                 outputsDesc[outputIndex].num_bytes_per_element,
+                                                 elementsCount,
+                                                 outputsDesc[outputIndex].orientation);
+        endPoints.push_back(endPoint);
+        outputIndex++;
+    }
+    return endPoints;
+}
+
+std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
+                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
+    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
+
+    std::size_t inputIndex = 0;
+    for (auto const& input : inputsDataMap) {
+        auto inputName = input.first;
+        auto inputDims = input.second->getTensorDesc().getDims();
+
+        double scaleFactor = inputDesc->getScaleFactor(inputIndex);
+        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
+        IE_ASSERT(descriptor_ptr.size() > 0);
+        uint32_t element_size = 2u;
+        uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
+        intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
+
+        GNAModelSerial::RuntimeEndPoint endPoint(scaleFactor,
+                                                 descriptor_ptr[0],
+                                                 element_size,
+                                                 elementsCount,
+                                                 orientation);
+        endPoints.push_back(endPoint);
+        inputIndex++;
+    }
+    return endPoints;
+}
+
+void GNAModelSerial::ImportInputs(std::istream &is,
+        void* basePtr,
+        std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+        InferenceEngine::InputsDataMap& dataMap) {
+    dataMap.clear();
+
+    for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
+        std::string name = "input" + std::to_string(inputIndex);
+        RuntimeEndPoint input;
+        is.read(reinterpret_cast<char *>(&input), sizeof(input));
+        inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
+        inputsDesc->orientation_in[name] = input.orientation;
+
+        auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
+
+        dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
+        dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
+                                                            InferenceEngine::TensorDesc(
+                                                                    InferenceEngine::Precision::FP32,
+                                                                    inputDims,
+                                                                    InferenceEngine::Layout::NC)));
+        inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
+    }
+}
+
+void GNAModelSerial::ImportOutputs(std::istream &is,
+        void* basePtr,
+        std::vector<GNAPluginNS::OutputDesc> &desc,
+        InferenceEngine::OutputsDataMap& dataMap) {
+    desc.clear();
+    dataMap.clear();
+    desc.resize(modelHeader.nOutputs);
+
+    for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
+        std::string name = "output" + std::to_string(outputIndex);
+        RuntimeEndPoint output;
+        is.read(reinterpret_cast<char *>(&output), sizeof(output));
+        GNAPluginNS::OutputDesc description;
+        description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
+        // orientation is restored from the serialized endpoint
+        description.orientation = output.orientation;
+        description.num_bytes_per_element = output.element_size;
+        description.scale_factor = output.scaleFactor;
+
+        auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
+        dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
+                                                 InferenceEngine::TensorDesc(
+                                                         InferenceEngine::Precision::FP32,
+                                                         outputDims,
+                                                         InferenceEngine::Layout::NC));
+        desc.at(outputIndex) = description;
+    }
+}
+
+void GNAModelSerial::setHeader(ModelHeader header) {
+    modelHeader = header;
+}
index 28dacfb..ca979eb 100644 (file)
@@ -7,7 +7,10 @@
 #include <istream>
 #include <vector>
 #include <utility>
-#include "gna-api.h"
+
+#include <gna-api.h>
+#include "descriptions/gna_input_desc.hpp"
+#include "descriptions/gna_output_desc.hpp"
 #include "gna_plugin_log.hpp"
 #if GNA_LIB_VER == 2
 #include "gna2-model-api.h"
  * 1.0 - basic support
  * 1.1 - added memory information
  * 2.0 - for use with GNA2 library
 * 2.1 / 1.2 - multiple i/o support
  */
 #if GNA_LIB_VER == 2
 #define HEADER_MAJOR 2
-#define HEADER_MINOR 0
+#define HEADER_MINOR 1
 #else
 #define HEADER_MAJOR 1
-#define HEADER_MINOR 1
+#define HEADER_MINOR 2
 #endif
 
 
 /**
- * @brief Header version 1.0
+ * @brief Header version 2.1
  */
 struct ModelHeader {
     /**
@@ -74,27 +78,8 @@ struct ModelHeader {
     uint32_t nRotateRows = 0u;
     uint32_t nRotateColumns = 0u;
 
-
-    struct EndPoint {
-        /**
-         * if scale factor is different then pased into infer , network might need to be requantized
-         */
-        float scaleFactor = 0.f;
-        /**
-         * Offset in bytes of pointer descriptor
-         */
-        uint64_t descriptor_offset = 0ull;
-        /**
-         * Endpoint resolution in bytes.
-         */
-        uint32_t element_size = 0u;
-        /**
-         * Number of elements
-         */
-        uint32_t elements_count = 0u;
-    };
-    EndPoint input;
-    EndPoint output;
+    uint32_t nInputs = 0u;
+    uint32_t nOutputs = 0u;
 
     /**
      * Reserved Data might be here
@@ -127,15 +112,23 @@ class GNAModelSerial {
          * Number of elements
          */
         uint32_t elements_count = 0;
+        /**
+         * Offset in bytes of pointer descriptor
+        */
+        uint64_t descriptor_offset = 0ull;
+
+        intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
 
         RuntimeEndPoint() = default;
         RuntimeEndPoint(double scaleFactor,
                     void* descriptor_ptr,
                     uint32_t element_size,
-                    uint32_t elements_count) : scaleFactor(scaleFactor),
+                    uint32_t elements_count,
+                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
                                     descriptor_ptr(descriptor_ptr),
                                     element_size(element_size),
-                                    elements_count(elements_count) {
+                                    elements_count(elements_count),
+                                    orientation(orientation) {
         }
     };
     using MemoryType = std::vector<std::pair<void*, uint32_t>>;
@@ -146,11 +139,23 @@ private:
 #else
     intel_nnet_type_t *ptr_nnet;
 #endif
-    RuntimeEndPoint input, output;
+    std::vector<RuntimeEndPoint> inputs;
+    std::vector<RuntimeEndPoint> outputs;
     uint32_t nRotateRows = 0;
     uint32_t nRotateColumns = 0;
 
     MemoryType states, *pstates = nullptr;
+    ModelHeader modelHeader;
+
+    void ImportInputs(std::istream &is,
+            void* basePtr,
+            std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+            InferenceEngine::InputsDataMap& dataMap);
+
+    void ImportOutputs(std::istream &is,
+            void* basePtr,
+            std::vector<GNAPluginNS::OutputDesc> &desc,
+            InferenceEngine::OutputsDataMap& dataMap);
 
  public:
 #if GNA_LIB_VER == 2
@@ -160,8 +165,12 @@ private:
 
     GNAModelSerial(
         Gna2Model * model,
-        RuntimeEndPoint input,
-        RuntimeEndPoint output) : gna2Model(model), input(input), output(output) {
+        const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
+        const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
+        const InferenceEngine::InputsDataMap& inputsDataMap,
+        const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
+            inputs(serializeInputs(inputsDataMap, inputDesc)),
+            outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
     }
 
 #else
@@ -183,8 +192,12 @@ private:
       */
      GNAModelSerial(
          intel_nnet_type_t *ptr_nnet,
-         RuntimeEndPoint input,
-         RuntimeEndPoint output) : ptr_nnet(ptr_nnet), input(input), output(output) {
+         const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
+         const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
+         const InferenceEngine::InputsDataMap& inputsDataMap,
+         const InferenceEngine::OutputsDataMap& outputsDataMap) : ptr_nnet(ptr_nnet),
+                                                                  inputs(serializeInputs(inputsDataMap, inputDesc)),
+                                                                  outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
      }
 #endif
 
@@ -219,7 +232,13 @@ private:
      * @param basePointer
      * @param is - stream without header structure - TBD heder might be needed
      */
-    void Import(void *basePointer, size_t gnaGraphSize, std::istream &is);
+    void Import(void *basePointer,
+                                size_t gnaGraphSize,
+                                std::istream & is,
+                                std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
+                                std::vector<GNAPluginNS::OutputDesc> &desc,
+                                InferenceEngine::InputsDataMap& inputsDataMap,
+                                InferenceEngine::OutputsDataMap& outputsDataMap);
 
     /**
      * save gna graph to an outpus stream
@@ -231,4 +250,13 @@ private:
     void Export(void *basePtr,
                 size_t gnaGraphSize,
                 std::ostream &os) const;
+
+    static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
+            const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
+
+
+    static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
+                                                                        const std::shared_ptr<GNAPluginNS::InputDesc>);
+
+    void setHeader(ModelHeader header);
 };
index 0e19052..97e7740 100644 (file)
@@ -1140,13 +1140,15 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
 #else
     auto serial = GNAModelSerial(&std::get<0>(nnets.back())->obj, mt);
 #endif
-    serial.Import(basePtr, header.gnaMemSize, inputStream);
 
-    inputsDesc->getPtrInputsGlobal("input").push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
-    // TODO: import of multioutput network not supported
-    outputsDesc.resize(1);
-    auto &outputDesc = outputsDesc.front();
-    outputDesc.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.output.descriptor_offset));
+    serial.setHeader(header);
+    serial.Import(basePtr,
+            header.gnaMemSize,
+            inputStream,
+            inputsDesc,
+            outputsDesc,
+            inputsDataMap,
+            outputsDataMap);
 
 #if GNA_LIB_VER == 2
     auto getOrientation = [](Gna2Operation & gnaOperation) {
@@ -1160,32 +1162,10 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
     };
 #endif
 
-#if GNA_LIB_VER == 2
-    inputsDesc->orientation_in["input"] = getOrientation(std::get<0>(gnaModels.back())->obj.Operations[0]);
-    outputDesc.orientation = getOrientation(std::get<0>(gnaModels.back())->obj.Operations[std::get<0>(gnaModels.back())->obj.NumberOfOperations - 1]);
-#else
+#if GNA_LIB_VER == 1
     inputsDesc->orientation_in["input"] = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
-    outputDesc.orientation = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers - 1]);
+    outputsDesc[0].orientation = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers - 1]);
 #endif
-    outputDesc.num_bytes_per_element = header.output.element_size;
-
-    auto outputDims = SizeVector({header.nGroup, header.output.elements_count / header.nGroup});
-    auto inputDims = SizeVector({header.nGroup, header.input.elements_count / header.nGroup});
-
-    inputsDataMap["input"] = std::make_shared<InputInfo>();
-    inputsDataMap["input"]->setInputData(make_shared<Data>("input",
-                                                           TensorDesc(
-                                                                   Precision::FP32,
-                                                                   inputDims,
-                                                                   Layout::NC)));
-    outputsDataMap["output"] = make_shared<Data>("output",
-                                                 TensorDesc(
-                                                         Precision::FP32,
-                                                         outputDims,
-                                                         Layout::NC));
-
-    outputDesc.scale_factor = header.output.scaleFactor;
-    inputsDesc->inputScaleFactors.push_back(header.input.scaleFactor);
 
     num_rotate_rows = header.nRotateRows;
     num_rotate_columns = header.nRotateColumns;
@@ -1214,9 +1194,11 @@ void GNAPlugin::Export(const std::string &fileName) {
         THROW_GNA_EXCEPTION << " network not loaded";
     }
 
+#if GNA_LIB_VER == 1
     if (inputsDesc->ptr_inputs_global_id.size() != 1) {
         THROW_GNA_EXCEPTION << " exporting network with multiple inputs not supported";
     }
+#endif
 
     std::fstream outStream(fileName, ios_base::out | ios_base::binary);
 
@@ -1229,19 +1211,16 @@ void GNAPlugin::Export(const std::string &fileName) {
 #endif
     }
 #if GNA_LIB_VER == 2
-    auto serial = GNAModelSerial(&std::get<0>(gnaModels.front())->obj,
+    Gna2Model* modelToSerial = &std::get<0>(gnaModels.front())->obj;
 #else
-    auto serial = GNAModelSerial(&std::get<0>(nnets.front())->obj,
+    intel_nnet_type_t* modelToSerial = &std::get<0>(nnets.front())->obj;
 #endif
-                   {inputsDesc->inputScaleFactors.front(),
-                    inputsDesc->ptr_inputs_global_storage.front()[0],
-                    2,
-                    static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getTensorDesc().getDims()))},
-                   {outputsDesc.front().scale_factor,
-                    outputsDesc.front().ptrs.front(),
-                    outputsDesc.front().num_bytes_per_element,
-                    static_cast<uint32_t>(InferenceEngine::details::product(outputsDataMap.begin()->second->getTensorDesc().getDims()))})
-        .SetInputRotation(dnn->num_rotate_rows, dnn->num_rotate_columns);
+    auto serial = GNAModelSerial(modelToSerial,
+                                 inputsDesc,
+                                 outputsDesc,
+                                 inputsDataMap,
+                                 outputsDataMap)
+                    .SetInputRotation(dnn->num_rotate_rows, dnn->num_rotate_columns);
 
     for (auto && memoryConnection : graphCompiler.memory_connection) {
         serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size);
index 23b9622..3dd1cfb 100644 (file)
@@ -35,12 +35,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc(
     uint32_t sizeRequested,
     uint32_t *sizeGranted,
     void **memoryAddress) {
-    if (current != nullptr) {
-        return current->Gna2MemoryAlloc(sizeRequested, sizeGranted, memoryAddress);
-    }
     if (sizeGranted != nullptr) {
         *sizeGranted = sizeRequested;
     }
+    if (current != nullptr) {
+        return current->Gna2MemoryAlloc(sizeRequested, sizeGranted, memoryAddress);
+    }
     *memoryAddress = reinterpret_cast<void*>(1);
     return Gna2StatusSuccess;
 }
index 772c909..2fd699c 100644 (file)
@@ -68,6 +68,42 @@ TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_stru
 
 }
 
+TEST_F(GNAAOTTests, TwoInputsModel_canbe_export_imported) {
+#if GNA_LIB_VER == 1
+    GTEST_SKIP();
+#endif
+
+    const std::string X = registerFileForRemove("unit_tests.bin");
+
+    // running export to a file
+    export_network(TwoInputsModelForIO())
+            .inNotCompactMode()
+            .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), 1.0f)
+            .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_1"), 1.0f)
+            .as().gna().model().to(X);
+
+    // running infer using imported model instead of IR
+    assert_that().onInferModel().importedFrom(X)
+            .inNotCompactMode().gna().propagate_forward().called().once();
+}
+
+TEST_F(GNAAOTTests, PermuteModel_canbe_export_imported) {
+
+#if GNA_LIB_VER == 1
+    GTEST_SKIP();
+#endif
+
+    const std::string X = registerFileForRemove("unit_tests.bin");
+
+    // running export to a file
+    export_network(PermuteModelForIO())
+            .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).as().gna().model().to(X);
+
+    // running infer using imported model instead of IR
+    assert_that().onInferModel().importedFrom(X)
+            .inNotCompactMode().gna().propagate_forward().called().once();
+}
+
 TEST_F(GNAAOTTests, CanConvertFromAOTtoSueModel) {
 
 #if GNA_LIB_VER == 2
index 758df6d..378d71d 100644 (file)
@@ -459,6 +459,17 @@ void GNAPluginAOTMatcher :: match() {
 #if GNA_LIB_VER == 1 // TODO: GNA2: handle new API
     EXPECT_CALL(mockApi, GNAAlloc(_,_,_)).WillOnce(DoAll(SetArgPointee<2>(10000), Return(&data.front())));
     EXPECT_CALL(mockApi, GNADeviceOpenSetThreads(_, _)).WillOnce(Return(1));
+#else
+    EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)).WillOnce(Invoke([&data](
+            uint32_t sizeRequested,
+            uint32_t *sizeGranted,
+            void **memoryAddress
+    ) {
+        data.resize(sizeRequested);
+        *sizeGranted = sizeRequested;
+        *memoryAddress = &data.front();
+        return Gna2StatusSuccess;
+    }));
 #endif
     plugin.LoadNetwork(network);
     plugin.Export(_env.exportedModelFileName);
index f34fe26..818def5 100644 (file)
@@ -9697,4 +9697,156 @@ std::string EltwiseAfterSplitModel(int tensor_size, bool bMul) {
 
     return ir;
 }
+
+std::string TwoInputsModelForIO() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net name="multiInputs2" version="7">
+       <layers>
+               <layer id="0" name="Placeholder" type="Input">
+                       <output>
+                               <port id="0" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="Placeholder_1" type="Input">
+                       <output>
+                               <port id="0" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="2" name="Add" type="Eltwise">
+                       <data operation="sum"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="Layer_output" type="Activation">
+                       <data type="tanh"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>32</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+               <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
+               <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
+       </edges>
+</net>
+    )V0G0N";
+}
+
+std::string PermuteModelForIO() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net name="permute" version="7">
+       <layers>
+               <layer id="0" name="Placeholder" type="Input" version="opset1">
+                       <output>
+                               <port id="0" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>640</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="1" name="Reshape/Cast_1238_const" type="Const" version="opset1">
+                       <output>
+                               <port id="1" precision="I32">
+                                       <dim>3</dim>
+                               </port>
+                       </output>
+                       <blobs>
+                               <custom offset="0" precision="I32" size="12"/>
+                       </blobs>
+               </layer>
+               <layer id="2" name="Reshape" type="Reshape" version="opset1">
+                       <data special_zero="False"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>640</dim>
+                               </port>
+                               <port id="1">
+                                       <dim>3</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>160</dim>
+                                       <dim>4</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="3" name="transpose" type="Permute" version="opset1">
+                       <data order="0,2,1"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>160</dim>
+                                       <dim>4</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="1" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>4</dim>
+                                       <dim>160</dim>
+                               </port>
+                       </output>
+               </layer>
+               <layer id="5" name="Layer_output" type="Reshape" version="opset1">
+                       <data special_zero="False"/>
+                       <input>
+                               <port id="0">
+                                       <dim>1</dim>
+                                       <dim>4</dim>
+                                       <dim>160</dim>
+                               </port>
+                       </input>
+                       <output>
+                               <port id="2" precision="FP32">
+                                       <dim>1</dim>
+                                       <dim>640</dim>
+                               </port>
+                       </output>
+               </layer>
+       </layers>
+       <edges>
+               <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+               <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+               <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
+               <edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
+       </edges>
+</net>
+    )V0G0N";
+}
+
 }  // namespace GNATestIRs
index bd0c995..5fa4562 100644 (file)
@@ -112,4 +112,7 @@ std::string SplitToConcatWith2By64InputsAlignedNoFCWithOutCopy();
 std::string SplitToConcatWith3By512InputsWithOutCopy();
 
 std::string ReshapeConvolutionLessThan48Filters();
+
+std::string TwoInputsModelForIO();
+std::string PermuteModelForIO();
 }  // namespace GNATestIRs