1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
12 #include "descriptions/gna_input_desc.hpp"
13 #include "descriptions/gna_output_desc.hpp"
14 #include "gna_plugin_log.hpp"
16 #include "gna2-model-api.h"
24 * 1.1 - added memory information
25 * 2.0 - for use with GNA2 library
26 * 2.1 - multiple i/o support
29 #define HEADER_MAJOR 2
30 #define HEADER_MINOR 1
32 #define HEADER_MAJOR 1
33 #define HEADER_MINOR 2
38 * @brief Header version 2.1
42 * @brief MagicNumber – "GNAM" in ASCII, equal to hex 0x474e414d
46 * @brief if header size is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header;
47 * usually this indicates a model version different from the one the current export function produces
49 uint32_t headerSize = 0u;
52 * @details Version of format Major – unsigned int, ex: 0x0001
53 * every change in the header or in the layers definition should be reflected in version change
54 * for backward compatibility new parsers can read old versions of model with certain restrictions
58 * @details Version of Format Minor – unsigned int, corresponding to build revision for example
59 * changes in the minor version do not affect the layout of the model
64 * @brief Memory required to be allocated using GNAAlloc()
66 uint64_t gnaMemSize = 0ull;
68 * @brief Number of GNA Layers
70 uint64_t layersCount = 0ull;
72 * @brief Grouping level
76 * Convolution-related settings - they affect the input transformation
78 uint32_t nRotateRows = 0u;
79 uint32_t nRotateColumns = 0u;
81 uint32_t nInputs = 0u;
82 uint32_t nOutputs = 0u;
85 * Reserved Data might be here
91 * @brief implements serialisation tasks for GNAGraph
93 class GNAModelSerial {
96 * The runtime endpoint is mostly the same as the serialized one, except for the descriptor field
98 struct RuntimeEndPoint {
100 * if the scale factor differs from the one passed into infer, the network might need to be requantized
102 float scaleFactor = 0;
106 void* descriptor_ptr = nullptr;
108 * Endpoint resolution in bytes.
110 uint32_t element_size = 0;
114 uint32_t elements_count = 0;
116 * Offset in bytes of pointer descriptor
118 uint64_t descriptor_offset = 0ull;
120 intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
122 RuntimeEndPoint() = default;
123 RuntimeEndPoint(double scaleFactor,
124 void* descriptor_ptr,
125 uint32_t element_size,
126 uint32_t elements_count,
127 intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
128 descriptor_ptr(descriptor_ptr),
129 element_size(element_size),
130 elements_count(elements_count),
131 orientation(orientation) {
134 using MemoryType = std::vector<std::pair<void*, uint32_t>>;
138 Gna2Model * gna2Model;
140 intel_nnet_type_t *ptr_nnet;
142 std::vector<RuntimeEndPoint> inputs;
143 std::vector<RuntimeEndPoint> outputs;
144 uint32_t nRotateRows = 0;
145 uint32_t nRotateColumns = 0;
147 MemoryType states, *pstates = nullptr;
148 ModelHeader modelHeader;
150 void ImportInputs(std::istream &is,
152 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
153 InferenceEngine::InputsDataMap& dataMap);
155 void ImportOutputs(std::istream &is,
157 std::vector<GNAPluginNS::OutputDesc> &desc,
158 InferenceEngine::OutputsDataMap& dataMap);
162 GNAModelSerial(Gna2Model * model, MemoryType & states_holder)
163 : gna2Model(model), pstates(&states_holder) {
168 const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
169 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
170 const InferenceEngine::InputsDataMap& inputsDataMap,
171 const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
172 inputs(serializeInputs(inputsDataMap, inputDesc)),
173 outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
179 * @brief Used for import/export
181 * @param inputScale - in/out parameter representing input scale factor
182 * @param outputScale - in/out parameter representing output scale factor
184 GNAModelSerial(intel_nnet_type_t *ptr_nnet, MemoryType &states_holder)
185 : ptr_nnet(ptr_nnet), pstates(&states_holder) {
189 * @brief used for export only since runtime params are not passed by pointer
194 intel_nnet_type_t *ptr_nnet,
195 const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
196 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
197 const InferenceEngine::InputsDataMap& inputsDataMap,
198 const InferenceEngine::OutputsDataMap& outputsDataMap) : ptr_nnet(ptr_nnet),
199 inputs(serializeInputs(inputsDataMap, inputDesc)),
200 outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
204 GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns) {
205 this->nRotateColumns = nRotateColumns;
206 this->nRotateRows = nRotateRows;
211 * mark a certain part of gna_blob as state (naming may become possible in the future)
212 * @param descriptor_ptr
216 GNAModelSerial & AddState(void* descriptor_ptr, size_t size) {
217 states.emplace_back(descriptor_ptr, size);
222 * @brief calculate memory required for importing the gna graph
223 * @param is - opened input stream
226 static ModelHeader ReadHeader(std::istream &is);
229 * @brief Import model from FS into preallocated buffer,
230 * buffers for pLayers and pStructs are allocated here and require manual deallocation using mm_free
233 * @param is - stream without header structure - TBD header might be needed
235 void Import(void *basePointer,
238 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
239 std::vector<GNAPluginNS::OutputDesc> &desc,
240 InferenceEngine::InputsDataMap& inputsDataMap,
241 InferenceEngine::OutputsDataMap& outputsDataMap);
244 * save gna graph to an output stream
247 * @param gnaGraphSize
250 void Export(void *basePtr,
252 std::ostream &os) const;
254 static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
255 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
258 static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
259 const std::shared_ptr<GNAPluginNS::InputDesc>);
261 void setHeader(ModelHeader header);