1 // Copyright (C) 2018-2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
12 #include "descriptions/gna_input_desc.hpp"
13 #include "descriptions/gna_output_desc.hpp"
14 #include "gna_plugin_log.hpp"
16 #include "gna2-model-api.h"
24 * 1.1 - added memory information
25 * 2.0 - for use with GNA2 library
26 * 2.1 - multiple i/o support
29 #define HEADER_MAJOR 2
30 #define HEADER_MINOR 1
32 #define HEADER_MAJOR 1
33 #define HEADER_MINOR 2
38 * @brief Header version 2.1
42 * @brief MagicNumber – "GNAM" in ASCII, equal to hex 0x474e414d
46 * @brief if header size is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header;
47 * usually this indicates a model version different from the one the current export function produces
49 uint32_t headerSize = 0u;
52 * @details Version of format Major – unsigned int, ex: 0x0001
53 * every change in the header or in the layers definition should be reflected in version change
54 * for backward compatibility new parsers can read old versions of model with certain restrictions
58 * @details Version of Format Minor – unsigned int, corresponding to build revision for example
59 * changes in minor version do not affect the layout of the model
64 * @brief Memory required to be allocated using GNAAlloc()
66 uint64_t gnaMemSize = 0ull;
68 * @brief Number of GNA Layers
70 uint64_t layersCount = 0ull;
72 * @brief Grouping level
76 * Convolution related settings - they affect input transformation
78 uint32_t nRotateRows = 0u;
79 uint32_t nRotateColumns = 0u;
80 bool doRotateInput = false;
82 uint32_t nInputs = 0u;
83 uint32_t nOutputs = 0u;
86 * Reserved Data might be here
92 * @brief implements serialisation tasks for GNAGraph
94 class GNAModelSerial {
97 * The runtime endpoint is mostly the same as in the serial version, except for the descriptor field
99 struct RuntimeEndPoint {
101 * if scale factor is different than the one passed into infer, network might need to be requantized
103 float scaleFactor = 0;
107 void* descriptor_ptr = nullptr;
109 * Endpoint resolution in bytes.
111 uint32_t element_size = 0;
115 uint32_t elements_count = 0;
117 * Offset in bytes of pointer descriptor
119 uint64_t descriptor_offset = 0ull;
121 intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
123 RuntimeEndPoint() = default;
124 RuntimeEndPoint(double scaleFactor,
125 void* descriptor_ptr,
126 uint32_t element_size,
127 uint32_t elements_count,
128 intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
129 descriptor_ptr(descriptor_ptr),
130 element_size(element_size),
131 elements_count(elements_count),
132 orientation(orientation) {
135 using MemoryType = std::vector<std::pair<void*, uint32_t>>;
139 Gna2Model * gna2Model;
141 intel_nnet_type_t *ptr_nnet;
143 std::vector<RuntimeEndPoint> inputs;
144 std::vector<RuntimeEndPoint> outputs;
145 uint32_t nRotateRows = 0;
146 uint32_t nRotateColumns = 0;
147 bool doRotateInput = false;
149 MemoryType states, *pstates = nullptr;
150 ModelHeader modelHeader;
152 void ImportInputs(std::istream &is,
154 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
155 InferenceEngine::InputsDataMap& dataMap);
157 void ImportOutputs(std::istream &is,
159 std::vector<GNAPluginNS::OutputDesc> &desc,
160 InferenceEngine::OutputsDataMap& dataMap);
164 GNAModelSerial(Gna2Model * model, MemoryType & states_holder)
165 : gna2Model(model), pstates(&states_holder) {
170 const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
171 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
172 const InferenceEngine::InputsDataMap& inputsDataMap,
173 const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
174 inputs(serializeInputs(inputsDataMap, inputDesc)),
175 outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
181 * @brief Used for import/export
183 * @param inputScale - in/out parameter representing input scale factor
184 * @param outputScale - in/out parameter representing output scale factor
186 GNAModelSerial(intel_nnet_type_t *ptr_nnet, MemoryType &states_holder)
187 : ptr_nnet(ptr_nnet), pstates(&states_holder) {
191 * @brief used for export only since runtime params are not passed by pointer
196 intel_nnet_type_t *ptr_nnet,
197 const std::shared_ptr<GNAPluginNS::InputDesc> inputDesc,
198 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc,
199 const InferenceEngine::InputsDataMap& inputsDataMap,
200 const InferenceEngine::OutputsDataMap& outputsDataMap) : ptr_nnet(ptr_nnet),
201 inputs(serializeInputs(inputsDataMap, inputDesc)),
202 outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
206 GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns, bool do_rotate_inputs) {
207 this->nRotateColumns = nRotateColumns;
208 this->nRotateRows = nRotateRows;
209 this->doRotateInput = do_rotate_inputs;
214 * mark certain part of gna_blob as state (in future naming is possible)
215 * @param descriptor_ptr
219 GNAModelSerial & AddState(void* descriptor_ptr, size_t size) {
220 states.emplace_back(descriptor_ptr, size);
225 * @brief calculate memory required for import gna graph
226 * @param is - opened input stream
229 static ModelHeader ReadHeader(std::istream &is);
232 * @brief Import model from FS into preallocated buffer,
233 * buffers for pLayers and pStructs are allocated here and require manual deallocation using mm_free
236 * @param is - stream without header structure - TBD header might be needed
238 void Import(void *basePointer,
241 std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
242 std::vector<GNAPluginNS::OutputDesc> &desc,
243 InferenceEngine::InputsDataMap& inputsDataMap,
244 InferenceEngine::OutputsDataMap& outputsDataMap);
247 * save gna graph to an output stream
250 * @param gnaGraphSize
253 void Export(void *basePtr,
255 std::ostream &os) const;
/**
 * @brief Produces the list of RuntimeEndPoint entries describing the outputs.
 * The export constructors of this class call it to initialise the `outputs` member.
 * @param outputsDataMap - outputs data map forwarded from the export constructor
 * @param outputsDesc - output descriptors forwarded from the export constructor
 * @return vector of RuntimeEndPoint
 */
257 static std::vector<GNAModelSerial::RuntimeEndPoint> serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
258 const std::vector<GNAPluginNS::OutputDesc>& outputsDesc);
/**
 * @brief Produces the list of RuntimeEndPoint entries describing the inputs.
 * The export constructors of this class call it to initialise the `inputs` member.
 * @param inputsDataMap - inputs data map forwarded from the export constructor
 * @param (unnamed) - shared pointer to the input descriptor forwarded from the export constructor
 * @return vector of RuntimeEndPoint
 */
261 static std::vector<GNAModelSerial::RuntimeEndPoint> serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
262 const std::shared_ptr<GNAPluginNS::InputDesc>);
/**
 * @brief Accepts a ModelHeader by value.
 * NOTE(review): presumably replaces the stored `modelHeader` member - confirm against the
 * definition, which is not visible in this header.
 * @param header - model header to use
 */
264 void setHeader(ModelHeader header);