1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
6 #include "cpp_interfaces/base/ie_plugin_base.hpp"
7 #include "gna_plugin.hpp"
8 #include "ie_plugin_config.hpp"
10 #include "blob_factory.hpp"
11 #include "gna_plugin_log.hpp"
12 #include "gna_layer_info.hpp"
15 #include "ie_memcpy.h"
18 void ExportGnaNetworkAndrzej(const char *ptr_name, intel_nnet_type_t* pNeuralNetwork);
34 #include <unordered_map>
35 #include <unordered_set>
37 #include <dnn_memory.hpp>
38 #include <ie_layers.h>
39 #include "details/caseless.hpp"
40 #include <gna-api-types-xnn.h>
42 #include "gna-api-dumper.h"
46 #include "quantization/quantization.h"
48 #include "graph_tools.hpp"
49 #include "gna_plugin_config.hpp"
50 #include "gna/gna_config.hpp"
51 #include "quantization/model_quantizer.hpp"
52 #include "gna_model_serial.hpp"
53 #include "gna_memory_state.hpp"
54 #include "details/ie_cnn_network_tools.h"
56 using namespace InferenceEngine;
58 using namespace GNAPluginNS;
59 using namespace InferenceEngine::details;
61 #ifdef VERBOSE
62 #define VERBOSE_LEVEL (1)
63 #else
64 #define VERBOSE_LEVEL (0)
65 #endif
67 #ifdef PLOT
68 #define PLOT_LEVEL (1)
69 #else
70 #define PLOT_LEVEL (0)
71 #endif
74 #define PAGE_SIZE_BYTES 4096
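// Helper: fetch a dimension from an IR data descriptor by 1-based index,
// falling back to 1 when the descriptor has fewer dimensions.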
76 #define FROM_IR_DIM(mem, idx)\
77 ((mem->dims.size() > idx - 1) ? mem->dims[idx - 1] : 1)
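// Rounds a float away from zero and clamps the result to the int16 range.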
79 inline int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
80 float rounding_value = (src > 0) ? 0.5f : -0.5f;
81 float value = src + rounding_value;
82 if (value > 32767.0) {
84 } else if (value < -32768.0) {
87 return (int16_t)value;
90 void GNAPluginNS::ConvertToInt16(int16_t *ptr_dst,
92 const uint32_t num_rows,
93 const uint32_t num_columns,
94 const float scale_factor) {
95 if (!ptr_dst || !ptr_src) {
98 for (uint32_t i = 0; i < num_rows*num_columns; i++) {
99 ptr_dst[i] = GNAPluginNS::ConvertFloatToInt16(ptr_src[i]*scale_factor);
102 void GNAPluginNS::ConvertToFloat(float *ptr_dst,
104 const uint32_t num_rows,
105 const uint32_t num_columns,
106 const float scale_factor) {
107 if (!ptr_dst || !ptr_src) {
110 for (uint32_t i = 0; i < num_rows; i++) {
111 int32_t *ptr_int_row = ptr_src + i * num_columns;
112 float *ptr_float_row = ptr_dst + i * num_columns;
113 for (uint32_t j = 0; j < num_columns; j++) {
114 ptr_float_row[j] = static_cast<float>(ptr_int_row[j]) / scale_factor;
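// Copies one batch of input frames into the GNA input buffer. For the
// interleaved orientation the frame index becomes the fastest-varying
// dimension (frame i, element j lands at dst[j * num_group + i]); when the
// source and destination types differ the values are quantized to int16 with
// the input scale factor. Elements beyond num_vector_elements and frames
// beyond num_frames are zero-padded.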
119 template <typename T, typename U>
120 void GNAPlugin::copyInputData(T *dst,
124 uint32_t num_vector_elements,
125 uint32_t num_vector_stride,
126 intel_dnn_orientation_t orientation) {
130 if (orientation == kDnnInterleavedOrientation) {
131 for (uint32_t i = 0; i < num_frames; i++) {
132 for (uint32_t j = 0; j < num_vector_elements; j++) {
133 if (!std::is_same<T, U>::value) {
134 dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * get_input_scale_factor());
136 dst[j * num_group + i] = src[i * num_vector_elements + j];
139 // pad to meet weight matrix row length requirement
140 for (uint32_t j = num_vector_elements; j < num_vector_stride; j++) {
141 dst[j * num_group + i] = 0;
145 for (uint32_t i = num_frames; i < num_group; i++) {
146 for (uint32_t j = 0; j < num_vector_stride; j++) {
147 dst[j * num_group + i] = 0;
151 if (!std::is_same<T, U>::value) {
152 for (uint32_t i = 0; i < num_frames; i++) {
153 T *ptr_dst_vec = const_cast<T *>(reinterpret_cast<const T *>(dst) + i * num_vector_stride);
154 U *ptr_src_vec = const_cast<U *>(reinterpret_cast<const U *>(src) + i * num_vector_elements);
155 std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
156 for (int j=0; j < num_vector_elements; j++) {
157 ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * get_input_scale_factor());
162 for (uint32_t i = 0; i < num_frames; i++) {
163 void *ptr_dst_vec = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(dst) + i * num_vector_stride * sizeof(T));
164 void *ptr_src_vec = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(src) + i * num_vector_elements * sizeof(U));
165 std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
166 std::memcpy(ptr_dst_vec, ptr_src_vec, num_vector_elements * sizeof(T));
170 for (uint32_t i = num_frames; i < num_group; i++) {
171 void *ptr_dst_vec = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(dst) + i * num_vector_stride * sizeof(T));
172 std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
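// Scatters the network input over the outputs of a Split/Slice layer that is
// connected directly to the Input: each split output receives its slice of
// the source buffer (quantized to int16 when the types differ), padded out to
// a 64-byte aligned size.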
177 template <typename T, typename U>
178 void GNAPlugin::copyInputDataWithSplit(T *const dst,
180 const GNASplitLayer& splitInfo,
181 size_t precision_size) {
186 const U *src_ptr = src;
187 precision_size = sizeof(T);
188 // we found split/slice layer connected to Input
189 for (auto&& outputLayer : splitInfo.splitOutputLayers) {
190 uint32_t begin = outputLayer.offset/precision_size;
191 uint32_t end = (outputLayer.offset + outputLayer.pure_size)/precision_size;
192 if (dst_ptr - dst >= end) {
193 // output layer with a pointer bound to the previous one - skip it
196 for (uint32_t i = begin; i < end; ++i) {
197 if (!std::is_same<T, U>::value) {
198 *(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * get_input_scale_factor());
200 *(dst_ptr++) = *(src_ptr++);
204 end = (outputLayer.offset + ALIGN64(outputLayer.pure_size))/precision_size;
205 std::memset(dst_ptr, 0, (end - begin) * sizeof(uint16_t));
206 dst_ptr += end - begin;
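// Copies scores from the GNA output buffer into the user blob: de-interleaves
// when the component output is interleaved (src[j * num_group + i] goes to
// dst[i * num_vector_elements + j]), widens 2-byte scores to 4 bytes where
// required, and zeroes everything beyond num_active_elements.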
210 void GNAPlugin::ExportScores(void *ptr_dst,
212 intel_dnn_orientation_t orientation,
215 uint32_t num_vector_elements,
216 uint32_t num_active_elements,
217 uint32_t num_vector_stride,
218 uint32_t num_bytes_per_element_input,
219 uint32_t num_bytes_per_element) {
220 // source scores are possibly padded to a multiple of 8 and possibly interleaved
221 // rotate if necessary and only copy actual scores (not padding)
222 if (orientation == kDnnInterleavedOrientation) {
223 if (num_bytes_per_element == 2) {
224 int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
225 int16_t *src = reinterpret_cast<int16_t *>(ptr_src);
226 for (uint32_t i = 0; i < num_frames; i++) {
227 for (uint32_t j = 0; j < num_active_elements; j++) {
228 dst[i * num_vector_elements + j] = src[j * num_group + i];
230 for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
231 dst[i * num_vector_elements + j] = 0;
234 } else if (num_bytes_per_element == 4) { // should work for both int and float
235 int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
236 int8_t *src = reinterpret_cast<int8_t*>(ptr_src);
237 for (uint32_t i = 0; i < num_frames; i++) {
238 for (uint32_t j = 0; j < num_active_elements; j++) {
239 auto input_ptr = src + (j * num_group + i) * num_bytes_per_element_input;
240 auto dst_ptr = dst + (i * num_vector_elements + j);
242 switch (num_bytes_per_element_input) {
244 *dst_ptr = static_cast<int32_t>(*reinterpret_cast<int16_t*>(input_ptr));
248 *dst_ptr = *reinterpret_cast<int32_t*>(input_ptr);
252 THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << num_bytes_per_element_input << " bytes";
255 for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
256 dst[i * num_vector_elements + j] = 0;
260 THROW_GNA_EXCEPTION << "Unsupported target precision for inference: " << num_bytes_per_element << " bytes";
263 if (num_bytes_per_element == 2) {
264 for (uint32_t i = 0; i < num_frames; i++) {
265 void *ptr_dst_vec = reinterpret_cast<void *> (reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(int16_t));
266 void *ptr_src_vec = reinterpret_cast<void *> (reinterpret_cast<uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(int16_t));
267 memset(ptr_dst_vec, 0, num_vector_elements * sizeof(int16_t));
268 memcpy(ptr_dst_vec, ptr_src_vec, num_active_elements * sizeof(int16_t));
270 } else if (num_bytes_per_element == 4) { // should work for both int and float
271 for (uint32_t i = 0; i < num_frames; i++) {
272 void *ptr_dst_vec = reinterpret_cast<void *> (reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(float));
273 void *ptr_src_vec = reinterpret_cast<void *> (reinterpret_cast<uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(float));
274 memset(ptr_dst_vec, 0, num_vector_elements * sizeof(float));
275 memcpy(ptr_dst_vec, ptr_src_vec, num_active_elements * sizeof(float));
278 THROW_GNA_EXCEPTION << "Unsupported target precision for inference: " << num_bytes_per_element << " bytes";
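// Dispatches input import to copyInputData<> based on the input precision
// (U8 / 2-byte / 4-byte) and the component orientation; float input is either
// copied as float or converted to int16, depending on the target precision.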
283 void GNAPlugin::ImportFrames(
286 Precision input_precision,
287 intel_dnn_orientation_t orientation,
290 uint32_t num_vector_elements,
291 uint32_t num_vector_stride) {
292 if (orientation == kDnnInterleavedOrientation) {
293 // TODO : fix that as well
294 if (input_precision == Precision::U8) {
295 int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
296 uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
297 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
298 } else if (input_precision.size() == 2) {
299 int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
300 int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
301 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
302 } else if (input_precision.size() == 4) {
304 float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
305 float *src = const_cast<float *>(reinterpret_cast<const float *>(ptr_src));
306 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
308 int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
309 const float *src = reinterpret_cast<const float *>(ptr_src);
310 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
314 if (input_precision == Precision::U8) {
315 uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
317 float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
318 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
320 int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
321 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
324 } else if (input_precision.size() == 2) {
325 int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
326 int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
327 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
328 } else if (input_precision.size() == 4) {
330 float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
331 float *src = const_cast<float *>(reinterpret_cast<const float *>(ptr_src));
332 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
334 uint16_t *dst = const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(ptr_dst));
335 float *src = const_cast<float *>(reinterpret_cast<const float *>(ptr_src));
336 copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
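// For every memory-layer pair collected from the graph, stores a
// GNAMemoryLayer that ties the pair's input layer (index 1) to its output
// layer (index 0) in the memory_connection extra map.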
342 void GNAPlugin::fillMemoryConnections(std::unordered_map<std::string,
343 std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs) {
344 for (auto &memory : memoryPairs) {
345 auto inputLayer = memory.second[1];
346 auto outputLayer = memory.second[0];
348 IE_ASSERT(1 == outputLayer->insData.size());
350 // creating a connection for the layer's output in the form of an extra map
351 memory_connection.emplace_back(memory.first, GNAMemoryLayer(inputLayer, outputLayer));
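// Records, for each input of a Concat layer, the creator layer's name and its
// byte offset inside the concatenated buffer, plus the total reserved size,
// in the concat_connection extra map.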
355 void GNAPlugin::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
356 // creating a connection for each of the layer's outputs in the form of an extra map
357 GNAPlugin::GNAConcatLayer layerInfoItem(layer);
358 size_t concat_size = 0;
359 std::string& id = layer->name;
361 for (size_t i = 0; i < layer->insData.size(); ++i) {
362 auto dataInput = layer->insData[i].lock();
364 THROW_GNA_EXCEPTION << "Input layer pointer for concat is unexpectedly absent";
367 auto ptrConcatLayerInput = dataInput->creatorLayer.lock();
368 if (!ptrConcatLayerInput) {
369 THROW_GNA_EXCEPTION << "Input layer for concat is unexpectedly absent";
371 layerInfoItem.concatInputLayers.emplace_back(
372 GNAPlugin::GNAConcatLayer::ConcatConnectedLayerInfo({ptrConcatLayerInput->name, concat_size}));
375 InferenceEngine::details::product(begin(dataInput->dims),
376 end(dataInput->dims)) * dataInput->precision.size();
377 concat_size += layer_size;
379 layerInfoItem.reserved_size = concat_size;
380 concat_connection.emplace(id, layerInfoItem);
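// Records, for each consumer of a Split/Slice output, its name, byte offset
// and size (an AffineFilter consumer supplies its own aligned "offset"
// parameter), together with any padding, and stores the result in the
// split_connection extra map.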
383 void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
384 // creating a connection for each of the layer's inputs in the form of an extra map
385 GNAPlugin::GNASplitLayer layerInfoItem(layer);
386 size_t split_size = 0;
387 std::string& id = layer->name;
388 auto dataInput = layer->insData.begin()->lock();
390 THROW_GNA_EXCEPTION << "Input layer pointer for split/slice is unexpectedly absent";
392 auto ptrSplitLayerInput = dataInput->creatorLayer.lock();
393 if (!ptrSplitLayerInput) {
394 THROW_GNA_EXCEPTION << "Input layer for split/slice is unexpectedly absent";
397 LayerInfo ptrSplitLayerInputLayerInfo(ptrSplitLayerInput);
398 for (size_t i = 0; i < layer->outData.size(); ++i) {
400 size_t output_layer_size = 0;
401 auto& dataOutput = layer->outData[i];
403 if (!dataOutput || !dataInput) {
404 THROW_GNA_EXCEPTION << "Output layer pointer for split/slice is unexpectedly absent";
407 for (auto&& ptrSplitLayerOutputPair : dataOutput->getInputTo()) {
408 auto& ptrSplitLayerOutput = ptrSplitLayerOutputPair.second;
409 if (!ptrSplitLayerOutput) {
410 THROW_GNA_EXCEPTION << "Output layer for split/slice is unexpectedly absent";
413 padding = std::max(padding, LayerInfo(ptrSplitLayerOutput).paddingSize())
414 * dataOutput->precision.size();
416 InferenceEngine::details::product(begin(dataOutput->dims),
417 end(dataOutput->dims)) * dataOutput->precision.size();
419 if (ptrSplitLayerOutput->type == "AffineFilter") {
420 size_t aligned64_offset = ptrSplitLayerOutput->GetParamAsInt("offset");
421 layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, aligned64_offset, output_layer_size);
423 layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, split_size, output_layer_size);
427 split_size += padding + output_layer_size;
429 layerInfoItem.reserved_size = split_size;
430 layerInfoItem.splitInputLayer =
431 GNAPlugin::GNASplitLayer::SplitConnectedLayerInfo({ptrSplitLayerInput->type, 0,
432 InferenceEngine::details::product(begin(dataInput->dims),
433 end(dataInput->dims)) * dataInput->precision.size()});
434 split_connection.emplace(id, layerInfoItem);
437 void GNAPlugin::DiagonalPrimitive(InferenceEngine::CNNLayerPtr layer) {
438 AffinePrimitive(layer, true);
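// Builds a 1D GNA convolution component: the feature-map geometry is derived
// from the IR dimensions and stride_x, kernels are transposed per output
// channel, and both the input row and the filters are padded up to a multiple
// of 8 elements. When the input comes straight from an Input layer the rows
// and columns are marked for rotation (Kaldi feature layout).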
441 void GNAPlugin::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
442 auto &convolution = dynamic_cast<ConvolutionLayer &>(*layer.get());
443 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
445 auto inputs = layer->insData.begin()->lock();
446 auto outputs = *layer->outData.begin();
448 uint32_t num_feature_map_rows = FROM_IR_DIM(inputs, 1) / convolution._stride_x;
449 uint32_t num_feature_map_columns = FROM_IR_DIM(inputs, 3) * convolution._stride_x / num_feature_maps;
451 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
452 uint32_t num_columns_in = FROM_IR_DIM(inputs, 3);
453 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
454 uint32_t num_padding = ALIGN(convolution._kernel_x * num_feature_map_columns * num_feature_maps, 8)
455 - convolution._kernel_x * num_feature_map_columns * num_feature_maps;
461 // TODO: questionable why we invent a precision for biases that are not in the IR
462 auto biasPrecision = convolution._biases ? convolution._biases->precision() : outputs->precision;
464 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
465 auto &currentComponent = dnnComponentsForLayer.back().second;
468 cout << "IR layer : " << std::left << std::setw(20) << layer->name << dnnComponentsForLayer.size() - 1 << "\n";
470 auto num_input_padding = ALIGN(num_feature_maps * num_feature_map_columns * num_feature_map_rows, 8)
471 - num_feature_maps * num_feature_map_columns * num_feature_map_rows;
472 auto num_filter_rows = convolution._kernel_x / convolution._stride_x;
473 dnn.InitConvolutional1DComponent(currentComponent,
475 num_feature_maps * num_feature_map_columns * num_feature_map_rows + num_input_padding,
477 num_rows_out * convolution._out_depth,
478 inputs->precision.size(),
479 outputs->precision.size(),
480 convolution._weights->precision().size(),
481 biasPrecision.size(),
482 convolution._out_depth,
484 num_feature_maps * num_feature_map_columns * num_filter_rows + num_padding,
486 num_feature_maps, // interesting - why this is so in gna_example
487 num_feature_map_rows,
488 num_feature_map_columns,
490 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
491 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
497 // update num_feature_maps for next convolutional layer
498 num_feature_maps = convolution._out_depth; // = number of filters
500 size_t num_data_bytes_out =
501 InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
502 * outputs->precision.size();
504 size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
506 auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
508 // TODO: convolution might not be the first layer in sorted order but be connected via split, for example - don't know how Kaldi will handle that
509 if (LayerInfo(connectedInputLayer).isInput()) {
510 // Kaldi features have the opposite orientation
511 dnn.num_rotate_rows = num_feature_map_columns;
512 dnn.num_rotate_columns = num_feature_map_rows;
515 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
518 auto TransposeMatrix = [](uint8_t *ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
519 std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
520 for (uint32_t i = 0; i < num_rows; i++) {
521 for (uint32_t j = 0; j < num_cols; j++) {
522 ie_memcpy(&temp_buffer.front() + (j*num_rows + i)*element_size,
523 temp_buffer.size() - (i * num_cols + j) * element_size,
524 ptr_matrix + (i*num_cols+j)*element_size,
531 std::vector<uint8_t > transposedWeights;
532 for (uint32_t k = 0; k < convolution._out_depth; k++) {
533 uint8_t *ptr_filt_current
534 = convolution._weights->cbuffer().as<uint8_t *>() + k * num_columns_in * convolution._kernel[X_AXIS] * convolution.precision.size();
535 auto transposedPart = TransposeMatrix(ptr_filt_current, convolution.precision.size(), num_columns_in, convolution._kernel[X_AXIS]);
536 transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
539 if (num_padding == 0) {
540 gnamem->readonly().push_local_ptr(ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64);
542 auto elementsIn = convolution._kernel_x * num_feature_map_columns + num_padding;
543 auto paddedWeights = elementsIn * convolution._out_depth;
544 auto paddedWeightsSize = paddedWeights * convolution.precision.size();
545 auto elements_in_row = convolution._kernel_x * num_feature_map_columns;
546 gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void * data, size_t size) {
547 for (int i = 0; i < convolution._out_depth; i++) {
549 transposedWeights.data() + elements_in_row * i * convolution.precision.size(),
550 elements_in_row * convolution.precision.size());
552 data = reinterpret_cast<uint8_t *>(data) + elementsIn * convolution.precision.size();
557 if (convolution._biases) {
558 gnamem->readonly().push_ptr(ptr_biases,
559 convolution._biases->cbuffer().as<const void *>(),
560 convolution._biases->byteSize(),
563 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
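// Power is supported only with power == 1 and is lowered to a diagonal affine
// component whose weights carry the scale factor (quantized to int16 for a
// quantized network).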
567 void GNAPlugin::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
568 auto &power = dynamic_cast<PowerLayer &>(*layer.get());
569 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
571 if (power.power != 1.0) {
572 THROW_IE_EXCEPTION << "[GNA plugin] unsupported power factor, expected 1 but was " << power.power;
575 auto input = layer->insData[0].lock();
577 auto outputs = *layer->outData.begin();
579 uint32_t num_rows_in = FROM_IR_DIM(input, 1);
580 uint32_t num_columns_in = FROM_IR_DIM(input, 2);
581 uint32_t num_rows_out = num_rows_in;
588 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
589 auto &currentComponent = dnnComponentsForLayer.back().second;
590 dnn.InitAffineComponent(currentComponent,
594 input->precision.size(),
595 outputs->precision.size(),
596 // TODO: only fp32 and Int16 tested
597 quantized == nullptr ? input->precision.size() : 2,
598 quantized == nullptr ? input->precision.size() : 4,
599 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
600 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
608 cout << "IR layer : " << std::left << std::setw(20) << layer->name << "diagonal_"<< dnnComponentsForLayer.size() - 1 << "\n";
611 size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
612 * outputs->precision.size();
614 size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->dims), end(input->dims))
615 * input->precision.size();
617 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
618 connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
620 if (power.scale != 1.0f) {
621 if (quantized == nullptr) {
622 gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
624 auto scaledIdentity = quantized->_weights_quant.scale * power.scale;
626 #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5))
628 auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
629 gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
633 if (power.offset != 0.0f) {
634 if (quantized == nullptr) {
635 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
637 gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
640 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
644 void GNAPlugin::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
645 auto &pooling = dynamic_cast<PoolingLayer &>(*layer.get());
646 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
648 auto inputs = layer->insData.begin()->lock();
649 auto outputs = *layer->outData.begin();
651 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
652 uint32_t num_columns_in = FROM_IR_DIM(inputs, 3);
653 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
654 uint32_t num_columns_out = FROM_IR_DIM(outputs, 3);
655 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
660 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
661 auto &currentComponent = dnnComponentsForLayer.back().second;
664 cout << "IR layer : " << std::left << std::setw(20) << layer->name << dnnComponentsForLayer.size() - 1 << "\n";
666 switch (pooling._type) {
667 case PoolingLayer::MAX: break;
668 // we are losing precision here
669 case PoolingLayer::AVG:
671 // TODO: convert to SUM pooling
672 THROW_GNA_EXCEPTION << "Layer :" << layer->name << " not supported";
675 dnn.InitMaxpoolComponent(currentComponent,
677 num_columns_in * num_rows_in ,
679 num_columns_out * num_rows_out,
680 inputs->precision.size(),
681 outputs->precision.size(),
682 pooling._kernel[X_AXIS],
683 pooling._kernel[X_AXIS],
686 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
690 size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
691 * outputs->precision.size();
693 size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
695 connectInput(layer, ptr_inputs, num_data_bytes_in);
696 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
699 void GNAPlugin::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
700 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
702 auto inputs = layer->insData.begin()->lock();
703 auto outputs = *layer->outData.begin();
705 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
706 uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
707 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
708 uint32_t num_columns_out = FROM_IR_DIM(outputs, 2);
709 uint32_t num_padding_in = ALIGN(num_rows_in, 8) - num_rows_in;
710 uint32_t num_padding_out = ALIGN(num_rows_out, 8) - num_rows_out;
713 auto orientation = (num_cnn_rows_out > 0) ? kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
715 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
716 auto &currentComponent = dnnComponentsForLayer.back().second;
717 dnn.InitCopyComponent(currentComponent,
719 ALIGN(num_rows_in, 8),
721 ALIGN(num_rows_out, 8),
723 inputs->precision.size(),
724 outputs->precision.size(),
725 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
726 num_rows_out + num_padding_out,
731 size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
732 begin(outputs->dims), end(outputs->dims)), 8)
733 * outputs->precision.size();
734 size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->precision.size();
736 connectInput(layer, ptr_inputs, num_data_bytes_in);
737 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
740 void GNAPlugin::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
741 auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
743 if (concatLayer == nullptr) {
746 if (concatLayer->insData.size() != 2) {
747 THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers.";
750 auto prevInput0 = concatLayer->insData[0].lock();
751 auto prevInput1 = concatLayer->insData[1].lock();
752 if (!prevInput0 || !prevInput1) {
753 THROW_GNA_EXCEPTION << "Input layer for concat is unexpectedly absent";
755 if (prevInput0->precision.size() != prevInput1->precision.size()) {
756 THROW_GNA_EXCEPTION << "Different precisions for Concat input layers are not supported";
759 auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
760 for (auto &&outLayer : concatLayer->outData.front()->getInputTo()) {
761 if ( LayerInfo(outLayer.second).isConcat() ) {
762 connectOutput(layer, &concatLayerInfo.gna_ptr,
763 &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size);
768 for (auto && inputLayer : concatLayerInfo.concatInputLayers) {
769 if ( InferenceEngine::details::CaselessEq<std::string>()
770 (inputLayer.name, "input") ) {
771 connectInput(layer, &concatLayerInfo.gna_ptr,
772 concatLayerInfo.reserved_size-inputLayer.offset, static_cast<int32_t>(-inputLayer.offset), idx);
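// Crop handling: when the crop offset is already 64-byte aligned the crop is
// expressed purely through input-pointer arithmetic (no GNA primitive);
// otherwise the crop is replaced by an affine "aligning filter" whose weights
// select the cropped rows.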
778 void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
779 auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer.get());
781 if (cropLayer == nullptr) {
784 if (cropLayer->axis.size() > 1) {
785 THROW_GNA_EXCEPTION <<
786 "Crop layer does not support the number of cropped dimensions = "
787 << cropLayer->axis.size() << ".";
790 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
791 size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
792 size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
794 if (ALIGN64(cropOffset) == cropOffset) {
795 // leave crop as it is
796 GNAPlugin::GNACropLayer cropLayerInfoItem(layer);
797 std::string& id = layer->name;
798 crop_connection.emplace(id, cropLayerInfoItem);
799 auto cropLayerInfo = crop_connection.find(cropLayer->name);
801 if (cropLayerInfo == crop_connection.end()) {
802 THROW_GNA_EXCEPTION <<
803 "Item is not in the storage but it was added recently...\n";
806 // calculate index idx for connectInput last parameter
807 connectInput(layer, &cropLayerInfo->second.gna_ptr, cropOutputSize + cropOffset, cropOffset, 0);
809 // cases for certain output layers
810 for (auto &&outLayer : layer->outData.front()->getInputTo()) {
811 auto& nextLayer = outLayer.second;
812 if ( LayerInfo(nextLayer).isConcat() ) {
813 connectOutput(layer, &cropLayerInfo->second.gna_ptr, &cropLayerInfo->second.gna_ptr, cropOutputSize);
817 gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
818 auto outputs = *layer->outData.begin();
819 auto inputs = layer->insData.begin()->lock();
821 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
822 uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
823 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
824 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
831 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
832 auto &currentComponent = dnnComponentsForLayer.back().second;
833 dnn.InitAffineComponent(currentComponent,
834 num_rows_in + num_padding,
837 inputs->precision.size(),
839 quantized == nullptr ? inputs->precision.size() : 2,
841 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
842 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
849 size_t num_data_bytes_out =
850 InferenceEngine::details::product(
851 begin(outputs->dims), end(outputs->dims)) * 4;
853 size_t num_data_bytes_in = num_columns_in *
854 ALIGN(num_rows_in, 8) * inputs->precision.size();
856 connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
857 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
859 FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
861 (quantized == nullptr) ?
862 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64):
863 gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
867 void GNAPlugin::SplitPrimitive(InferenceEngine::CNNLayerPtr layer) {
871 void GNAPlugin::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
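// Eltwise Sum/Prod is mapped onto a diagonal affine component: for Sum the
// 4-byte input is wired in through the bias pointer and the weights are an
// identity vector; for Prod the second input is wired in through the weight
// pointer and the biases are zero.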
875 void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
876 auto &eltwise = dynamic_cast<EltwiseLayer &>(*layer.get());
877 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
879 // for eltwise there should be one input of 4 bytes and one of 2 bytes - detect which is which
880 auto inputs2Bytes = layer->insData[0].lock();
881 auto inputs4Bytes = layer->insData[1].lock();
883 int biasesLayerIdx = 1;
886 if (eltwise._operation == EltwiseLayer::Sum) {
887 if (inputs4Bytes->precision.size() != 4) {
888 std::swap(inputs4Bytes, inputs2Bytes);
891 IE_ASSERT(inputs2Bytes->precision.size() == 2);
892 IE_ASSERT(inputs4Bytes->precision.size() == 4);
894 // for mul both inputs should have 2-byte precision
895 IE_ASSERT(inputs2Bytes->precision.size() == 2);
896 IE_ASSERT(inputs4Bytes->precision.size() == 2);
900 auto outputs = *layer->outData.begin();
902 uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
903 uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
904 uint32_t num_rows_out = num_rows_in;
905 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
912 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
913 auto &currentComponent = dnnComponentsForLayer.back().second;
914 dnn.InitAffineComponent(currentComponent,
915 num_rows_in + num_padding,
917 num_rows_out + num_padding,
918 inputs2Bytes->precision.size(),
919 outputs->precision.size(),
920 // TODO: only fp32 and Int16 tested
921 quantized == nullptr ? inputs2Bytes->precision.size() : 2,
922 quantized == nullptr ? inputs4Bytes->precision.size() : 4,
923 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
924 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
932 cout << "IR layer : " << std::left << std::setw(20) << layer->name << "diagonal_"<< dnnComponentsForLayer.size() - 1 << "\n";
935 size_t num_data_bytes_out =
936 InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims)) * outputs->precision.size();
938 size_t num_data_bytes_in =
939 num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->precision.size();
941 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
942 connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
944 switch (eltwise._operation) {
945 case EltwiseLayer::Sum:
946 if (quantized == nullptr) {
947 gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
949 auto scaledIdentity = quantized->_weights_quant.scale;
951 #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5))
953 auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
955 gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
957 connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
960 case EltwiseLayer::Prod:
961 if (quantized == nullptr) {
962 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
964 gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
966 connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
970 THROW_GNA_EXCEPTION << "Unsupported eltwise operation: " << eltwise._operation;
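// Creates an affine (or diagonal, when isDiag is true) GNA component for
// weightable layers: input rows are padded to a multiple of 8, padded or
// transposed weight rows are laid out through a memory initializer, and zero
// biases are generated when the IR provides none.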
974 void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag) {
975 auto &weightable = dynamic_cast<WeightableLayer &>(*layer.get());
976 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
978 auto inputs = layer->insData.begin()->lock();
979 auto outputs = *layer->outData.begin();
981 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
982 uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
983 uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
984 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
991 // TODO: questionable why we invent a precision for biases that are not in the IR
992 auto biasPrecision = weightable._biases ? weightable._biases->precision() : outputs->precision;
994 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
995 auto &currentComponent = dnnComponentsForLayer.back().second;
998 cout << "IR layer : " << std::left << std::setw(20) << layer->name << (isDiag ? "diagonal_" : "affine_") << dnnComponentsForLayer.size() - 1 << "\n";
1001 dnn.InitAffineComponent(currentComponent,
1002 num_rows_in + num_padding,
1005 inputs->precision.size(),
1006 outputs->precision.size(),
1007 weightable._weights->precision().size(),
1008 biasPrecision.size(),
1009 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
1010 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
1017 size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
1018 * outputs->precision.size();
1020 size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->precision.size();
1022 auto connectionInfo = connectInput(layer, ptr_inputs, num_data_bytes_in);
1023 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
1025 auto transpose = false;
1026 auto transposedRows = 0;
1027 auto transposedCols = 0;
1029 if (0 && connectionInfo.needTransposeWeights) {
1030 // direct order is 0, 1, 2, 3; the only supported order is 0, 3, 2, 1, where dim 2 usually equals 1
1031 auto permuteOrder = connectionInfo.permute->GetParamAsInts("order");
1032 if (permuteOrder != vector<int>({0, 3, 2, 1})) {
1033 THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
1034 ", but only support 0, 3, 2, 1";
1038 * TODO: weights transpose happening after quantisation might result in poor quality for int8 - move this to passes
1040 if (weightable._weights->precision() == Precision::I8) {
1041 THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute operation for 8 bit weights for layer: " << layer->name;
1044 // this affine is connected to a convolution via pooling or activation
1045 gnalog() << "Transposing weights for layer: " << layer->name << "\n";
1047 transpose = !isDiag;
1048 transposedRows = connectionInfo.permute->input()->getDims()[3];
1049 transposedCols = connectionInfo.permute->input()->getDims()[1];
1052 if (num_padding == 0) {
1054 gnamem->readonly().push_ptr(ptr_weights,
1055 weightable._weights->cbuffer().as<const void *>(),
1056 weightable._weights->byteSize(),
1059 gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void * data, size_t size) {
1060 for (int k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
1061 auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
1062 auto cbuffer = weightable._weights->cbuffer().as<const uint8_t *>() + rowOffset;
1063 auto u8Data = reinterpret_cast<uint8_t *>(data) + rowOffset;
1064 for (int j = 0; j < transposedCols; j++) {
1065 for (int i = 0; i < transposedRows; i++) {
1066 auto offsetWrite = (transposedRows * j + i) * weightable.precision.size();
1067 auto offsetRead = (i * transposedCols + j) * weightable.precision.size();
1068 std::memcpy(u8Data + offsetWrite, cbuffer + offsetRead, weightable.precision.size());
1076 THROW_GNA_EXCEPTION << "transposed weights with non-zero padding are not yet supported";
1078 auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
1079 auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
1080 auto paddedWeightsSize = paddedWeights * weightable.precision.size();
1082 gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void * data, size_t size) {
1083 for (int i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
1085 weightable._weights->cbuffer().as<const uint8_t *>() + num_rows_in * i * weightable.precision.size(),
1086 num_rows_in * weightable.precision.size());
1087 data = reinterpret_cast<uint8_t *>(data) + (num_rows_in + num_padding) * weightable.precision.size();
1092 if (weightable._biases) {
1093 gnamem->readonly().push_ptr(ptr_biases,
1094 weightable._biases->cbuffer().as<const void *>(),
1095 weightable._biases->byteSize(),
1098 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
1102 void GNAPlugin::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr layer, void* ptrWeights, size_t offset, bool isQuantized) {
1103 auto outputs = *layer->outData.begin();
1104 auto inputs = layer->insData.begin()->lock();
1106 uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
1107 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
1110 THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
1113 gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void * data, size_t size) {
1115 for (int input = offset; input < num_rows_out + offset; ++input) {
1116 auto mem_ptr = reinterpret_cast<uint8_t *>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
1118 auto float_ptr = reinterpret_cast<float *>(mem_ptr);
1121 auto int_ptr = reinterpret_cast<uint16_t *>(mem_ptr);
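// Builds the affine "aligning filter" that is inserted after a Split/Slice
// whose output offset is not 64-byte aligned: the filter's weights pick the
// required rows out of the padded input.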
1129 void GNAPlugin::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
1130 auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
1132 if (filterLayer == nullptr) {
1136 std::string& name = filterLayer->name;
1137 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
1139 // we look up the preceding split/slice layer in the extra split map
1140 auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
1141 if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
1142 THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
1150 auto outputs = *layer->outData.begin();
1151 auto inputs = layer->insData.begin()->lock();
1153 uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
1154 uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
1155 uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
1157 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
1159 gnalog() << "Filter " << layer->name << " is being inserted...\n";
1160 auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->precision() : outputs->precision;
1161 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
1162 auto &currentComponent = dnnComponentsForLayer.back().second;
1163 dnn.InitAffineComponent(currentComponent,
1164 num_rows_in + num_padding,
1167 inputs->precision.size(),
1168 outputs->precision.size(),
1169 filterLayer->_weights->precision().size(),
1170 biasPrecision.size(),
1171 quantized == nullptr ? 1 : quantized->_weights_quant.scale,
1172 quantized == nullptr ? 1 : quantized->_dst_quant.scale,
1179 size_t num_data_bytes_out =
1180 InferenceEngine::details::product(
1181 begin(outputs->dims), end(outputs->dims)) * 4;
1183 size_t num_data_bytes_in = num_columns_in *
1184 ALIGN(num_rows_in, 8) * inputs->precision.size();
1186 connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
1187 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
1189 if (num_padding == 0) {
1190 gnamem->readonly().push_ptr(ptr_weights,
1191 filterLayer->_weights->cbuffer().as<const void *>(),
1192 filterLayer->_weights->byteSize(),
1195 auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
1196 auto paddedWeights = elementsIn * num_rows_out;
1197 auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
1199 gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void * data, size_t size) {
1200 for (int i = 0; i < num_rows_out; i++) {
1202 filterLayer->_weights->cbuffer().as<const uint8_t *>() + num_rows_in * i * filterLayer->precision.size(),
1203 num_rows_in * filterLayer->precision.size());
1204 data = reinterpret_cast<uint8_t *>(data) + (num_rows_in + num_padding) * filterLayer->precision.size();
1209 if (filterLayer->_biases) {
1210 gnamem->readonly().push_ptr(ptr_biases,
1211 filterLayer->_biases->cbuffer().as<const void *>(),
1212 filterLayer->_biases->byteSize(),
1215 gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
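// Maps an activation layer onto a GNA piecewise-linear component. For
// quantized (integer) execution the activation is approximated with PWL
// segments - either a fixed uniform design (PwlDesign16) or an optimized one
// (PwlDesignOpt16) - using the output scale factor of the previous component.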
1219 void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
1220 auto *generic = dynamic_cast<GenericLayer *>(layer.get());
1222 std::vector<intel_pwl_segment_t> ptr_pwl_segments;
1224 uint32_t num_columns;
1229 if (generic == nullptr) {
1234 if (CaselessEq<string>()(layer->type, "activation")) {
1235 type = generic->GetParamAsString("type");
1243 auto inputs = layer->insData.begin()->lock();
1244 auto outputs = *layer->outData.begin();
1245 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
1246 float output_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
1248 auto orientation = (num_cnn_rows_out > 0) ? kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
1250 if (inputs->dims.size() == 4) {
1251 num_columns = FROM_IR_DIM(inputs, 3) * FROM_IR_DIM(inputs, 1);
1254 num_columns = FROM_IR_DIM(inputs, 2);
1255 num_rows = FROM_IR_DIM(inputs, 1);
1258 size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
1259 * outputs->precision.size();
1261 size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs->dims), end(inputs->dims))
1262 * inputs->precision.size();
1264 static caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
1265 {"sigmoid", kActSigmoid},
1268 {"leakyrelu", kActLeakyRelu},
1269 {"clamp", kActKaldiLstmClipping},
1270 {"identity", kActIdentity}
1273 auto it = supportedActivations.find(type);
1274 if (it == supportedActivations.end()) {
1275 THROW_GNA_EXCEPTION << "Activation function type not yet supported: " << type;
1277 auto activation_type = DnnActivation::fromType(it->second);
1278 activation_type.negative_slope = (it->second == kActRelu) ? dynamic_cast<ReLULayer*>(layer.get())->negative_slope : 0.0f;
1280 // TODO: should follow the graph dependency instead of the linear order
1281 auto &prevComponent = dnnComponentsForLayer.back().second;
1282 dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
1283 auto &currentComponent = dnnComponentsForLayer.back().second;
1285 intel_pwl_segment_t *ptr_pwl_segments_target = nullptr;
1287 if (!inputs->precision.is_float()) {
1288 // TODO: generalize activation function code
1289 // now that scale factors are known, create PWL approximations to activation functions
1290 float input_scale_factor = dnn.OutputScaleFactor(prevComponent);
1291 if (uniformPwlDesign) {
1292 switch (activation_type) {
1293 case kActSigmoid:ptr_pwl_segments.resize(SIGMOID_NUM_SEGMENTS);
1295 case kActTanh:ptr_pwl_segments.resize(TANH_NUM_SEGMENTS);
1297 case kActRelu:ptr_pwl_segments.resize(RELU_NUM_SEGMENTS);
1299 case kActLeakyRelu:ptr_pwl_segments.resize(RELU_NUM_SEGMENTS);
1301 case kActKaldiLstmClipping:
1302 case kActIdentity:ptr_pwl_segments.resize(IDENTITY_NUM_SEGMENTS);
1305 default:THROW_GNA_EXCEPTION << "Activation function type not yet supported " << activation_type;
1307 PwlDesign16(activation_type,
1308 &*ptr_pwl_segments.begin(),
1309 static_cast<uint32_t>(ptr_pwl_segments.size()),
1311 output_scale_factor);
1313 PwlDesignOpt16(activation_type,
1316 output_scale_factor);
1318 ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t *>(&ptr_pwl_segments_target);
1321 dnn.InitPiecewiseLinearComponent(currentComponent,
1326 inputs->precision.size(),
1327 outputs->precision.size(),
1328 ptr_pwl_segments.size(),
1329 output_scale_factor,
1332 ptr_pwl_segments_target);
1334 #define GET_ACTIVATION_NAME(name)\
1338 string actName = "unknown";
1339 switch (activation_type) {
1340 GET_ACTIVATION_NAME(kActSigmoid);
1341 GET_ACTIVATION_NAME(kActTanh);
1342 GET_ACTIVATION_NAME(kActRelu);
1343 GET_ACTIVATION_NAME(kActLeakyRelu);
1344 GET_ACTIVATION_NAME(kActKaldiLstmClipping);
1345 GET_ACTIVATION_NAME(kActIdentity);
1347 cout << "IR layer : " << std::left << std::setw(20) << layer->name << actName << "_" << dnnComponentsForLayer.size() - 1 <<"\n";
1350 connectInput(layer, ptr_inputs, num_data_bytes_in);
1351 connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
1353 if (ptr_pwl_segments_target != nullptr) {
1354 gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
1355 &ptr_pwl_segments.front(),
1356 ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
1362 void GNAPlugin::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
1363 auto layerOrder = layer->GetParamAsInts("order");
1365 if (layerOrder != vector<int>({0, 3, 2, 1})) {
1366 THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
1367 ", but only support 0,3,2,1";
1371 class LayersBuilder {
1372 using CreatorFnc = std::function<void(GNAPlugin*, CNNLayerPtr)>;
1375 LayersBuilder(const std::vector<std::string> &types, CreatorFnc callback) {
1376 for (auto && str : types) {
1377 getStorage()[str] = callback;
1380 static caseless_unordered_map<std::string, CreatorFnc> &getStorage() {
1381 static caseless_unordered_map<std::string, CreatorFnc> LayerBuilder;
1382 return LayerBuilder;
1386 #define CREATE(name) [](GNAPlugin *p, CNNLayerPtr l) {p->name(l);}
1387 void SKIP(GNAPlugin*, CNNLayerPtr) {}
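// Layer creation is table-driven: each LayersBuilder instance registers a
// creator functor for a set of IR layer types in a static caseless map, and
// CreateLayerPrimitive simply looks the type up and invokes the functor.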
1389 void GNAPlugin::CreateLayerPrimitive(CNNLayerPtr layer) {
1390 static const LayersBuilder layersBuilder[] = {
1391 {{"Input"}, [](GNAPlugin*, CNNLayerPtr l) {}}, // skip input layers; they are not used in the GNA lib, only as memory blobs
1392 {{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
1393 {{"ScaleShift"}, CREATE(DiagonalPrimitive)},
1394 {{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
1396 CREATE(EltwisePrimitive)}, // same as diagonal, except that weights are taken not from the network but from another layer's output
1397 {{"Split"}, SKIP}, // skip; which part of the previous layer each consumer takes is handled during that layer's creation
1399 {{"clamp", "sigmoid", "relu", "tanh", "identity"}, CREATE(PWLPrimitive)},
1400 {{"Convolution"}, CREATE(ConvolutionPrimitive)},
1401 {{"Permute"}, CREATE(PermutePrimitive)}, // a permute of a certain form (2D transpose) can be assimilated into the following FC layer
1402 {{"Pooling"}, CREATE(PoolingPrimitive)},
1403 {{"Power"} , CREATE(PowerPrimitive)},
1404 {{"Concat"}, CREATE(ConcatPrimitive)},
1405 {{"Reshape"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
1406 {{"Crop"}, CREATE(CropPrimitive)},
1407 {{"Copy"}, CREATE(CopyPrimitive)},
1409 auto it = LayersBuilder::getStorage().find(layer->type);
1410 if (it != LayersBuilder::getStorage().end()) {
1411 it->second(this, layer);
1413 THROW_GNA_EXCEPTION << "Unsupported layer: " << layer->name << ":" << layer->type;
1418 GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
1419 SetConfig(configMap);
1422 GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string &str) const {
1423 static const caseless_map<std::string, GNAPlugin::LayerType> LayerNameToType = {
1424 { "Input" , Input },
1425 { "Convolution" , Convolution },
1427 { "Sigmoid" , Sigmoid },
1429 { "Pooling" , Pooling },
1430 { "FullyConnected" , FullyConnected },
1431 { "InnerProduct" , InnerProduct},
1432 { "Split" , Split },
1433 { "Slice" , Slice },
1434 { "Eltwise" , Eltwise },
1435 { "Reshape" , Reshape },
1436 { "ScaleShift" , ScaleShift },
1437 { "Clamp" , Clamp },
1438 { "Concat" , Concat },
1440 { "Permute" , Permute },
1442 { "Memory" , Memory },
1445 auto it = LayerNameToType.find(str);
1446 if (it != LayerNameToType.end())
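// Validates the network before compilation: FP32 network precision, an
// FP32/I16/U8 input precision, a compatible target device, and a DFS over the
// graph checking that every layer type is known to the plugin and that batch
// size constraints hold.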
1452 bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage) {
1453 CNNLayerSet inputLayers;
1454 InferenceEngine::InputsDataMap inputs;
1455 std::unordered_set<CNNLayer *> allLayers;
1456 auto specifiedDevice = network.getTargetDevice();
1457 auto network_precision = network.getPrecision();
1458 network.getInputsInfo(inputs);
1459 auto network_input_precision = inputs.begin()->second->getInputPrecision();
1460 auto batch_size = network.getBatchSize();
1461 if (network_precision != Precision::FP32) {
1462 errMessage = "The plugin does not support networks with " + std::string(network_precision.name()) + " format.\n";
1465 if (network_input_precision != Precision::FP32 &&
1466 network_input_precision != Precision::I16 &&
1467 network_input_precision != Precision::U8) {
1468 errMessage = "The plugin does not support input precision with " + std::string(network_input_precision.name()) + " format.\n";
1471 if (specifiedDevice != InferenceEngine::TargetDevice::eCPU &&
1472 specifiedDevice != InferenceEngine::TargetDevice::eGNA &&
1473 specifiedDevice != InferenceEngine::TargetDevice::eDefault) {
1474 errMessage = "The plugin does not support target device: " + std::string(getDeviceName(specifiedDevice)) + ".\n";
1478 if (inputs.empty()) {
1479 errMessage = "Network is empty (GNA)\n";
1483 auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
1484 if (secondLayers.empty()) {
1485 errMessage = "Network consists of input layer only (GNA)\n";
1489 bool check_result = true;
1490 InferenceEngine::details::UnorderedDFS(allLayers,
1491 secondLayers.begin()->second,
1492 [&](const CNNLayerPtr layer) {
1493 if (LayerTypeFromStr(layer->type) == NO_TYPE) {
1494 errMessage = "Layer is unsupported by GNA: " + layer->name + ":" + layer->type + "\n";
1495 check_result = false;
1497 if (batch_size != 1 && LayerInfo::isBatchSizeConstrained(layer->type)) {
1498 errMessage = "topology with layer: " + layer->name + ", type: " + layer->type +
1499 ", and batch size(" + to_string(batch_size) + ") != 1 not supported";
1500 check_result = false;
1504 return check_result;
1507 float GNAPlugin::get_input_scale_factor() const {
1508 return input_scale_factor.empty() ? 1.0 : input_scale_factor.begin()->second;
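// LoadNetwork: runs the GNA-specific graph passes (optionally followed by
// quantization to I16/I8), creates a GNA component for each layer in
// topological order, allocates the memory segments, and builds one GNA
// network descriptor per parallel infer request.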
1511 void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
1512 // Check the input network
1514 if (!AreLayersSupported(network, error)) {
1515 THROW_GNA_EXCEPTION << error.c_str();
1518 // network optimisation phases
1519 auto run_passes = [&] (CNNNetPtr network) {
1520 auto layers = CNNNetSortTopologically(*network.get());
1521 substitutePRelu(layers);
1522 layers = CNNNetSortTopologically(*network.get());
1523 reorderMaxPool(layers);
1524 // TODO: re-sort if a bool flag "changed" is
1525 // returned from the insertion function
1526 insertAligningFilterLayer(layers);
1528 #if ENABLE_AUTO_PERMUTE
1529 layers = CNNNetSortTopologically(*network.get());
1530 reversePermutations(layers);
1532 layers = CNNNetSortTopologically(*network.get());
1533 insertIdentityLayer(layers);
1534 layers = CNNNetSortTopologically(*network.get());
1535 insertCopyLayer(layers);
1536 layers = CNNNetSortTopologically(*network.get());
1537 insertDiagonalLayer(layers);
1538 layers = CNNNetSortTopologically(*network.get());
1539 substituteScaleShiftBroadCast(layers);
1542 Config supported = Config({
1543 {TargetDevice::eGNA, Precision::FP32, [&](InferenceEngine::ICNNNetwork &network) -> CNNNetworkPtr {
1544 if (gnaPrecision == Precision::I16) {
1545 ModelQuantizer<QuantI16> q;
1546 return q.quantize(network, run_passes, get_input_scale_factor());
1549 if (gnaPrecision == Precision::I8) {
1550 ModelQuantizer<QuantI8> q;
1551 return q.quantize(network, run_passes, get_input_scale_factor());
1553 THROW_GNA_EXCEPTION << "no man's land for GNA precision";
1555 // TODO: need to have advanced precision matcher based on layers/biases
1556 {TargetDevice::eGNA, Precision::MIXED},
1557 {TargetDevice::eGNA, Precision::I16},
1558 {TargetDevice::eCPU, Precision::FP32
1559 #define EMULATE_GNA_API_LAYERS
1560 #ifdef EMULATE_GNA_API_LAYERS
1561 , [&](InferenceEngine::ICNNNetwork & network) {
1562 auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
1565 auto copiedNet = InferenceEngine::CNNNetCopy(network, visitor);
1566 run_passes(copiedNet);
1574 supported.setDefaultDevice(TargetDevice::eGNA);
1575 auto newNet = supported.find_configuration(network).convert(network);
1579 // creating intel dnn_t structures from network
1580 auto sortedNet = CNNNetSortTopologically(*newNet);
1581 std::vector<CNNLayerPtr> sortedNoMem;
1582 std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
1583 // find all memory layers pairs and mark which one used as outputs
1584 for (auto &layer : sortedNet) {
1585 auto generic = dynamic_cast<GenericLayer *>(layer.get());
1586 if (generic == nullptr) {
1587 sortedNoMem.push_back(layer);
1590 LayerInfo layerInfo(layer);
1591 if (layerInfo.isMemory()) {
1592 // collect all memory pairs
1593 auto id = generic->GetParamAsString("id");
1594 memoryPairs[id].resize(generic->GetParamAsInt("size"));
1595 memoryPairs[id][generic->GetParamAsInt("index")] = layer;
1597 } else if (layerInfo.isConcat()) {
1598 fillConcatConnections(layer);
1599 } else if (layerInfo.isSplit() || layerInfo.isSlice()) {
1600 fillSplitConnections(layer);
1602 sortedNoMem.push_back(layer);
1605 // fill in extra storage with memory layers
1606 fillMemoryConnections(memoryPairs);
1608 if (memory_connection.size() != 0) {
1609 gna_lib_async_threads_num = 1;
1612 auto networkPrecision = newNet->getPrecision();
1614 if (!networkPrecision.is_float()) {
1615 gnadevice.reset(new GNADeviceHelper(gna_proc_type,
1616 gna_lib_async_threads_num,
1617 gna_openmp_multithreading,
1618 performance_counting));
1619 gnamem.reset(new gna_memory_type(
1620 make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
1622 gnamem.reset(new gna_memory_type(make_polymorph<std::allocator<uint8_t>>()));
1625 // keep inputs information and create input primitives
1626 newNet->getInputsInfo(inputsDataMap);
1627 if (inputsDataMap.empty()) {
1628 THROW_GNA_EXCEPTION << " No inputs for the topology";
1632 newNet->getOutputsInfo(outputsDataMap);
1633 if (outputsDataMap.empty()) {
1634 THROW_GNA_EXCEPTION << "No outputs for the topology";
1636 if (outputsDataMap.size() != 1) {
1637 THROW_GNA_EXCEPTION << "cannot infer topologies with more than one output";
1639 outputDims = outputsDataMap.begin()->second->dims;
1641 for (auto && input : inputsDataMap) {
1642 get_ptr_inputs_global(input.first).resize(gna_lib_async_threads_num);
1645 ptr_outputs_global.resize(gna_lib_async_threads_num);
1646 // Creating layer primitives
1647 // TODO: solely a gna_example convolution hack
1648 num_feature_maps = 1;
1649 for (auto layer = sortedNoMem.begin(); layer != sortedNoMem.end(); ++layer) {
1650 CreateLayerPrimitive(*layer);
1652 DnnComponentsForLayer::iterator output_component = std::find_if(dnnComponentsForLayer.begin(),
1653 dnnComponentsForLayer.end(),
1654 [&](const std::pair<std::string, intel_dnn_component_t>& v)
1655 { return outputsDataMap.begin()->first == v.first; });
1657 if (output_component == dnnComponentsForLayer.end()) {
1658 if (dnnComponentsForLayer.empty()) {
1659 THROW_GNA_EXCEPTION << "No outputs found in internal structures";
1661 // the layer is likely fused. Take the last one
1662 output_component = std::prev(dnnComponentsForLayer.end());
1663 gnalog() << "Output layer "<< outputsDataMap.begin()->first
1664 << " has not been found in component list. Took "
1665 << output_component->first << " instead \n" << std::flush;
1667 gnamem->bind_ptr(&ptr_outputs_global.front(), &output_component->second.ptr_outputs);
1669 // make room for active list
1670 gnamem->reserve_ptr(nullptr, ALIGN64(output_component->second.num_bytes_per_output * output_component->second.num_rows_out));
1672 void *pParallelExecutionData = nullptr;
1674 // reserving more bytes for intermediate data in the parallel case - TODO: this works incorrectly in compact mode at least
1675 rwSegmentSize = gnamem->getRWBytes();
1676 if (gna_lib_async_threads_num > 1) {
1677 gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gna_lib_async_threads_num - 1));
1682 dnn.Init(gnamem->getBasePtr(),
1683 gnamem->getTotalBytes(),
1684 networkPrecision.is_float() ? kDnnFloat : kDnnInt,
1687 // TODO: this copy is unneeded; in fact we can directly create the GNA structs from the list
1688 for (auto &element : dnnComponentsForLayer) {
1689 dnn.component.push_back(element.second);
1692 // in fp32 mode last PWL cannot be computed without that
1693 dnn.InitActiveList(NULL);
1695 nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
1697 if (!networkPrecision.is_float()) {
1698 // the number of layers gets calculated inside the InitGNAStruct function
1699 dnn.InitGNAStruct(&std::get<0>(nnets.front())->obj);
1702 // creating the same GNA RW segment for parallel infer requests
1703 for (int i = 1; i != gna_lib_async_threads_num; i++) {
1704 nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
1706 // this can be improved by just copying all structures, but we are too lazy
1707 dnn.InitGNAStruct(&std::get<0>(nnets.back())->obj);
1709 // relocate rw pointers to new offset
1710 auto basePtr = reinterpret_cast<uint8_t*>(pParallelExecutionData) + rwSegmentSize * (i - 1);
1712 auto relocate = [basePtr, this](void *& ptr_out, void * ptr_in) {
1713 if (ptr_in == nullptr) {
1716 auto offset = reinterpret_cast<uint8_t *>(ptr_in) - reinterpret_cast<uint8_t *>(gnamem->getBasePtr());
1717 ptr_out = basePtr + offset;
1721 for (auto &&input : ptr_inputs_global_storage) {
1722 relocate(input[i], input[0]);
1725 relocate(ptr_outputs_global[i], ptr_outputs_global[0]);
1726 for (int j = 0; j != std::get<0>(nnets.front())->obj.nLayers; j++) {
1727 auto & layer = std::get<0>(nnets[i])->obj.pLayers[j];
1729 relocate(layer.pInputs, layer.pInputs);
1730 relocate(layer.pOutputs, layer.pOutputs);
1731 relocate(layer.pOutputsIntermediate, layer.pOutputsIntermediate);
1735 // calculating input orientation without memory layers, since their orientation is not changed during inference right now
1736 std::unordered_map<string, string> skippedLayers;
1737 for (auto &layer : sortedNet) {
1738 for (int i = 0; CNNNetHasPrevLayer(layer.get(), i); i++) {
1739 auto prevLayer = CNNNetPrevLayer(layer.get(), i);
1740 if (!skippedLayers.count(prevLayer->name)) {
1741 if (CNNNetHasPrevLayer(prevLayer.get())) {
1745 // we are in one of the input layers
1746 if (LayerInfo(prevLayer).isMemory()) {
1751 auto dnnLayer = findDnnLayer(layer);
1752 string inputName = prevLayer->name;
1753 if (skippedLayers.count(prevLayer->name)) {
1754 inputName = skippedLayers[prevLayer->name];
1757 // non-functional layer - skipped by GNA
1758 if (nullptr == dnnLayer) {
1759 // storing input name for skipped layer
1760 skippedLayers[layer->name] = inputName;
1764 // input orientation might be already initialized, thus verify that it matches
1765 if (!orientation_in.count(inputName)) {
1766 orientation_in[inputName] = dnnLayer->orientation_in;
1768 if (orientation_in[inputName] != dnnLayer->orientation_in) {
1769 THROW_GNA_EXCEPTION << "orientation for input layer: " << inputName << " cannot be calculated";
1775 orientation_out = output_component->second.orientation_out;
1776 num_bytes_per_output = output_component->second.num_bytes_per_output;
1778 // find output layer
1779 auto output = std::find_if(sortedNet.begin(),
1781 [&](const CNNLayerPtr& v)
1782 { return outputsDataMap.begin()->first == v.get()->name; });
1783 if (output == sortedNet.end()) {
1784 // likely layer is fused. Take last one
1785 output = std::prev(sortedNet.end());
1787 auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(*output);
1788 output_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
1790 num_rotate_rows = dnn.num_rotate_rows;
1791 num_rotate_columns = dnn.num_rotate_columns;
1796 dnn.WriteGraphWizModel("graph.dot");
1797 // ExportGnaNetworkAndrzej("layers/loaded_from_ir", &nnet->obj);
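// Dumps the compiled GNA network to the file configured via GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE).
// Rough flow, as read from the code below: the device produces an XNN dump for the first nnet,
// the header is patched with the RW-region size and the input/output scale factors, and the
// header plus the model blob are written out as a single binary file.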
1800 void GNAPlugin::DumpXNNToFile() const {
1801 // TODO: output precision as well as pointer might be incorrect, for LSTM for sure
1802 // GNA seems to automatically set layer 0 as the output and adjust its pointer / precision / size respectively
1803 if (!dumpXNNPath.empty()) {
1805 THROW_GNA_EXCEPTION << "Cannot generate XNNDump for float network";
1807 auto dump = gnadevice->dumpXnn(&std::get<0>(nnets.front())->obj, ptr_active_indices, num_active_indices);
1808 dump.header.rw_region_size = gnamem->getRWBytes();
1809 dump.header.input_scaling_factor = get_input_scale_factor();
1810 dump.header.output_scaling_factor = output_scale_factor;
1811 std::ofstream dumpStream(dumpXNNPath, std::ios::out | std::ios::binary);
1812 dumpStream.write(reinterpret_cast<char*>(&dump.header), sizeof(intel_gna_model_header));
1813 dumpStream.write(reinterpret_cast<char*>(dump.model.get()), dump.header.model_size);
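// Re-orders each feature vector in place. Conceptually, every vector of
// num_rotate_rows * num_rotate_columns elements is treated as a row-major
// (num_rotate_rows x num_rotate_columns) matrix and transposed, which converts
// between the interleaved and non-interleaved layouts used by GNA.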
1817 void RotateFeatures(uint8_t *ptr_feat,
1818 size_t element_size,
1819 uint32_t num_feature_vectors,
1820 uint32_t num_feature_vector_elements,
1821 uint32_t num_rotate_rows,
1822 uint32_t num_rotate_columns) {
1823 if (num_feature_vector_elements == num_rotate_rows * num_rotate_columns) {
1824 std::vector<uint8_t> temp(num_feature_vector_elements * element_size);
1825 for (uint32_t k = 0; k < num_feature_vectors; k++) {
1826 uint8_t *ptr_in = ptr_feat + k * num_feature_vector_elements * element_size;
1827 for (uint32_t i = 0; i < num_rotate_rows; i++) {
1828 for (uint32_t j = 0; j < num_rotate_columns; j++) {
1829 ie_memcpy(&temp.front() + (j * num_rotate_rows + i)*element_size,
1830 temp.size() - (j * num_rotate_rows + i)*element_size,
1831 ptr_in + (i * num_rotate_columns + j)*element_size,
1835 memcpy(ptr_in, &temp.front(), num_feature_vector_elements * element_size);
1838 THROW_GNA_EXCEPTION << "Rotate dimensions (" << num_rotate_rows << "," << num_rotate_columns
1839 <<") do not match buffer length of "<< num_feature_vector_elements <<" in RotateFeatures()!";
1843 uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, InferenceEngine::BlobMap &result) {
1844 auto freeNnet = std::find_if(std::begin(nnets), std::end(nnets), [](decltype(nnets.front()) & item) {
1845 return std::get<1>(item) == -1;
1848 if (freeNnet == nnets.end()) {
1849 if (memory_connection.size() != 0) {
1851 freeNnet = nnets.begin();
1853 THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
1854 << "GNA executable network has max of "
1855 << static_cast<uint32_t >(gna_lib_async_threads_num)
1856 << " parallel infer requests, please sync one of already running";
1861 auto nnet = std::get<0>(*freeNnet).get();
1862 auto idx = static_cast<uint32_t>(std::distance(std::begin(nnets), freeNnet));
1864 for (auto &input : inputs) {
1865 auto inputLayout = input.second->layout();
1866 if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
1867 THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC, Layout::CN or Layout::NCHW, but was: "
1868 << input.second->layout();
1870 if (inputLayout == NCHW) {
1873 auto is2D = input.second->layout() == Layout::NC || input.second->layout() == Layout::CN;
1875 if (!ptr_inputs_global_id.count(input.first)) {
1876 // should not happen in user code, however it might happen if there is any non-executable-network-based integration of a GNAPlugin instance
1877 THROW_GNA_EXCEPTION << "network not loaded : input pointer for " << input.first << " not set";
1880 if (get_ptr_inputs_global(input.first)[idx] == nullptr) {
1881 // should not happen in user code, however it might happen if there is any non-executable-network-based integration of a GNAPlugin instance
1882 THROW_GNA_EXCEPTION << "network not loaded : input pointer for " << input.first << " at inferRequest #"
1883 << idx << " not set";
1886 if (orientation_in[input.first] == kDnnUnknownOrientation) {
1887 // should not happen in user code, however it might happen if there is any non-executable-network-based integration of a GNAPlugin instance
1888 THROW_GNA_EXCEPTION << "network not loaded : input orientation for " << input.first << " not set";
1891 if (orientation_out == kDnnUnknownOrientation) {
1892 // should not happen in user code, however it might happen if there is any non-executable-network-based integration of a GNAPlugin instance
1893 THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
1896 auto dims = input.second->dims();
1898 ImportFrames(get_ptr_inputs_global(input.first)[idx],
1899 input.second->cbuffer().as<float *>(),
1900 input.second->precision(),
1901 orientation_in[input.first],
1902 dims[dims.size() - 1],
1903 is2D ? dims[1] : dims[dims.size() - 1],
1904 is2D ? dims[0] : dims[0] * dims[1] * dims[2],
1905 is2D ? dims[0] : dims[0] * dims[1] * dims[2]);
1906 bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
1907 if (((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
1908 != (orientation_in[input.first] == kDnnInterleavedOrientation))
1910 RotateFeatures(reinterpret_cast<uint8_t *>(get_ptr_inputs_global(input.first)[idx]),
1912 // TODO: only works for cnn4a and google command so far
1913 dims[dims.size() - 1],
1914 is2D ? dims[0] : dims[0] * dims[2], // num_feature_vectors - looks like the batch should be there
1916 num_rotate_columns);
1922 std::get<1>(*freeNnet) = 1;
1924 std::get<1>(*freeNnet) = gnadevice->propagate(&nnet->obj, ptr_active_indices, num_active_indices);
1926 std::get<2>(*freeNnet) = result;
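// Blocks until the request with the given index completes, then converts and copies the scores
// from the GNA output buffer into the user's output blob (including the int32 -> float
// conversion by output_scale_factor when running on the device).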
1930 void GNAPlugin::Wait(uint32_t idx) {
1931 // already synced; TODO: might a copy be required?
1932 if (std::get<1>(nnets[idx]) == -1) return;
1935 gnadevice->wait(std::get<1>(nnets[idx]));
1938 std::get<1>(nnets[idx]) = -1;
1939 auto & result = std::get<2>(nnets[idx]);
1941 dnn.BeginNewWrite();
1942 if (dnn.num_components() != 0) {
1943 dnn.WriteDnnText("Net_.txt", kDnnFloat);
1944 dnn.WriteInputAndOutputText();
1946 dnn.WriteInputAndOutputTextGNA(&std::get<0>(nnets.front())->obj);
1948 if (result.size() != 1) {
1949 THROW_GNA_EXCEPTION << "Invalid number of outputs for infer request: " << result.size() << ", only 1 supported";
1951 auto & output = *result.begin()->second;
1953 if (output.layout() == Layout::NC) {
1954 // TODO: rotate can be incorporated with exporting - used only in unit tests so far
1956 // if (orientation_out != kDnnInterleavedOrientation) {
1957 // if (inputs.size() != 1) {
1958 // THROW_GNA_EXCEPTION << "Invalid number of inputs for for deinterleave " << inputs.size()
1959 // << ", only 1 supported";
1961 // auto dims = inputs.begin()->second->dims();
1962 // RotateFeatures(reinterpret_cast<uint8_t*>(ptr_outputs_global),
1963 // gnadevice ? 2 : 4,
1964 // dims[dims.size() - 1],
1965 // dims[0], // num_feature_vectors looks batch should be there
1967 // dims[dims.size() - 1]);
1969 // we consider the last layer as the output ...
1970 size_t output_layer_index = std::max(0, static_cast<int>(std::get<0>(nnets[idx])->obj.nLayers - 1));
1971 if (gnadevice && std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].pOutputs != ptr_outputs_global[idx]) {
1972 // ...if this is not true, we should look for the output layer index
1973 for (int j = 0; j != std::get<0>(nnets[idx])->obj.nLayers; j++) {
1974 if (std::get<0>(nnets[idx])->obj.pLayers[j].pOutputs == ptr_outputs_global[idx]) {
1975 output_layer_index = j;
1981 ExportScores(output.buffer(),
1982 ptr_outputs_global[idx],
1984 output.dims()[output.dims().size() - 1],
1989 // TODO: create a better getter considering the multiple-outputs case
1990 gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].nBytesPerOutput : sizeof(float),
1992 } else if (output.layout() != Layout::CN) {
1993 THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN, but was: " << output.layout();
1999 static int num_infers = 0;
2001 f = fopen("ex_scores.txt", "w");
2005 for (int i = 0; i < output.dims()[1]; i++) {
2006 for (int j = 0; j < output.dims()[0]; j++) {
2007 fprintf(f, "%d ", output.cbuffer().as<int32_t *>()[output.dims()[0] * i + j]);
2014 ConvertToFloat(output.buffer(),
2018 output_scale_factor);
2021 for (int i = 0; i < output.dims()[1]; i++) {
2022 for (int j = 0; j < output.dims()[0]; j++) {
2023 fprintf(f, "%.2f ", output.cbuffer().as<float *>()[output.dims()[0] * i + j]);
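// Clears all stateful buffers (memory and concat connections) so that a new sequence can be
// started from a zero state.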
2033 void GNAPlugin::Reset() {
2034 for (auto && memLayer : memory_connection) {
2035 std::memset(memLayer.second.gna_ptr, 0, memLayer.second.reserved_size);
2037 for (auto && concatLayer : concat_connection) {
2038 std::memset(concatLayer.second.gna_ptr, 0, concatLayer.second.reserved_size);
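// Convenience single-input / single-output wrapper over the BlobMap-based Infer(): it checks
// that the model has exactly one input and one output, wraps the two blobs into temporary
// BlobMaps and forwards to Infer(BlobMap, BlobMap).
// Usage sketch (illustrative only, assuming the network has already been loaded):
//   plugin.Infer(*inputBlob, *outputBlob);   // blocks until the scores are written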
2042 void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
2045 if (inputsDataMap.size() != 1) {
2046 THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&): model accepts " << inputsDataMap.size() << " inputs";
2048 if (outputsDataMap.size() != 1) {
2049 THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&): model accepts " << outputsDataMap.size() << " outputs";
2052 bmInput[inputsDataMap.begin()->first] = std::shared_ptr<Blob>(const_cast<Blob*>(&input), [](Blob*){});
2053 bmOutput[outputsDataMap.begin()->first] = std::shared_ptr<Blob>(&output, [](Blob*){});
2054 Infer(bmInput, bmOutput);
2057 void GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
2058 Wait(QueueInference(input, result));
2061 Blob::Ptr GNAPlugin::GetOutputBlob(InferenceEngine::Precision precision) {
2062 // need to have intermediate blob for interleave conversion
2063 InferenceEngine::Blob::Ptr outputBlob;
2064 outputBlob = make_blob_with_precision(precision, NC, outputDims);
2065 outputBlob->allocate();
2069 Blob::Ptr GNAPlugin::GetInputBlob(std::string name, InferenceEngine::Precision precision) {
2070 InferenceEngine::Blob::Ptr inputBlob;
2071 // need to have intermediate blob for interleave conversion
2072 // TODO: NCHW format support is experimental; the C++ MO inserted a reshape, while the TF MO did not
2073 auto inputDims = inputsDataMap[name]->getDims();
2074 inputBlob = make_blob_with_precision(precision, inputDims.size() == 2 ? NC : NCHW, inputDims);
2075 inputBlob->allocate();
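// Exposes the GNA memory (state) layers to the application; stateless networks report no
// states.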
2079 std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState() {
2080 if (memory_connection.empty()) {
2084 return {std::make_shared<GNAMemoryState>(shared_from_this())};
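// Restores a network from a file previously produced by Export(): reads the serialized header,
// recreates the GNA device and memory, imports the model blob at its base pointer, and
// reconstructs the input/output descriptors, orientations and scale factors from the header.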
2087 InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::string &modelFileName) {
2088 // no need to return anything due to the weird design of the internal base classes
2089 std::fstream inputStream(modelFileName, ios_base::in | ios_base::binary);
2090 if (inputStream.fail()) {
2091 THROW_GNA_EXCEPTION << "Cannot open file to import model: " << modelFileName;
2094 auto header = GNAModelSerial::ReadHeader(inputStream);
2096 gnadevice.reset(new GNADeviceHelper(gna_proc_type,
2097 gna_lib_async_threads_num,
2098 gna_openmp_multithreading));
2099 gnamem.reset(new gna_memory_type(make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
2101 void *basePtr = nullptr;
2102 gnamem->reserve_ptr(&basePtr, header.gnaMemSize);
2105 nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(header.layersCount), -1, InferenceEngine::BlobMap()));
2106 std::get<0>(nnets.back())->obj.nGroup = header.nGroup;
2107 GNAModelSerial::MemoryType mt;
2108 auto serial = GNAModelSerial(&std::get<0>(nnets.back())->obj, mt);
2109 serial.Import(basePtr, header.gnaMemSize, inputStream);
2112 get_ptr_inputs_global("input").push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
2113 ptr_outputs_global.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.output.descriptor_offset));
2115 auto getOrientation = [](intel_nnet_layer_t & layer) {
2116 return layer.nLayerKind == INTEL_CONVOLUTIONAL ?
2117 kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
2120 orientation_in["input"] = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
2121 orientation_out = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers-1]);
2123 num_bytes_per_output = header.output.element_size;
2126 outputDims = SizeVector({header.output.elements_count / header.nGroup, header.nGroup});
2127 auto inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
2129 inputsDataMap["input"] = std::make_shared<InputInfo>();
2130 inputsDataMap["input"]->setInputData(make_shared<Data>("input",
2134 outputsDataMap["output"] = make_shared<Data>("output",
2139 output_scale_factor = header.output.scaleFactor;
2140 input_scale_factor["input"] = header.input.scaleFactor;
2142 num_rotate_rows = header.nRotateRows;
2143 num_rotate_columns = header.nRotateColumns;
2145 for (auto && memory : mt) {
2146 GNAMemoryLayer memoryLayer(nullptr, nullptr);
2147 memoryLayer.gna_ptr = memory.first;
2148 memoryLayer.reserved_size = memory.second;
2150 memory_connection.emplace_back(make_pair(std::string("noname"), memoryLayer));
2156 dnn.WriteGraphWizModel("graph.dot");
2157 // ExportGnaNetworkAndrzej("layers/loaded_from_aot_file", &nnet->obj);
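// Serializes the currently loaded network (the GNA structures, the whole memory region, and
// the input/output descriptors) into a single binary file that ImportNetwork() can load back.
// Round-trip sketch (illustrative only):
//   plugin.Export("model.gna");                        // after the network has been loaded
//   auto executable = plugin.ImportNetwork("model.gna");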
2163 void GNAPlugin::Export(const std::string &fileName) {
2164 if (ptr_inputs_global_id.empty() || ptr_outputs_global.empty()) {
2165 THROW_GNA_EXCEPTION << "network not loaded";
2168 if (ptr_inputs_global_id.size() != 1) {
2169 THROW_GNA_EXCEPTION << "exporting a network with multiple inputs is not supported";
2172 std::fstream outStream(fileName, ios_base::out | ios_base::binary);
2174 // TODO: the nnet group parameter looks to be used only by the application - so can we move this line into LoadNetwork?
2175 auto inputDims = inputsDataMap.begin()->second->getDims();
2176 if (inputDims.size() == 2) {
2177 std::get<0>(nnets.front())->obj.nGroup = inputDims[1];
2180 auto serial = GNAModelSerial(&std::get<0>(nnets.front())->obj,
2181 {get_input_scale_factor(),
2182 ptr_inputs_global_storage.front()[0],
2184 static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getDims()))},
2185 {output_scale_factor,
2186 ptr_outputs_global[0],
2187 num_bytes_per_output,
2188 static_cast<uint32_t>(InferenceEngine::details::product(outputsDataMap.begin()->second->getDims()))})
2189 .SetInputRotation(dnn.num_rotate_rows, dnn.num_rotate_columns);
2191 for (auto && memoryConnection : memory_connection) {
2192 serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size);
2195 serial.Export(gnamem->getBasePtr(), gnamem->getTotalBytes(), outStream);
2198 void GNAPlugin::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) {
2199 if (performance_counting) {
2200 gnadevice->getGnaPerfCounters(perfMap);
2204 void GNAPlugin::AddExtension(InferenceEngine::IExtensionPtr extension) {}
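// Parses the supported configuration keys listed below; unknown keys throw NOT_FOUND.
// Example map (values are illustrative only):
//   std::map<std::string, std::string> config = {
//       {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
//       {GNA_CONFIG_KEY(SCALE_FACTOR), "1024"},
//       {CONFIG_KEY(PERF_COUNT), PluginConfigParams::YES},
//   };
//   plugin.SetConfig(config);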
2206 void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
2207 std::vector<std::string> supportedConfigOptions = {
2208 GNA_CONFIG_KEY(SCALE_FACTOR),
2209 GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE),
2210 GNA_CONFIG_KEY(DEVICE_MODE),
2211 GNA_CONFIG_KEY(COMPACT_MODE),
2212 CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
2213 GNA_CONFIG_KEY(PRECISION),
2214 GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN),
2215 CONFIG_KEY(PERF_COUNT),
2216 GNA_CONFIG_KEY(LIB_N_THREADS),
2217 CONFIG_KEY(SINGLE_THREAD)
2220 for (auto& item : config) {
2221 auto keys = std::find_if(supportedConfigOptions.begin(), supportedConfigOptions.end(), [&item](std::string supportedConfigOption) {
2222 return item.first.find(supportedConfigOption) != std::string::npos;
2224 if (keys == supportedConfigOptions.end()) {
2225 THROW_GNA_EXCEPTION << as_status << NOT_FOUND << "Incorrect GNA Plugin config. Key " << item.first << " not supported";
2229 // holds actual value of a found key
2232 auto if_set = [&](std::string keyInput, const std::function<void()> & handler) {
2233 auto keyInMap = config.find(keyInput);
2234 if (keyInMap != config.end()) {
2235 value = keyInMap->second;
2240 auto if_start = [&](std::string keyInput, const std::function<void()> & handler) {
2241 for (auto && c : config) {
2242 if (c.first.find(keyInput) == 0) {
2243 if (c.first.size() > keyInput.size() + 1) {
2244 key = c.first.substr(keyInput.size() + 1);
2252 auto fp32eq = [](float p1, float p2) -> bool {
2253 return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
2256 auto & log = gnalog();
2258 if_start(GNA_CONFIG_KEY(SCALE_FACTOR), [&, this] {
2259 // only identical scale factors supported so far
2260 auto ref = input_scale_factor.size() ? input_scale_factor.begin()->second : 1.0;
2261 input_scale_factor[key] = std::stod(value);
2262 if (ref != 1.0 && !fp32eq(input_scale_factor[key], ref)) {
2263 std::string message = "only identical input scale factors supported, but provided: "
2264 + std::to_string(ref) + " and " + std::to_string(input_scale_factor[key]);
2265 log << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
2266 THROW_GNA_EXCEPTION << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
2270 if (input_scale_factor.empty()) {
2271 if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
2272 input_scale_factor["placeHolder"] = std::stod(value);
2276 if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
2277 dumpXNNPath = value;
2280 if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
2281 static caseless_unordered_map <std::string, uint32_t> supported_values = {
2282 {GNAConfigParams::GNA_AUTO, GNA_AUTO},
2283 {GNAConfigParams::GNA_HW, GNA_HARDWARE},
2284 {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
2285 {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
2287 auto procType = supported_values.find(value);
2288 if (procType == supported_values.end()) {
2289 log << "GNA device mode unsupported: " << value;
2290 THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
2292 gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
2295 if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
2296 if (value == PluginConfigParams::YES) {
2297 compact_mode = true;
2298 } else if (value == PluginConfigParams::NO) {
2299 compact_mode = false;
2301 log << "GNA compact mode should be YES/NO, but not " << value;
2302 THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not " << value;
2306 if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
2307 if (value == PluginConfigParams::YES) {
2308 exclusive_async_requests = true;
2309 } else if (value == PluginConfigParams::NO) {
2310 exclusive_async_requests = false;
2312 log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not " << value;
2313 THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not " << value;
2317 if_set(GNA_CONFIG_KEY(PRECISION), [&] {
2318 auto precision = Precision::FromStr(value);
2319 if (precision != Precision::I8 && precision != Precision::I16) {
2320 log << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
2321 THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
2323 gnaPrecision = precision;
2326 if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
2327 if (value == PluginConfigParams::YES) {
2328 uniformPwlDesign = true;
2329 } else if (value == PluginConfigParams::NO) {
2330 uniformPwlDesign = false;
2332 log << "GNA pwl uniform algorithm parameter "
2333 << "should be equal to YES/NO, but not " << value;
2334 THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
2335 << "should be equal to YES/NO, but not " << value;
2339 if_set(CONFIG_KEY(PERF_COUNT), [&] {
2340 if (value == PluginConfigParams::YES) {
2341 performance_counting = true;
2342 } else if (value == PluginConfigParams::NO) {
2343 performance_counting = false;
2345 log << "GNA performance counter enabling parameter "
2346 << "should be equal to YES/NO, but not " << value;
2347 THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
2348 << "should be equal to YES/NO, but not " << value;
2352 if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
2353 uint64_t lib_threads = std::stoul(value, NULL, 10);
2354 if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
2355 log << "Unsupported accelerator lib number of threads: " << value << ", should be greater than 0 and less than 127";
2356 THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
2357 << ", should be greater than 0 and less than 127";
2359 gna_lib_async_threads_num = lib_threads;
2362 if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
2363 if (value == PluginConfigParams::YES) {
2364 gna_openmp_multithreading = false;
2365 } else if (value == PluginConfigParams::NO) {
2366 gna_openmp_multithreading = true;
2368 log << "SINGLE_THREAD should be YES/NO, but not " << value;
2369 THROW_GNA_EXCEPTION << "SINGLE_THREAD should be YES/NO, but not " << value;
2375 * @deprecated Use the version with the config parameter
2377 void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
2378 InferenceEngine::QueryNetworkResult& res) const {
2379 QueryNetwork(network, {}, res);
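// Reports which layers of the given network the GNA plugin can handle: it walks the graph
// starting from the first input and adds every layer whose type maps to a known GNA layer
// type to res.supportedLayers.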
2382 void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
2383 const std::map<std::string, std::string>& config,
2384 InferenceEngine::QueryNetworkResult& res) const {
2385 std::unordered_set<CNNLayer *> allLayers;
2386 InferenceEngine::InputsDataMap inputs;
2388 network.getInputsInfo(inputs);
2389 std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
2391 if (inputs.empty()) {
2392 THROW_GNA_EXCEPTION << "Network is empty (GNA)\n";
2395 auto const & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
2396 if (secondLayers.empty()) {
2397 THROW_GNA_EXCEPTION << "Network consists of input layer only (GNA)\n";
2400 InferenceEngine::details::UnorderedDFS(allLayers,
2401 secondLayers.begin()->second,
2402 [&](CNNLayerPtr const layer) {
2403 if (GNAPluginNS::GNAPlugin::LayerTypeFromStr(layer->type) != NO_TYPE) {
2404 res.supportedLayers.insert(layer->name);
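// Helper for connectOutput(): returns the dnn component created for the layer feeding the
// first input of 'current' (nullptr when there is none), so its input buffer can potentially
// be reused for the output instead of reserving new memory.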
2409 intel_dnn_component_t * GNAPlugin::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
2410 if (current->insData.empty()) return nullptr;
2412 auto prev_layer = current->insData.front().lock()->creatorLayer.lock();
2414 return findDnnLayer(prev_layer);
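// Assigns (or defers assignment of) the GNA memory that will hold this layer's output:
// special-cases outputs that feed memory or concat layers, otherwise tries to reuse an unused
// input buffer, and finally falls back to reserving a new 64-byte aligned region of
// num_data_bytes_out bytes.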
2416 void GNAPlugin::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, void *ptr_inputs, size_t num_data_bytes_out) {
2417 gnalog() << "Connecting output " << layer->name << " ...\n";
2418 // in case of a Memory layer, its input is allocated in the memory-input layer
2419 if (layer->outData.size() == 1) {
2420 for (auto &&outLayer : layer->outData.front()->getInputTo()) {
2421 auto& nextLayer = outLayer.second;
2422 auto nextMemoryLayerIt =
2423 std::find_if(begin(memory_connection), end(memory_connection),
2424 [&](MemoryConnection::value_type &comp) {
2425 return comp.second.getOutput()->name
2428 if (nextMemoryLayerIt != memory_connection.end()) {
2429 auto &nextMemoryLayer = nextMemoryLayerIt->second;
2430 // memory layer not yet initialized
2431 if (nextMemoryLayer.reserved_size == 0) {
2432 gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(num_data_bytes_out));
2433 gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
2435 nextMemoryLayer.reserved_offset = 0;
2436 nextMemoryLayer.reserved_size = ALIGN64(num_data_bytes_out);
2438 IE_ASSERT(nextMemoryLayer.reserved_size == ALIGN64(num_data_bytes_out));
2440 gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
2446 // if one of the next layers is a concat...
2447 for (auto &&outLayer : layer->outData.front()->getInputTo()) {
2448 auto nextLayer = outLayer.second;
2449 if ( LayerInfo(nextLayer).isConcat() ) {
2450 auto& name = layer->name;
2451 // we look for this concat layer pointer in extra concat map
2452 auto concatLayerInfo = concat_connection.find(
2455 if (concatLayerInfo != concat_connection.end()) {
2456 auto &concatLayerInfoItem = concatLayerInfo->second;
2458 // find this input layer among the concat's connected input layers
2459 auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(),
2460 concatLayerInfoItem.concatInputLayers.end(),
2461 [&name](GNAPlugin::GNAConcatLayer::ConcatConnectedLayerInfo &item) {
2462 return item.name == name;
2464 // reserve full size for concat
2465 if (!concatLayerInfoItem.output_allocation_flag) {
2466 // check if this concat is being included by another one
2467 // by going through each concat and checking its inputs
2469 std::find_if(concat_connection.begin(),
2470 concat_connection.end(),
2472 (const std::pair<std::string, GNAPlugin::GNAConcatLayer> &concatItem) -> bool {
2473 auto it = std::find_if(concatItem.second.concatInputLayers.begin(),
2474 concatItem.second.concatInputLayers.end(),
2476 (const GNAPlugin::GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
2477 return item.name == concatLayerInfo->first;
2479 return it != concatItem.second.concatInputLayers.end();
2481 if (included == concat_connection.end()) {
2482 gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size));
2484 for (auto && inputLayer : concatLayerInfoItem.concatInputLayers) {
2485 if ( InferenceEngine::details::CaselessEq<std::string>()
2486 (inputLayer.name, "input") ) {
2487 bytes_alllocated_for_input[inputLayer.name] = ALIGN64(concatLayerInfoItem.reserved_size) - inputLayer.offset;
2491 concatLayerInfo->second.output_allocation_flag = true;
2493 gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, it->offset);
2502 intel_dnn_component_t * unused_input = nullptr;
2504 unused_input = find_first_unused_input(layer);
2505 if (unused_input != nullptr) {
2506 gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
2509 // cannot reuse a suitable input
2510 if (unused_input == nullptr) {
2511 gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out));
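// Finds the intel_dnn_component_t that was created for the given layer name; returns nullptr
// when the layer produced no component (e.g. it was fused or skipped).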
2515 intel_dnn_component_t * GNAPlugin::findDnnLayer(CNNLayerPtr __layer) {
2516 auto component = std::find_if(begin(dnnComponentsForLayer),
2517 end(dnnComponentsForLayer),
2518 [&](DnnComponentsForLayer::value_type &comp) {
2519 return comp.first == __layer->name;
2521 // check for generic prev layer
2522 if (component != dnnComponentsForLayer.end()) {
2523 return &component->second;
2529 std::vector<void *>& GNAPlugin::get_ptr_inputs_global(std::string name) {
2530 if (!ptr_inputs_global_id.count(name)) {
2531 ptr_inputs_global_storage.push_front({});
2532 ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin();
2534 return *ptr_inputs_global_id[name];
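// Binds the input pointer of the current primitive to the memory holding the output of its
// idx-th previous layer, walking through non-functional layers (reshape, permute) and the
// special split/concat/crop/memory bookkeeping maps on the way. Returns details about the
// layer the connection was actually made to.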
2537 GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx) {
2538 // selecting particular input layers
2539 auto prevLayer = CNNNetPrevLayer(layer, idx);
2541 gnalog() << "Connecting input " << layer->name << " to " << prevLayer->name << " ...\n";
2543 // a real input, not a memory input
2544 if (LayerInfo(prevLayer).isInput()) {
2545 if (0 == bytes_alllocated_for_input[prevLayer->name]) {
2546 gnamem->push_value(&get_ptr_inputs_global(prevLayer->name).front(), static_cast<uint8_t>(0), num_data_bytes_in, 64);
2547 bytes_alllocated_for_input[prevLayer->name] = num_data_bytes_in;
2549 if (ALIGN(num_data_bytes_in, 64) > ALIGN(bytes_alllocated_for_input[prevLayer->name], 64)) {
2551 << "Layer: " << layer->name
2552 << " Cannot bind pointer to already allocated input(" << prevLayer->name
2553 << "), due to size_allocated=" << bytes_alllocated_for_input[prevLayer->name]
2554 << ", and size_requested=" << num_data_bytes_in;
2558 gnamem->bind_ptr(ptr, &get_ptr_inputs_global(prevLayer->name).front(), offset);
2560 gnamem->bind_ptr(&get_ptr_inputs_global(prevLayer->name).front(), ptr, -offset);
2566 LayerInfo layerInfoObj(prevLayer);
2567 LayerInfo thisLayerInfoObj(layer);
2568 // connecting to split/slice splitting layers
2569 if (layerInfoObj.isSplit() || layerInfoObj.isSlice()) {
2570 auto& splittingLayer = prevLayer;
2571 auto& splitName = splittingLayer->name;
2572 auto& name = layer->name;
2574 // we look for this split layer pointer in the extra split map
2575 auto splitLayerInfo = split_connection.find(splitName);
2577 if (splitLayerInfo != split_connection.end()) {
2578 auto &splitLayerInfoItem = splitLayerInfo->second;
2579 // find this output layer among the split's connected output layers
2580 auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(),
2581 splitLayerInfoItem.splitOutputLayers.end(),
2582 [&name](GNAPlugin::GNASplitLayer::SplitConnectedLayerInfo &item) {
2583 return item.name == name;
2586 if (it != splitLayerInfoItem.splitOutputLayers.end()) {
2587 gnalog() << "Connecting split/slice input \n";
2588 auto res = connectInput(splittingLayer, ptr,
2589 splitLayerInfoItem.reserved_size, it->offset, 0);
2590 gnalog() << "Connected \n";
2594 THROW_GNA_EXCEPTION << "Split/Slice layer: " << splitName
2595 << " is not included in extra map. Something went wrong";
2596 } else if (layerInfoObj.isConcat()) {
2597 auto concatLayerInfo = concat_connection.find(
2599 if (concatLayerInfo != concat_connection.end()) {
2600 auto & concatLayerInfoItem = concatLayerInfo->second;
2601 // dnnLayer that is input for concat layer
2602 gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
2603 // return the layer above the concat
2604 return CNNNetPrevLayer(prevLayer);
2606 } else if (layerInfoObj.isCrop()) {
2607 auto cropLayerInfo = crop_connection.find(
2609 if (cropLayerInfo != crop_connection.end()) {
2610 auto & cropLayerInfoItem = cropLayerInfo->second;
2611 gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
2612 return CNNNetPrevLayer(prevLayer);
2615 auto prevDnnLayer = findDnnLayer(prevLayer);
2617 // check for generic prev layer
2618 if (prevDnnLayer != nullptr) {
2619 gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
2623 auto prevMemoryLayer =
2624 std::find_if(begin(memory_connection), end(memory_connection), [&](MemoryConnection::value_type &comp) {
2625 return comp.second.getInput()->name == prevLayer->name;
2627 if (prevMemoryLayer != memory_connection.end()) {
2628 // dnnLayer that is input for memory output layer
2629 auto& memoryLayer = prevMemoryLayer->second;
2630 if (memoryLayer.reserved_size == 0) {
2631 gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(num_data_bytes_in));
2632 gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
2634 memoryLayer.reserved_offset = offset;
2635 memoryLayer.reserved_size = ALIGN64(num_data_bytes_in);
2637 IE_ASSERT(memoryLayer.reserved_size == ALIGN64(num_data_bytes_in));
2639 gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, memoryLayer.reserved_offset);
2645 // several layers are to be skipped right now
2646 if (LayerInfo(prevLayer).isReshape()) {
2647 gnalog() << "Skipping reshape layer: " << prevLayer->name << "\n";
2648 return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
2651 if (LayerInfo(prevLayer).isPermute()) {
2652 gnalog() << "Skipping permute layer: " << prevLayer->name << "\n";
2653 return {connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0).input, true, prevLayer};
2657 THROW_GNA_EXCEPTION << "Cannot connect input for: " << layer->name;