-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
for (uint32_t i = 0; i < num_frames; i++) {
for (uint32_t j = 0; j < num_vector_elements; j++) {
if (!std::is_same<T, U>::value) {
- dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * input_scale_factor);
+ dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * get_input_scale_factor());
} else {
dst[j * num_group + i] = src[i * num_vector_elements + j];
}
U *ptr_src_vec = const_cast<U *>(reinterpret_cast<const U *>(src) + i * num_vector_elements);
std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
for (int j=0; j < num_vector_elements; j++) {
- ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * input_scale_factor);
+ ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * get_input_scale_factor());
}
}
for (auto&& outputLayer : splitInfo.splitOutputLayers) {
uint32_t begin = outputLayer.offset/precision_size;
uint32_t end = (outputLayer.offset + outputLayer.pure_size)/precision_size;
+ if (dst_ptr - dst >= end) {
+ // output layer with bind pointer as previous one. Skip
+ continue;
+ }
for (uint32_t i = begin; i < end; ++i) {
if (!std::is_same<T, U>::value) {
- *(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * input_scale_factor);
+ *(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * get_input_scale_factor());
} else {
*(dst_ptr++) = *(src_ptr++);
}
uint32_t num_group,
uint32_t num_vector_elements,
uint32_t num_vector_stride) {
- // special case if split/slice layers connected
- // with Input detected
- auto it = split_connection.end();
- if (split_connection.size() != 0) {
- it = std::find_if(split_connection.begin(), split_connection.end(), []
- (const std::pair<std::string, GNASplitLayer> &item) -> bool {
- return CaselessEq<std::string>()(item.second.splitInputLayer.name, "Input");
- });
- }
if (orientation == kDnnInterleavedOrientation) {
// TODO : fix that as well
- if (input_precision.size() == 2) {
+ if (input_precision == Precision::U8) {
+ int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
+ uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+ } else if (input_precision.size() == 2) {
int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
- if (it != split_connection.end()) {
- copyInputDataWithSplit(dst, src, it->second, input_precision.size());
- } else {
- copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
- }
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
} else if (input_precision.size() == 4) {
if (!gnadevice) {
float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
float *src = const_cast<float *>(reinterpret_cast<const float *>(ptr_src));
- if (it != split_connection.end()) {
- copyInputDataWithSplit(dst, src, it->second, input_precision.size());
- } else {
- copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
- }
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
} else {
int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
const float *src = reinterpret_cast<const float *>(ptr_src);
- if (it != split_connection.end()) {
- copyInputDataWithSplit(dst, src, it->second, input_precision.size());
- } else {
- copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
- }
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
}
}
} else {
- if (input_precision.size()== 2) {
+ if (input_precision == Precision::U8) {
+ uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
+ if (!gnadevice) {
+ float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+ } else {
+ int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
+ copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+ }
+
+ } else if (input_precision.size()== 2) {
int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
}
}
-void GNAPlugin::fillMemoryConnections(std::map<std::string,
- std::vector<InferenceEngine::CNNLayerPtr>>&
- memoryPairs) {
+void GNAPlugin::fillMemoryConnections(std::unordered_map<std::string,
+ std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs) {
for (auto &memory : memoryPairs) {
auto inputLayer = memory.second[1];
auto outputLayer = memory.second[0];
LayerInfo ptrSplitLayerInputLayerInfo(ptrSplitLayerInput);
for (size_t i = 0; i < layer->outData.size(); ++i) {
size_t padding = 0;
- size_t layer_size = 0;
+ size_t output_layer_size = 0;
auto& dataOutput = layer->outData[i];
if (!dataOutput || !dataInput) {
padding = std::max(padding, LayerInfo(ptrSplitLayerOutput).paddingSize())
* dataOutput->precision.size();
- layer_size =
+ output_layer_size =
InferenceEngine::details::product(begin(dataOutput->dims),
end(dataOutput->dims)) * dataOutput->precision.size();
- layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, split_size, layer_size);
+ if (ptrSplitLayerOutput->type == "AffineFilter") {
+ size_t aligned64_offset = ptrSplitLayerOutput->GetParamAsInt("offset");
+ layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, aligned64_offset, output_layer_size);
+ } else {
+ layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, split_size, output_layer_size);
+ }
}
- split_size += ptrSplitLayerInputLayerInfo.isInput() ?
- ALIGN64(padding + layer_size):
- padding + layer_size;
+ split_size += padding + output_layer_size;
}
layerInfoItem.reserved_size = split_size;
layerInfoItem.splitInputLayer =
auto ¤tComponent = dnnComponentsForLayer.back().second;
dnn.InitCopyComponent(currentComponent,
orientation,
- num_rows_in + num_padding_in,
+ ALIGN(num_rows_in, 8),
num_columns_in,
- num_rows_out + num_padding_out,
+ ALIGN(num_rows_out, 8),
num_columns_out,
inputs->precision.size(),
outputs->precision.size(),
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
begin(outputs->dims), end(outputs->dims)), 8)
* outputs->precision.size();
- size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding_in) * inputs->precision.size();
+ size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->precision.size();
connectInput(layer, ptr_inputs, num_data_bytes_in);
connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
THROW_GNA_EXCEPTION << "Different precision for Concat input layers are not supported";
}
+ auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
for (auto &&outLayer : concatLayer->outData.front()->getInputTo()) {
if ( LayerInfo(outLayer.second).isConcat() ) {
- auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
connectOutput(layer, &concatLayerInfo.gna_ptr,
&concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size);
}
}
+
+ size_t idx = 0;
+ for (auto && inputLayer : concatLayerInfo.concatInputLayers) {
+ if ( InferenceEngine::details::CaselessEq<std::string>()
+ (inputLayer.name, "input") ) {
+ connectInput(layer, &concatLayerInfo.gna_ptr,
+ concatLayerInfo.reserved_size-inputLayer.offset, static_cast<int32_t>(-inputLayer.offset), idx);
+ }
+ ++idx;
+ }
}
void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
- size_t cropSize = cropLayer->dim.back() * cropLayer->precision.size();
+ size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
- if (ALIGN(cropOffset, 8) == cropOffset) {
+ if (ALIGN64(cropOffset) == cropOffset) {
// leave crop as it is
GNAPlugin::GNACropLayer cropLayerInfoItem(layer);
std::string& id = layer->name;
}
// calculate index idx for connectInput last parameter
- connectInput(layer, &cropLayerInfo->second.gna_ptr, cropSize + cropOffset, cropOffset, 0);
+ connectInput(layer, &cropLayerInfo->second.gna_ptr, cropOutputSize + cropOffset, cropOffset, 0);
// cases for certain output layers
for (auto &&outLayer : layer->outData.front()->getInputTo()) {
auto& nextLayer = outLayer.second;
if ( LayerInfo(nextLayer).isConcat() ) {
- connectOutput(layer, &cropLayerInfo->second.gna_ptr, &cropLayerInfo->second.gna_ptr, cropSize);
+ connectOutput(layer, &cropLayerInfo->second.gna_ptr, &cropLayerInfo->second.gna_ptr, cropOutputSize);
}
}
} else {
begin(outputs->dims), end(outputs->dims)) * 4;
size_t num_data_bytes_in = num_columns_in *
- (num_rows_in + num_padding) * inputs->precision.size();
+ ALIGN(num_rows_in, 8) * inputs->precision.size();
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
- gnamem->readonly().push_initializer(ptr_weights, num_rows_out * (num_rows_in + num_padding)*layer->precision.size(), [=](void * data, size_t size) {
- int out = 0;
- for (int input = cropLayer->offset.back(); input < num_rows_out + cropLayer->offset.back(); ++input) {
- auto mem_ptr = reinterpret_cast<uint8_t *>(data) + input * layer->precision.size() + out * (num_rows_in+num_padding) * layer->precision.size();
- if (quantized == nullptr) {
- auto float_ptr = reinterpret_cast<float *>(mem_ptr);
- *float_ptr = 1.0f;
- } else {
- auto int_ptr = reinterpret_cast<uint16_t *>(mem_ptr);
- *int_ptr = 1;
- }
- ++out;
- }
- }, 64);
- if (quantized == nullptr) {
- gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
- } else {
+ FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
+
+ (quantized == nullptr) ?
+ gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64):
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
- }
}
}
uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
uint32_t num_rows_out = num_rows_in;
+ uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
void *ptr_inputs;
void *ptr_outputs;
dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
auto ¤tComponent = dnnComponentsForLayer.back().second;
dnn.InitAffineComponent(currentComponent,
- num_rows_in,
+ num_rows_in + num_padding,
num_columns_in,
- num_rows_out,
+ num_rows_out + num_padding,
inputs2Bytes->precision.size(),
outputs->precision.size(),
// TODO: only fp32 and Int16 tested
cout << "IR layer : " << std::left << std::setw(20) << layer->name << "diagonal_"<< dnnComponentsForLayer.size() - 1 << "\n";
#endif
- size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
- * outputs->precision.size();
+ size_t num_data_bytes_out =
+ InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims)) * outputs->precision.size();
- size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs2Bytes->dims), end(inputs2Bytes->dims))
- * inputs2Bytes->precision.size();
+ size_t num_data_bytes_in =
+ num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->precision.size();
connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5))
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
+
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
auto transpose = false;
auto transposedRows = 0;
auto transposedCols = 0;
- /**
- * TODO: enable transpose correction between Conv/affine layers implement dedicated pass
- * TF topologies have inplace permutes so we dont care
- * kaldi topologies did this internally
- */
+
if (0 && connectionInfo.needTransposeWeights) {
- gnalog() << "Transposing weights for layer: " << layer->name << "\n";
// direct order is 0, 1, 2, 3, supported order is only 0,3,2,1 where dim 2 is usually equals to 1
auto permuteOrder = connectionInfo.permute->GetParamAsInts("order");
if (permuteOrder != vector<int>({0, 3, 2, 1})) {
THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
", but only support 0, 3, 2, 1";
}
+
+ /**
+ * TODO: weights transpose happened after quantisation might result in poor quality for in 8 - move this to passes
+ */
+ if (weightable._weights->precision() == Precision::I8) {
+ THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute operation for 8 bit weights for layer: " << layer->name;
+ }
+
+ // this affine connected to convolution via pool or activation
+ gnalog() << "Transposing weights for layer: " << layer->name << "\n";
+
transpose = !isDiag;
transposedRows = connectionInfo.permute->input()->getDims()[3];
transposedCols = connectionInfo.permute->input()->getDims()[1];
weightable._weights->byteSize(),
64);
} else {
- // ToDO: write unit tests for transpose
gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void * data, size_t size) {
for (int k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
for (int i = 0; i < transposedRows; i++) {
auto offsetWrite = (transposedRows * j + i) * weightable.precision.size();
auto offsetRead = (i * transposedCols + j) * weightable.precision.size();
- memcpy(u8Data + offsetWrite, cbuffer + offsetRead, weightable.precision.size());
+ std::memcpy(u8Data + offsetWrite, cbuffer + offsetRead, weightable.precision.size());
}
}
}
}, 64);
}
} else {
+ if (transpose) {
+ THROW_GNA_EXCEPTION << "transpozed weights with non zero padding not yet supported";
+ }
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
auto paddedWeightsSize = paddedWeights * weightable.precision.size();
}
}
+// Fills the weight memory of an "aligning filter" (AffineFilter) layer with a
+// shifted identity matrix: output row `out` has a single 1 at input column
+// (offset + out), so multiplying by these weights passes through num_rows_out
+// consecutive input elements starting at element `offset`.
+// @param layer        AffineFilter layer supplying dims and element precision
+// @param ptrWeights   request pointer for the weight blob (must be non-null)
+// @param offset       index of the first input element to pass through
+// @param isQuantized  true -> write 16-bit integer 1, false -> write float 1.0f
+void GNAPlugin::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr layer, void* ptrWeights, size_t offset, bool isQuantized) {
+    auto outputs = *layer->outData.begin();
+    auto inputs = layer->insData.begin()->lock();
+
+    uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
+    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+
+    if (!ptrWeights) {
+        THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
+    }
+
+    // each weight row is padded to a multiple of 8 input elements (ALIGN(num_rows_in, 8));
+    // the lambda runs later, when the readonly memory region is actually committed
+    gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void * data, size_t size) {
+        int out = 0;
+        for (int input = offset; input < num_rows_out + offset; ++input) {
+            // address of element [row = out, col = input] in the row-major, row-padded layout
+            auto mem_ptr = reinterpret_cast<uint8_t *>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
+            if (!isQuantized) {
+                auto float_ptr = reinterpret_cast<float *>(mem_ptr);
+                *float_ptr = 1.0f;
+            } else {
+                auto int_ptr = reinterpret_cast<uint16_t *>(mem_ptr);
+                *int_ptr = 1;
+            }
+            ++out;
+        }
+        // NOTE(review): off-diagonal elements are never written here — assumes the
+        // allocator hands out zero-initialized memory; confirm in gna_memory_type
+    }, 64);
+}
+
+// Creates the GNA affine component for an "AffineFilter" layer — a synthetic
+// layer inserted after Split/Slice outputs to realign data to hardware-friendly
+// boundaries. The input row count is derived from the weight blob size and
+// padded up to a multiple of 8; weights are copied row-by-row with zero padding
+// when needed, and biases come from the layer blob or default to zeros.
+void GNAPlugin::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
+    auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
+
+    if (filterLayer == nullptr) {
+        return;
+    }
+
+    std::string& name = filterLayer->name;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+
+    // aligning filters are only generated right after Split/Slice outputs;
+    // any other producer means the insertion pass put this layer in a wrong place
+    auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
+    if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
+        THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
+    }
+
+    void *ptr_inputs;
+    void *ptr_outputs;
+    void *ptr_weights;
+    void *ptr_biases;
+
+    auto outputs = *layer->outData.begin();
+    auto inputs = layer->insData.begin()->lock();
+
+    uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
+    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+    // input rows recovered from the weight matrix shape (weights are num_rows_out x num_rows_in)
+    uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
+
+    // pad input rows up to a multiple of 8 elements, as required by GNA affine layers
+    uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
+
+    gnalog() << "Filter " << layer->name << " is being inserted...\n";
+    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->precision() : outputs->precision;
+    dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
+    auto &currentComponent = dnnComponentsForLayer.back().second;
+    dnn.InitAffineComponent(currentComponent,
+                            num_rows_in + num_padding,
+                            num_columns_in,
+                            num_rows_out,
+                            inputs->precision.size(),
+                            outputs->precision.size(),
+                            filterLayer->_weights->precision().size(),
+                            biasPrecision.size(),
+                            quantized == nullptr ? 1 : quantized->_weights_quant.scale,
+                            quantized == nullptr ? 1 : quantized->_dst_quant.scale,
+                            ptr_inputs,
+                            ptr_outputs,
+                            ptr_weights,
+                            ptr_biases,
+                            false);
+
+    // NOTE(review): output size hard-codes 4 bytes per element — presumably
+    // i32/fp32 affine output; confirm against InitAffineComponent contract
+    size_t num_data_bytes_out =
+            InferenceEngine::details::product(
+                    begin(outputs->dims), end(outputs->dims)) * 4;
+
+    size_t num_data_bytes_in = num_columns_in *
+            ALIGN(num_rows_in, 8) * inputs->precision.size();
+
+    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
+    connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
+
+    if (num_padding == 0) {
+        // no padding needed: the IR weight blob can be used as-is
+        gnamem->readonly().push_ptr(ptr_weights,
+                                    filterLayer->_weights->cbuffer().as<const void *>(),
+                                    filterLayer->_weights->byteSize(),
+                                    64);
+    } else {
+        // copy each weight row into a padded row of ALIGN(num_rows_in, 8) elements
+        auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
+        auto paddedWeights = elementsIn * num_rows_out;
+        auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
+
+        gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void * data, size_t size) {
+            for (int i = 0; i < num_rows_out; i++) {
+                std::memcpy(data,
+                            filterLayer->_weights->cbuffer().as<const uint8_t *>() + num_rows_in * i * filterLayer->precision.size(),
+                            num_rows_in * filterLayer->precision.size());
+                // advance past the padded row stride; pad bytes are left untouched
+                data = reinterpret_cast<uint8_t *>(data) + (num_rows_in + num_padding) * filterLayer->precision.size();
+            }
+        }, 64);
+    }
+
+    if (filterLayer->_biases) {
+        gnamem->readonly().push_ptr(ptr_biases,
+                                    filterLayer->_biases->cbuffer().as<const void *>(),
+                                    filterLayer->_biases->byteSize(),
+                                    64);
+    } else {
+        // no bias blob in IR: use zero biases
+        gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+    }
+}
+
void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto *generic = dynamic_cast<GenericLayer *>(layer.get());
std::string type;
{{"Input"}, [](GNAPlugin*, CNNLayerPtr l) {}}, // skip input layers they are not used in GNA lib, only as a memory blobs
{{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
{{"ScaleShift"}, CREATE(DiagonalPrimitive)},
+ {{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
{{"Eltwise"},
CREATE(EltwisePrimitive)}, // same as diagonal while weights are not taken from network, rather than from another output
{{"Split"}, SKIP}, // skip information about which part of prev layer need to consume handle during layer creation
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
-    // holds actual value of a found key
-    std::string value;
-    auto if_set = [&](std::string key, const std::function<void()> & handler) {
-        auto keyInMap = configMap.find(key);
-        if (keyInMap != configMap.end()) {
-            value = keyInMap->second;
-            handler();
-        }
-    };
-
-    if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
-        input_scale_factor = std::stod(value);
-    });
-
-    if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
-        dumpXNNPath = value;
-    });
-
-    if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
-        static caseless_unordered_map <std::string, uint32_t> supported_values = {
-            {GNAConfigParams::GNA_AUTO, GNA_AUTO},
-            {GNAConfigParams::GNA_HW, GNA_HARDWARE},
-            {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
-            {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
-        };
-        auto procType = supported_values.find(value);
-        if (procType == supported_values.end()) {
-            THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
-        }
-        gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
-    });
-
-    if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
-        if (value == PluginConfigParams::YES) {
-            compact_mode = true;
-        } else if (value == PluginConfigParams::NO) {
-            compact_mode = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
-        if (value == PluginConfigParams::YES) {
-            exclusive_async_requests = true;
-        } else if (value == PluginConfigParams::NO) {
-            exclusive_async_requests = false;
-        } else {
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(PRECISION), [&] {
-        auto precision = Precision::FromStr(value);
-        if (precision != Precision::I8 && precision != Precision::I16) {
-            THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
-        }
-        gnaPrecision = precision;
-    });
-
-    if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
-        if (value == PluginConfigParams::YES) {
-            uniformPwlDesign = true;
-        } else if (value == PluginConfigParams::NO) {
-            uniformPwlDesign = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
-                                << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(PERF_COUNT), [&] {
-        if (value == PluginConfigParams::YES) {
-            performance_counting = true;
-        } else if (value == PluginConfigParams::NO) {
-            performance_counting = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
-                                << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
-        uint64_t lib_threads = std::stoul(value, NULL, 10);
-        if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
-            THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
-                                << ", should be greateer than 0 and less than 127";
-        }
-        gna_lib_async_threads_num = lib_threads;
-    });
-
-    if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
-        if (value == PluginConfigParams::YES) {
-            gna_openmp_multithreading = false;
-        } else if (value == PluginConfigParams::NO) {
-            gna_openmp_multithreading = true;
-        } else {
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
+    // all config-key parsing is delegated to SetConfig() so that
+    // construction-time and runtime (re)configuration share one implementation
+    SetConfig(configMap);
}
-GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string &str) {
+GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string &str) const {
static const caseless_map<std::string, GNAPlugin::LayerType> LayerNameToType = {
{ "Input" , Input },
{ "Convolution" , Convolution },
auto network_precision = network.getPrecision();
network.getInputsInfo(inputs);
auto network_input_precision = inputs.begin()->second->getInputPrecision();
- auto batch_sise = network.getBatchSize();
+ auto batch_size = network.getBatchSize();
if (network_precision != Precision::FP32) {
errMessage = "The plugin does not support networks with " + std::string(network_precision.name()) + " format.\n";
return false;
}
if (network_input_precision != Precision::FP32 &&
- network_input_precision != Precision::I16) {
+ network_input_precision != Precision::I16 &&
+ network_input_precision != Precision::U8) {
errMessage = "The plugin does not support input precision with " + std::string(network_input_precision.name()) + " format.\n";
return false;
}
errMessage = "Layer is unsupported by GNA: " + layer->name + ":" + layer->type + "\n";
check_result = false;
}
- if (batch_sise != 1 && LayerInfo::isBatchSizeConstrained(layer->type)) {
+ if (batch_size != 1 && LayerInfo::isBatchSizeConstrained(layer->type)) {
+ errMessage = "topology with layer: " + layer->name + ", type: " + layer->type +
+ ", and batch size(" + to_string(batch_size) + ") != 1 not supported";
check_result = false;
}
}, false);
return check_result;
}
+// Returns the quantization scale factor configured for the network input, or
+// 1.0 when no scale factor has been set.
+// NOTE(review): only the first entry of the input_scale_factor map is
+// consulted — presumably all inputs share one factor; confirm behavior for
+// multi-input topologies.
+float GNAPlugin::get_input_scale_factor() const {
+    return input_scale_factor.empty() ? 1.0 : input_scale_factor.begin()->second;
+}
+
void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
// Check the input network
std::string error;
substitutePRelu(layers);
layers = CNNNetSortTopologically(*network.get());
reorderMaxPool(layers);
- applyOrientations(layers);
+ // ToDo sort if bool flag "changed"
+ // returned from insertion function
+ insertAligningFilterLayer(layers);
+
+#if ENABLE_AUTO_PERMUTE
+ layers = CNNNetSortTopologically(*network.get());
+ reversePermutations(layers);
+#endif
+ layers = CNNNetSortTopologically(*network.get());
insertIdentityLayer(layers);
+ layers = CNNNetSortTopologically(*network.get());
+ insertCopyLayer(layers);
+ layers = CNNNetSortTopologically(*network.get());
insertDiagonalLayer(layers);
+ layers = CNNNetSortTopologically(*network.get());
+ substituteScaleShiftBroadCast(layers);
};
Config supported = Config({
{TargetDevice::eGNA, Precision::FP32, [&](InferenceEngine::ICNNNetwork &network) -> CNNNetworkPtr {
if (gnaPrecision == Precision::I16) {
ModelQuantizer<QuantI16> q;
- return q.quantize(network, run_passes, input_scale_factor);
+ return q.quantize(network, run_passes, get_input_scale_factor());
}
if (gnaPrecision == Precision::I8) {
ModelQuantizer<QuantI8> q;
- return q.quantize(network, run_passes, input_scale_factor);
+ return q.quantize(network, run_passes, get_input_scale_factor());
}
THROW_GNA_EXCEPTION << "no mans land for GNA precision";
}},
supported.setDefaultDevice(TargetDevice::eGNA);
auto newNet = supported.find_configuration(network).convert(network);
- auto networkPrecision = newNet->getPrecision();
- if (!networkPrecision.is_float()) {
- gnadevice.reset(new GNADeviceHelper(gna_proc_type,
- gna_lib_async_threads_num,
- gna_openmp_multithreading,
- performance_counting));
- gnamem.reset(new gna_memory_type(
- make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
- } else {
- gnamem.reset(new gna_memory_type(make_polymorph<std::allocator<uint8_t>>()));
- }
+
// creating intel dnn_t structures from network
auto sortedNet = CNNNetSortTopologically(*newNet);
std::vector<CNNLayerPtr> sortedNoMem;
- std::map<std::string,
- std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
+ std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
// find all memory layers pairs and mark which one used as outputs
for (auto &layer : sortedNet) {
auto generic = dynamic_cast<GenericLayer *>(layer.get());
// fill in extra storage with memory layers
fillMemoryConnections(memoryPairs);
+ if (memory_connection.size() != 0) {
+ gna_lib_async_threads_num = 1;
+ }
+
+ auto networkPrecision = newNet->getPrecision();
+
+ if (!networkPrecision.is_float()) {
+ gnadevice.reset(new GNADeviceHelper(gna_proc_type,
+ gna_lib_async_threads_num,
+ gna_openmp_multithreading,
+ performance_counting));
+ gnamem.reset(new gna_memory_type(
+ make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
+ } else {
+ gnamem.reset(new gna_memory_type(make_polymorph<std::allocator<uint8_t>>()));
+ }
+
// keep inputs information and create input primitives
newNet->getInputsInfo(inputsDataMap);
if (inputsDataMap.empty()) {
THROW_GNA_EXCEPTION << " No inputs for the topology";
}
- if (inputsDataMap.size() != 1) {
- THROW_GNA_EXCEPTION << " cannot infer topologies with more than one inputs";
- }
-
- inputDims = inputsDataMap.begin()->second->getDims();
// keep output dims
newNet->getOutputsInfo(outputsDataMap);
}
outputDims = outputsDataMap.begin()->second->dims;
- ptr_inputs_global.resize(gna_lib_async_threads_num);
+ for (auto && input : inputsDataMap) {
+ get_ptr_inputs_global(input.first).resize(gna_lib_async_threads_num);
+ }
+
ptr_outputs_global.resize(gna_lib_async_threads_num);
// CreatingLayer primitives
// TODO: solely gna_example convolution hack
for (auto layer = sortedNoMem.begin(); layer != sortedNoMem.end(); ++layer) {
CreateLayerPrimitive(*layer);
}
- gnamem->bind_ptr(&ptr_outputs_global.front(), &dnnComponentsForLayer.back().second.ptr_outputs);
+ DnnComponentsForLayer::iterator output_component = std::find_if(dnnComponentsForLayer.begin(),
+ dnnComponentsForLayer.end(),
+ [&](const std::pair<std::string, intel_dnn_component_t>& v)
+ { return outputsDataMap.begin()->first == v.first; });
+
+ if (output_component == dnnComponentsForLayer.end()) {
+ if (dnnComponentsForLayer.empty()) {
+ THROW_GNA_EXCEPTION << "No outputs found in internal structures";
+ }
+ // likely layer is fused. Take last one
+ output_component = std::prev(dnnComponentsForLayer.end());
+ gnalog() << "Output layer "<< outputsDataMap.begin()->first
+ << " has not been found in component list. Took "
+ << output_component->first << " instead \n" << std::flush;
+ }
+ gnamem->bind_ptr(&ptr_outputs_global.front(), &output_component->second.ptr_outputs);
// make room for active list
- auto &last_component = dnnComponentsForLayer.back().second;
- gnamem->reserve_ptr(nullptr, ALIGN64(last_component.num_bytes_per_output * last_component.num_rows_out));
+ gnamem->reserve_ptr(nullptr, ALIGN64(output_component->second.num_bytes_per_output * output_component->second.num_rows_out));
void *pParallelExecutionData = nullptr;
// in fp32 mode last PWL cannot be computed without that
dnn.InitActiveList(NULL);
- nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(0), -1, InferenceEngine::BlobMap()));
+ nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
if (!networkPrecision.is_float()) {
// number of layer gets calculated inside that InitGNAStruct function
dnn.InitGNAStruct(&std::get<0>(nnets.front())->obj);
}
- // creating same gna RW segment for paralle infer requests
+ // creating same gna RW segment for parallel infer requests
for (int i = 1; i != gna_lib_async_threads_num; i++) {
- nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(0), -1, InferenceEngine::BlobMap()));
+ nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
// this can be improved by just copy all structures, but we are too lazy
dnn.InitGNAStruct(&std::get<0>(nnets.back())->obj);
}
};
- relocate(ptr_inputs_global[i], ptr_inputs_global[0]);
+ for (auto &&input : ptr_inputs_global_storage) {
+ relocate(input[i], input[0]);
+ }
+
relocate(ptr_outputs_global[i], ptr_outputs_global[0]);
for (int j = 0; j != std::get<0>(nnets.front())->obj.nLayers; j++) {
auto & layer = std::get<0>(nnets[i])->obj.pLayers[j];
relocate(layer.pOutputsIntermediate, layer.pOutputsIntermediate);
}
}
- orientation_in = dnn.component[0].orientation_in;
- orientation_out = dnn.component[dnn.num_components()-1].orientation_out;
- num_bytes_per_output = dnn.component[dnn.num_components()-1].num_bytes_per_output;
- auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(sortedNoMem.back());
+ // calculating input orientation without memory layers, since their orientation not changed during infer right now
+ std::unordered_map<string, string> skippedLayers;
+ for (auto &layer : sortedNet) {
+ for (int i = 0; CNNNetHasPrevLayer(layer.get(), i); i++) {
+ auto prevLayer = CNNNetPrevLayer(layer.get(), i);
+ if (!skippedLayers.count(prevLayer->name)) {
+ if (CNNNetHasPrevLayer(prevLayer.get())) {
+ continue;
+ }
+
+ // we are in the one of input layers
+ if (LayerInfo(prevLayer).isMemory()) {
+ continue;
+ }
+ }
+
+ auto dnnLayer = findDnnLayer(layer);
+ string inputName = prevLayer->name;
+ if (skippedLayers.count(prevLayer->name)) {
+ inputName = skippedLayers[prevLayer->name];
+ }
+
+ // non functional layer - skipped by gna
+ if (nullptr == dnnLayer) {
+ // storing input name for skipped layer
+ skippedLayers[layer->name] = inputName;
+ continue;
+ }
+
+ // input orientation might be already initialized, thus verify that it matches
+ if (!orientation_in.count(inputName)) {
+ orientation_in[inputName] = dnnLayer->orientation_in;
+ } else {
+ if (orientation_in[inputName] != dnnLayer->orientation_in) {
+                    THROW_GNA_EXCEPTION << "orientation for input layer: " << inputName << " cannot be calculated";
+ }
+ }
+ }
+ }
+
+ orientation_out = output_component->second.orientation_out;
+ num_bytes_per_output = output_component->second.num_bytes_per_output;
+
+ // find output layer
+ auto output = std::find_if(sortedNet.begin(),
+ sortedNet.end(),
+ [&](const CNNLayerPtr& v)
+ { return outputsDataMap.begin()->first == v.get()->name; });
+ if (output == sortedNet.end()) {
+ // likely layer is fused. Take last one
+ output = std::prev(sortedNet.end());
+ }
+ auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(*output);
output_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
num_rotate_rows = dnn.num_rotate_rows;
}
auto dump = gnadevice->dumpXnn(&std::get<0>(nnets.front())->obj, ptr_active_indices, num_active_indices);
dump.header.rw_region_size = gnamem->getRWBytes();
- dump.header.input_scaling_factor = input_scale_factor;
+ dump.header.input_scaling_factor = get_input_scale_factor();
dump.header.output_scaling_factor = output_scale_factor;
std::ofstream dumpStream(dumpXNNPath, std::ios::out | std::ios::binary);
dumpStream.write(reinterpret_cast<char*>(&dump.header), sizeof(intel_gna_model_header));
}
}
-uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
- return QueueInference(*input.begin()->second.get(), result);
-
- /*if (!syncPoints.empty()) {
- syncPoints.back().second = result;
- }*/
-}
-
-uint32_t GNAPlugin::QueueInference(const InferenceEngine::Blob &input, InferenceEngine::BlobMap &result) {
- auto inputLayout = input.layout();
- if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
- THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: " << input.layout();
- }
- if (inputLayout == NCHW) {
- inputLayout = NC;
- }
- auto is2D = input.layout() == Layout::NC || input.layout() == Layout ::CN;
-
+uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, InferenceEngine::BlobMap &result) {
auto freeNnet = std::find_if(std::begin(nnets), std::end(nnets), [](decltype(nnets.front()) & item) {
return std::get<1>(item) == -1;
});
if (freeNnet == nnets.end()) {
- THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
- << "GNA executable network has max of " << static_cast<uint32_t >(gna_lib_async_threads_num)
- << " parallel infer requests, please sync one of already running";
+ if (memory_connection.size() != 0) {
+ Wait(0);
+ freeNnet = nnets.begin();
+ } else {
+ THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
+ << "GNA executable network has max of "
+ << static_cast<uint32_t >(gna_lib_async_threads_num)
+ << " parallel infer requests, please sync one of already running";
+ }
}
+
auto nnet = std::get<0>(*freeNnet).get();
auto idx = static_cast<uint32_t>(std::distance(std::begin(nnets), freeNnet));
- if (ptr_inputs_global[idx] == nullptr) {
- // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
- THROW_GNA_EXCEPTION << "network not loaded : global input pointer not set";
- }
+ for (auto &input : inputs) {
+ auto inputLayout = input.second->layout();
+ if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
+ THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: "
+ << input.second->layout();
+ }
+ if (inputLayout == NCHW) {
+ inputLayout = NC;
+ }
+ auto is2D = input.second->layout() == Layout::NC || input.second->layout() == Layout::CN;
- if (orientation_in == kDnnUnknownOrientation) {
- // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
- THROW_GNA_EXCEPTION << "network not loaded : input orientation not set";
- }
+ if (!ptr_inputs_global_id.count(input.first)) {
+            // should not happen in user code, however it might happen if there is any non-executable-network-based integration of GNAPlugin instance
+ THROW_GNA_EXCEPTION << "network not loaded : input pointer for " << input.first << " not set";
+ }
- if (orientation_out == kDnnUnknownOrientation) {
- // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
- THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
- }
+ if (get_ptr_inputs_global(input.first)[idx] == nullptr) {
+            // should not happen in user code, however it might happen if there is any non-executable-network-based integration of GNAPlugin instance
+ THROW_GNA_EXCEPTION << "network not loaded : input pointer for (" << input.first << " at inferRequest #"
+ << idx << " not set";
+ }
- ImportFrames(ptr_inputs_global[idx],
- input.cbuffer().as<float *>(),
- input.precision(),
- orientation_in,
- input.dims()[input.dims().size() - 1],
- is2D ? input.dims()[1] : input.dims()[input.dims().size() - 1],
- is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2],
- is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2]);
+ if (orientation_in[input.first] == kDnnUnknownOrientation) {
+            // should not happen in user code, however it might happen if there is any non-executable-network-based integration of GNAPlugin instance
+ THROW_GNA_EXCEPTION << "network not loaded : input orientation for " << input.first << " not set";
+ }
+
+ if (orientation_out == kDnnUnknownOrientation) {
+            // should not happen in user code, however it might happen if there is any non-executable-network-based integration of GNAPlugin instance
+ THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
+ }
- if ((inputLayout == Layout::NC || inputLayout == Layout::NCHW) != (orientation_in == kDnnInterleavedOrientation)) {
- RotateFeatures(reinterpret_cast<uint8_t*>(ptr_inputs_global[idx]),
- gnadevice ? 2 : 4,
- // TODO: only works for cnn4a and google command so far
- input.dims()[input.dims().size() - 1],
- is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2], // num_feature_vectors looks batch should be there
- num_rotate_rows,
- num_rotate_columns);
+ auto dims = input.second->dims();
+
+ ImportFrames(get_ptr_inputs_global(input.first)[idx],
+ input.second->cbuffer().as<float *>(),
+ input.second->precision(),
+ orientation_in[input.first],
+ dims[dims.size() - 1],
+ is2D ? dims[1] : dims[dims.size() - 1],
+ is2D ? dims[0] : dims[0] * dims[1] * dims[2],
+ is2D ? dims[0] : dims[0] * dims[1] * dims[2]);
+ bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
+ if (((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
+ != (orientation_in[input.first] == kDnnInterleavedOrientation))
+ && !isOneChannel) {
+ RotateFeatures(reinterpret_cast<uint8_t *>(get_ptr_inputs_global(input.first)[idx]),
+ gnadevice ? 2 : 4,
+ // TODO: only works for cnn4a and google command so far
+ dims[dims.size() - 1],
+ is2D ? dims[0] : dims[0] * dims[2], // num_feature_vectors looks batch should be there
+ num_rotate_rows,
+ num_rotate_columns);
+ }
}
if (!gnadevice) {
}
std::get<1>(nnets[idx]) = -1;
- auto & output = *std::get<2>(nnets[idx]).begin()->second;
+ auto & result = std::get<2>(nnets[idx]);
#ifdef PLOT
dnn.BeginNewWrite();
if (dnn.num_components() != 0) {
}
dnn.WriteInputAndOutputTextGNA(&std::get<0>(nnets.front())->obj);
#endif
+ if (result.size() != 1) {
+ THROW_GNA_EXCEPTION << "Invalid number of outputs for infer request: " << result.size() << ", only 1 supported";
+ }
+ auto & output = *result.begin()->second;
if (output.layout() == Layout::NC) {
// TODO: rotate can be incorporated with exporting - used only in unit tests so far
// TODO: restore:
// if (orientation_out != kDnnInterleavedOrientation) {
+// if (inputs.size() != 1) {
+//        THROW_GNA_EXCEPTION << "Invalid number of inputs for deinterleave " << inputs.size()
+// << ", only 1 supported";
+// }
+// auto dims = inputs.begin()->second->dims();
// RotateFeatures(reinterpret_cast<uint8_t*>(ptr_outputs_global),
// gnadevice ? 2 : 4,
-// input.dims()[input.dims().size() - 1],
-// input.dims()[0], // num_feature_vectors looks batch should be there
-// input.dims()[0],
-// input.dims()[input.dims().size() - 1]);
+// dims[dims.size() - 1],
+// dims[0], // num_feature_vectors looks batch should be there
+// dims[0],
+// dims[dims.size() - 1]);
// }
+    // we consider the last layer as output ...
+ size_t output_layer_index = std::max(0, static_cast<int>(std::get<0>(nnets[idx])->obj.nLayers - 1));
+ if (gnadevice && std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].pOutputs != ptr_outputs_global[idx]) {
+ // ...as this is not true, we should look for output layer index
+ for (int j = 0; j != std::get<0>(nnets[idx])->obj.nLayers; j++) {
+ if (std::get<0>(nnets[idx])->obj.pLayers[j].pOutputs == ptr_outputs_global[idx]) {
+ output_layer_index = j;
+ break;
+ }
+ }
+ }
ExportScores(output.buffer(),
ptr_outputs_global[idx],
output.dims()[0],
output.dims()[0],
// TODO: create better getter consider multiple outputs case
- gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[std::get<0>(nnets[idx])->obj.nLayers - 1].nBytesPerOutput : sizeof(float),
+ gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].nBytesPerOutput : sizeof(float),
sizeof(float));
} else if (output.layout() != Layout::CN) {
THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN. But was " << output.layout();
}
}
-
-void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
- BlobMap result;
- result["output"] = std::shared_ptr<Blob>(&output, [](Blob*){});
- Wait(QueueInference(input, result));
-}
-
void GNAPlugin::Reset() {
for (auto && memLayer : memory_connection) {
std::memset(memLayer.second.gna_ptr, 0, memLayer.second.reserved_size);
}
}
-void GNAPlugin::Infer(const BlobMap &inputs, BlobMap &result) {
- auto &input = *inputs.begin()->second.get();
- auto &output = *result.begin()->second.get();
- Infer(input, output);
+void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
+ BlobMap bmInput;
+ BlobMap bmOutput;
+ if (inputsDataMap.size() != 1) {
+        THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&): model accepts " << inputsDataMap.size() << " inputs";
+ }
+ if (outputsDataMap.size() != 1) {
+        THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&): model accepts " << outputsDataMap.size() << " outputs";
+ }
+
+ bmInput[inputsDataMap.begin()->first] = std::shared_ptr<Blob>(const_cast<Blob*>(&input), [](Blob*){});
+ bmOutput[outputsDataMap.begin()->first] = std::shared_ptr<Blob>(&output, [](Blob*){});
+ Infer(bmInput, bmOutput);
+}
+
+void GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
+ Wait(QueueInference(input, result));
}
Blob::Ptr GNAPlugin::GetOutputBlob(InferenceEngine::Precision precision) {
return outputBlob;
}
-Blob::Ptr GNAPlugin::GetInputBlob(InferenceEngine::Precision precision) {
+Blob::Ptr GNAPlugin::GetInputBlob(std::string name, InferenceEngine::Precision precision) {
InferenceEngine::Blob::Ptr inputBlob;
// need to have intermediate blob for interleave conversion
// TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
+ auto inputDims = inputsDataMap[name]->getDims();
inputBlob = make_blob_with_precision(precision, inputDims.size() == 2 ? NC : NCHW, inputDims);
inputBlob->allocate();
return inputBlob;
auto serial = GNAModelSerial(&std::get<0>(nnets.back())->obj, mt);
serial.Import(basePtr, header.gnaMemSize, inputStream);
- ptr_inputs_global.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
+
+ get_ptr_inputs_global("input").push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
ptr_outputs_global.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.output.descriptor_offset));
auto getOrientation = [](intel_nnet_layer_t & layer) {
kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
};
- orientation_in = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
+ orientation_in["input"] = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
orientation_out = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers-1]);
num_bytes_per_output = header.output.element_size;
outputDims = SizeVector({header.output.elements_count / header.nGroup, header.nGroup});
- inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
+ auto inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
inputsDataMap["input"] = std::make_shared<InputInfo>();
inputsDataMap["input"]->setInputData(make_shared<Data>("input",
Layout::NC);
output_scale_factor = header.output.scaleFactor;
- input_scale_factor = header.input.scaleFactor;
+ input_scale_factor["input"] = header.input.scaleFactor;
num_rotate_rows = header.nRotateRows;
num_rotate_columns = header.nRotateColumns;
}
void GNAPlugin::Export(const std::string &fileName) {
- if (ptr_inputs_global.empty() || ptr_outputs_global.empty()) {
+ if (ptr_inputs_global_id.empty() || ptr_outputs_global.empty()) {
THROW_GNA_EXCEPTION << " network not loaded";
}
+ if (ptr_inputs_global_id.size() != 1) {
+ THROW_GNA_EXCEPTION << " exporting network with multiple inputs not supported";
+ }
+
std::fstream outStream(fileName, ios_base::out | ios_base::binary);
// TODO: nnet group parameter looks only used in application - so can we move this line into load network.
+ auto inputDims = inputsDataMap.begin()->second->getDims();
if (inputDims.size() == 2) {
std::get<0>(nnets.front())->obj.nGroup = inputDims[1];
}
auto serial = GNAModelSerial(&std::get<0>(nnets.front())->obj,
- {input_scale_factor,
- ptr_inputs_global[0],
+ {get_input_scale_factor(),
+ ptr_inputs_global_storage.front()[0],
2,
static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getDims()))},
{output_scale_factor,
}
void GNAPlugin::AddExtension(InferenceEngine::IExtensionPtr extension) {}
-void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {}
+
+void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
+ std::vector<std::string> supportedConfigOptions = {
+ GNA_CONFIG_KEY(SCALE_FACTOR),
+ GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE),
+ GNA_CONFIG_KEY(DEVICE_MODE),
+ GNA_CONFIG_KEY(COMPACT_MODE),
+ CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
+ GNA_CONFIG_KEY(PRECISION),
+ GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN),
+ CONFIG_KEY(PERF_COUNT),
+ GNA_CONFIG_KEY(LIB_N_THREADS),
+ CONFIG_KEY(SINGLE_THREAD)
+ };
+
+ for (auto& item : config) {
+ auto keys = std::find_if(supportedConfigOptions.begin(), supportedConfigOptions.end(), [&item](std::string supportedConfigOption) {
+ return item.first.find(supportedConfigOption) != std::string::npos;
+ });
+ if (keys == supportedConfigOptions.end()) {
+ THROW_GNA_EXCEPTION << as_status << NOT_FOUND << "Incorrect GNA Plugin config. Key " << item.first << " not supported";
+ }
+ }
+
+ // holds actual value of a found key
+ std::string key;
+ std::string value;
+ auto if_set = [&](std::string keyInput, const std::function<void()> & handler) {
+ auto keyInMap = config.find(keyInput);
+ if (keyInMap != config.end()) {
+ value = keyInMap->second;
+ handler();
+ }
+ };
+
+ auto if_start = [&](std::string keyInput, const std::function<void()> & handler) {
+ for (auto && c : config) {
+ if (c.first.find(keyInput) == 0) {
+ if (c.first.size() > keyInput.size() + 1) {
+ key = c.first.substr(keyInput.size() + 1);
+ value = c.second;
+ handler();
+ }
+ }
+ }
+ };
+
+ auto fp32eq = [](float p1, float p2) -> bool {
+ return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
+ };
+
+ auto & log = gnalog();
+
+ if_start(GNA_CONFIG_KEY(SCALE_FACTOR), [&, this] {
+ // only identical scale factors supported so far
+ auto ref = input_scale_factor.size() ? input_scale_factor.begin()->second : 1.0;
+ input_scale_factor[key] = std::stod(value);
+ if (ref != 1.0 && !fp32eq(input_scale_factor[key], ref)) {
+ std::string message = "only identical input scale factors supported, but provided: "
+ + std::to_string(ref) + " and " + std::to_string(input_scale_factor[key]);
+ log << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
+ THROW_GNA_EXCEPTION << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
+ }
+ });
+
+ if (input_scale_factor.empty()) {
+ if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
+ input_scale_factor["placeHolder"] = std::stod(value);
+ });
+ }
+
+ if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
+ dumpXNNPath = value;
+ });
+
+ if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
+ static caseless_unordered_map <std::string, uint32_t> supported_values = {
+ {GNAConfigParams::GNA_AUTO, GNA_AUTO},
+ {GNAConfigParams::GNA_HW, GNA_HARDWARE},
+ {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
+ {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
+ };
+ auto procType = supported_values.find(value);
+ if (procType == supported_values.end()) {
+ log << "GNA device mode unsupported: " << value;
+ THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
+ }
+ gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
+ });
+
+ if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
+ if (value == PluginConfigParams::YES) {
+ compact_mode = true;
+ } else if (value == PluginConfigParams::NO) {
+ compact_mode = false;
+ } else {
+ log << "GNA compact mode should be YES/NO, but not" << value;
+ THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not" << value;
+ }
+ });
+
+ if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
+ if (value == PluginConfigParams::YES) {
+ exclusive_async_requests = true;
+ } else if (value == PluginConfigParams::NO) {
+ exclusive_async_requests = false;
+ } else {
+ log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+ THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+ }
+ });
+
+ if_set(GNA_CONFIG_KEY(PRECISION), [&] {
+ auto precision = Precision::FromStr(value);
+ if (precision != Precision::I8 && precision != Precision::I16) {
+ log << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
+ THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
+ }
+ gnaPrecision = precision;
+ });
+
+ if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
+ if (value == PluginConfigParams::YES) {
+ uniformPwlDesign = true;
+ } else if (value == PluginConfigParams::NO) {
+ uniformPwlDesign = false;
+ } else {
+ log << "GNA pwl uniform algorithm parameter "
+ << "should be equal to YES/NO, but not" << value;
+ THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
+ << "should be equal to YES/NO, but not" << value;
+ }
+ });
+
+ if_set(CONFIG_KEY(PERF_COUNT), [&] {
+ if (value == PluginConfigParams::YES) {
+ performance_counting = true;
+ } else if (value == PluginConfigParams::NO) {
+ performance_counting = false;
+ } else {
+ log << "GNA performance counter enabling parameter "
+ << "should be equal to YES/NO, but not" << value;
+ THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
+ << "should be equal to YES/NO, but not" << value;
+ }
+ });
+
+ if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
+ uint64_t lib_threads = std::stoul(value, NULL, 10);
+ if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
+            log << "Unsupported accelerator lib number of threads: " << value << ", should be greater than 0 and less than 127";
+            THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
+                                << ", should be greater than 0 and less than 127";
+ }
+ gna_lib_async_threads_num = lib_threads;
+ });
+
+ if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
+ if (value == PluginConfigParams::YES) {
+ gna_openmp_multithreading = false;
+ } else if (value == PluginConfigParams::NO) {
+ gna_openmp_multithreading = true;
+ } else {
+            log << "SINGLE_THREAD should be YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "SINGLE_THREAD should be YES/NO, but not" << value;
+ }
+ });
+}
+
+/**
+ * @deprecated Use the version with config parameter
+ */
+void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+ InferenceEngine::QueryNetworkResult& res) const {
+ QueryNetwork(network, {}, res);
+}
+
+void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+ const std::map<std::string, std::string>& config,
+ InferenceEngine::QueryNetworkResult& res) const {
+ std::unordered_set<CNNLayer *> allLayers;
+ InferenceEngine::InputsDataMap inputs;
+
+ network.getInputsInfo(inputs);
+ std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
+
+ if (inputs.empty()) {
+ THROW_GNA_EXCEPTION << "Network is empty (GNA)\n";
+ }
+
+ auto const & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
+ if (secondLayers.empty()) {
+ THROW_GNA_EXCEPTION << "Network consists of input layer only (GNA)\n";
+ }
+
+ InferenceEngine::details::UnorderedDFS(allLayers,
+ secondLayers.begin()->second,
+ [&](CNNLayerPtr const layer) {
+ if (GNAPluginNS::GNAPlugin::LayerTypeFromStr(layer->type) != NO_TYPE) {
+ res.supportedLayers.insert(layer->name);
+ }
+ }, false);
+ }
intel_dnn_component_t * GNAPlugin::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty()) return nullptr;
} else {
IE_ASSERT(nextMemoryLayer.reserved_size == ALIGN64(num_data_bytes_out));
// same offsets
- gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_offset);
+ gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
}
return;
}
});
if (included == concat_connection.end()) {
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size));
+
+ for (auto && inputLayer : concatLayerInfoItem.concatInputLayers) {
+ if ( InferenceEngine::details::CaselessEq<std::string>()
+ (inputLayer.name, "input") ) {
+ bytes_alllocated_for_input[inputLayer.name] = ALIGN64(concatLayerInfoItem.reserved_size) - inputLayer.offset;
+ }
+ }
}
concatLayerInfo->second.output_allocation_flag = true;
}
return nullptr;
}
-GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, size_t offset, int idx) {
+std::vector<void *>& GNAPlugin::get_ptr_inputs_global(std::string name) {
+ if (!ptr_inputs_global_id.count(name)) {
+ ptr_inputs_global_storage.push_front({});
+ ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin();
+ }
+ return *ptr_inputs_global_id[name];
+}
+
+GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx) {
// selecting particular input layers
auto prevLayer = CNNNetPrevLayer(layer, idx);
// real input not a memory input
if (LayerInfo(prevLayer).isInput()) {
- if (0 == bytes_alllocated_for_input) {
- gnamem->push_value(&ptr_inputs_global.front(), static_cast<uint8_t>(0), num_data_bytes_in, 64);
- bytes_alllocated_for_input = num_data_bytes_in;
+ if (0 == bytes_alllocated_for_input[prevLayer->name]) {
+ gnamem->push_value(&get_ptr_inputs_global(prevLayer->name).front(), static_cast<uint8_t>(0), num_data_bytes_in, 64);
+ bytes_alllocated_for_input[prevLayer->name] = num_data_bytes_in;
}
- if (ALIGN(num_data_bytes_in, 64) > ALIGN(bytes_alllocated_for_input, 64)) {
- THROW_IE_EXCEPTION << "Layer: " << layer->name << " Cannot bind pointer to already allocated input, due to size_allocated="
- << bytes_alllocated_for_input << ", and size_requested=" << num_data_bytes_in;
+ if (ALIGN(num_data_bytes_in, 64) > ALIGN(bytes_alllocated_for_input[prevLayer->name], 64)) {
+ THROW_GNA_EXCEPTION
+ << "Layer: " << layer->name
+ << " Cannot bind pointer to already allocated input(" << prevLayer->name
+ << "), due to size_allocated=" << bytes_alllocated_for_input[prevLayer->name]
+ << ", and size_requested=" << num_data_bytes_in;
}
- gnamem->bind_ptr(ptr, &ptr_inputs_global.front(), offset);
+
+ if (offset >= 0) {
+ gnamem->bind_ptr(ptr, &get_ptr_inputs_global(prevLayer->name).front(), offset);
+ } else {
+ gnamem->bind_ptr(&get_ptr_inputs_global(prevLayer->name).front(), ptr, -offset);
+ }
+
return prevLayer;
}
prevLayer->name);
if (concatLayerInfo != concat_connection.end()) {
auto & concatLayerInfoItem = concatLayerInfo->second;
- // dnnLayer that is input for concat output layer
+ // dnnLayer that is input for concat layer
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
// return layer over concat
return CNNNetPrevLayer(prevLayer);