num_active_outputs_ = 0;
num_left_context = 0;
num_right_context = 0;
- do_rotate_input = false;
softmax_type = kSoftmaxNone;
ptr_sumgroup_sizes = nullptr;
num_sumgroup_sizes = 0;
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
+
+ if (comp.num_rows_in * comp.num_columns_in % 8 != 0) {
+ THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent is not multiply by 8";
+ }
+ auto filter_stride_size = comp.op.conv1D.num_feature_maps * comp.op.conv1D.num_feature_map_columns;
+ auto max_number_of_out_elements = (comp.num_columns_in - comp.op.conv1D.num_filter_coefficients) / filter_stride_size + 1;
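+ // Worked example (hypothetical values): num_columns_in = 48, num_filter_coefficients = 16,
+ // filter_stride_size = 16 -> the kernel fits at offsets 0, 16 and 32,
+ // so max_number_of_out_elements = (48 - 16) / 16 + 1 = 3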
+ if (comp.num_columns_out / max_number_of_out_elements != comp.op.conv1D.num_filters) {
+ THROW_GNA_EXCEPTION << "Number of outputs or feature map config is incorrect in Convolutional1DComponent";
+ }
}
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
std::vector<intel_dnn_component_t> component;
uint32_t num_left_context;
uint32_t num_right_context;
+ uint32_t new_num_conv_columns = 0;
bool do_rotate_input;
uint32_t num_rotate_rows = 0;
uint32_t num_rotate_columns = 0;
}
+/**
+ * Create AMIntelDNN Convolutional1DComponent from ConvolutionLayer
+ *
+ * GNA Convolution input is NHCW and output is transposed to NHWC
+ *
+ * OpenVINO default layout is NCHW
+ * TensorFlow default layout is NHWC
+ *
+ * There is an option in the Model Optimizer
+ * --disable_nhwc_to_nchw
+ * which disables the default translation from NHWC to NCHW
+ * By default MO converts TensorFlow's default NHWC to OpenVINO's default NCHW
+ * So when the IR was created with this option the layout will be NHWC
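+ * Hypothetical example: an NCHW input of shape [1, 2, 1, 168] and an NHWC input
+ * of shape [1, 1, 168, 2] carry the same 336 values; only the index order differs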
+ *
+ * @param layer Pointer to ConvolutionLayer
+ */
void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
- auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(!layer->insData.empty());
IE_ASSERT(!layer->outData.empty());
+ printConvolutionLayer(convolution);
+
auto inputs = layer->insData.begin()->lock();
- auto outputs = *layer->outData.begin();
+ auto outputs = layer->outData.front();
+ if (inputs->getLayout() != Layout::NHWC &&
+ inputs->getLayout() != Layout::NCHW &&
+ inputs->getLayout() != Layout::NC) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with layout " << inputs->getLayout() << " isn't currently supported on GNA";
+ }
+ if (inputs->getLayout() != outputs->getLayout()) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "I/O layout mismatch: " << inputs->getLayout() << " vs " << outputs->getLayout();
+ }
- uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
- uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
- uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
- uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
- uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
+ auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
+ auto in_batch = FROM_IR_DIM(inputs, in_order[0]);
+ auto in_channels = FROM_IR_DIM(inputs, in_order[1]);
+ auto in_height = FROM_IR_DIM(inputs, in_order[2]);
+ auto in_width = FROM_IR_DIM(inputs, in_order[3]);
- if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
- swap(h_dim_in, w_dim_in);
- swap(h_dim_out, w_dim_out);
- swap(convolution._kernel_x, convolution._kernel_y);
- swap(convolution._stride_x, convolution._stride_y);
+ auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
+ auto out_batch = FROM_IR_DIM(outputs, out_order[0]);
+ auto out_channels = FROM_IR_DIM(outputs, out_order[1]);
+ auto out_height = FROM_IR_DIM(outputs, out_order[2]);
+ auto out_width = FROM_IR_DIM(outputs, out_order[3]);
+
+ if (in_batch != 1 || out_batch != 1) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with batch size not equals 1 is not supported";
+ }
+ if (convolution._kernel_x != 1 && convolution._kernel_y != 1 && convolution._kernel_y != in_channels) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with 2D kernel is not supported on GNA";
+ }
+ if ((in_channels > 1) && (in_height > 1) && (in_width > 1)) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with 3D input is not supported on GNA";
+ }
+ if (convolution._dilation_x != 1 || convolution._dilation_y != 1) {
+ // TODO: Issue 24839
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with dilation is not supported on GNA";
+ }
+ if (inputs->getLayout() != Layout::NHWC && in_height != 1) {
+ // TensorFlow default layout is NHWC
+ // OpenVINO default layout is NCHW
+ // GNA Convolution input is NHCW
+ // When the layer layout is NHWC, it means it was created by the PassManager
+ THROW_GNA_LAYER_EXCEPTION(layer) << "with in_height != 1 is not supported: this case requires an additional Permute and is not implemented yet";
+ }
+ if (convolution._kernel_x > in_width * in_height) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "Kernel dimensions are bigger than input dimensions. "
+ << convolution._kernel_x << " vs " << in_width * in_height;
}
- uint32_t num_feature_map_rows = w_dim_in / convolution._stride_x;
- uint32_t num_feature_map_columns = c_dim_in * convolution._stride_x;
+ if (out_channels != convolution._out_depth) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "Output channels do not equal output depth. "
+ << out_channels << " vs " << convolution._out_depth;
+ }
+ std::size_t calculated_out_width = (in_width * in_height - convolution._kernel_x + 2 * convolution._padding_x) / convolution._stride_x + 1;
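+ // standard 1D convolution output size, e.g. (hypothetical values): in_width = 168, in_height = 1,
+ // kernel_x = 8, padding_x = 0, stride_x = 1 -> (168 * 1 - 8 + 2 * 0) / 1 + 1 = 161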
+ if (out_width * in_height != calculated_out_width) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid output configuration. "
+ << calculated_out_width << " != " << out_width * in_height;
+ }
- uint32_t num_columns_in = c_dim_in;
- uint32_t num_rows_out = w_dim_out;
+ dnn->new_num_conv_columns = 0;
+
+ uint32_t total_conv_kernel_size = convolution._kernel_x * convolution._kernel_y * convolution._out_depth;
+ uint32_t single_conv_kernel_size = convolution._kernel_x * convolution._kernel_y;
+ if (convolution._kernel_y != in_channels) { // work around the strange special case where 1D kernel gets rewritten as 2D kernel
+ total_conv_kernel_size *= in_channels;
+ single_conv_kernel_size *= in_channels;
+ }
+ auto actual_kernel_size = details::product(convolution._weights->getTensorDesc().getDims());
+ if (total_conv_kernel_size != actual_kernel_size) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "Weights size does not equal kernel size "
+ << actual_kernel_size << " vs " << total_conv_kernel_size;
+ }
// pad the convolution kernel to a multiple of 8
- uint32_t num_conv_kernel_padding = ALIGN(convolution._kernel_x * num_feature_map_columns, 8)
- - convolution._kernel_x * num_feature_map_columns;
+ uint32_t num_conv_kernel_padding = ALIGN(single_conv_kernel_size, 8) - single_conv_kernel_size;
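+ // e.g. (hypothetical values): single_conv_kernel_size = 12 -> ALIGN(12, 8) = 16,
+ // so num_conv_kernel_padding = 4 zero coefficients are appended to each filter below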
+ if (num_conv_kernel_padding == 0) {
+ gnalog() << LAYER_NAME(layer) << "Kernel is aligned \n";
+ } else {
+ gnalog() << LAYER_NAME(layer) << "Kernel padding is " << num_conv_kernel_padding << "\n";
+ }
+
+ // have to pad the input so the last kernel meets its corresponding input
+ uint32_t num_inputs = in_width * in_height * in_channels;
+ uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
+
+ // convert to 2D and set GNA input feature map size
+ uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
+ if (in_height == 1 && convolution._stride_y != 1) {
+ num_feature_map_columns = in_channels * convolution._stride_x;
+ } else if (in_width == 1 && convolution._stride_x != 1) {
+ num_feature_map_columns = in_channels * convolution._stride_y;
+ }
+ uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;
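+ // e.g. (hypothetical values): in_channels = 2, stride_x = 8, in_height = 1 ->
+ // num_feature_map_columns = 16, and with in_width = 168 the 336 inputs form
+ // num_feature_map_rows = 336 / 16 = 21 rows of the GNA feature map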
+
+ uint32_t num_filters = convolution._out_depth;
+ uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
+ uint32_t num_filter_rows = num_filter_coefficients / num_feature_map_columns;
+ uint32_t num_columns_in = num_inputs + num_input_padding;
+
+ uint32_t num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
+ uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / num_feature_map_columns) + 1) * convolution._out_depth;
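+ // num_columns_out is computed from the padded input and padded kernel, while
+ // num_columns_out_unpadded reflects the original IR configuration and is what
+ // the output tensor size is validated against below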
+
+ // if padding the kernel to a multiple of 8 would cause missed outputs, pad the input further
+ while (num_columns_out < out_batch * out_channels * out_height * out_width) {
+ num_input_padding += 8;
+ num_columns_in = num_inputs + num_input_padding;
+ num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
+ dnn->new_num_conv_columns = num_columns_out / convolution._out_depth;
+ }
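+ // new_num_conv_columns records the padded per-filter output count so that the
+ // following layer can pick up the enlarged row count instead of the IR one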
+
+ if (num_input_padding == 0) {
+ gnalog() << LAYER_NAME(layer) << "Inputs are aligned \n";
+ } else {
+ gnalog() << LAYER_NAME(layer) << "Inputs padding is " << num_input_padding << "\n";
+ }
+
+ if (num_columns_out_unpadded != out_batch * out_channels * out_height * out_width) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "Number of output columns does not equal output tensor size "
+ << num_columns_out_unpadded << " vs " << out_batch * out_channels * out_height * out_width;
+ }
+
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
void* ptr_weights = nullptr;
// TODO: questionable why we invent a precision for biases that are not in the IR
auto biasPrecision = convolution._biases ? convolution._biases->getTensorDesc().getPrecision() : outputs->getPrecision();
- auto& currentComponent = dnnComponents.addComponent(layer->name, "convolution");
+ uint32_t num_bytes_per_input = inputs->getPrecision().size();
+ uint32_t num_bytes_per_output = outputs->getPrecision().size();
+ uint32_t num_bytes_per_weight = convolution._weights->getTensorDesc().getPrecision().size();
+ uint32_t num_bytes_per_bias = biasPrecision.size();
- // have to pad input to let last kernel meets it's corresponding input
- auto num_inputs = num_feature_map_columns * num_feature_map_rows + num_conv_kernel_padding;
- auto num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
- auto num_filter_rows = convolution._kernel_x / convolution._stride_x;
+ float weight_scale_factor = 1.0f;
+ float output_scale_factor = 1.0f;
+ auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+ if (quantized != nullptr) {
+ weight_scale_factor = quantized->_weights_quant.scale;
+ output_scale_factor = quantized->_dst_quant.scale;
+ }
+
+ auto& currentComponent = dnnComponents.addComponent(layer->name, "convolution");
dnn->InitConvolutional1DComponent(currentComponent,
1,
- num_inputs + num_input_padding,
+ num_columns_in,
1,
- num_rows_out * convolution._out_depth,
- inputs->getPrecision().size(),
- outputs->getPrecision().size(),
- convolution._weights->getTensorDesc().getPrecision().size(),
- biasPrecision.size(),
- convolution._out_depth,
+ num_columns_out,
+ num_bytes_per_input,
+ num_bytes_per_output,
+ num_bytes_per_weight,
+ num_bytes_per_bias,
+ num_filters,
num_filter_rows,
- num_feature_map_columns * num_filter_rows + num_conv_kernel_padding,
+ num_filter_coefficients,
1,
num_feature_map_rows,
num_feature_map_columns,
- quantized == nullptr ? 1 : quantized->_weights_quant.scale,
- quantized == nullptr ? 1 : quantized->_dst_quant.scale,
+ weight_scale_factor,
+ output_scale_factor,
ptr_inputs,
ptr_outputs,
ptr_weights,
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: the convolution might not be the first layer in sorted order but connected via a split, for example - don't know how Kaldi will handle that
- if (LayerInfo(connectedInputLayer).isInput()) {
- // Kaldi features are opposite orientation
- dnn->num_rotate_rows = num_feature_map_columns;
- dnn->num_rotate_columns = num_feature_map_rows;
+ if (!dnn->do_rotate_input) {
+ if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
+ // Kaldi features are opposite orientation
+ dnn->do_rotate_input = true;
+ dnn->num_rotate_rows = num_feature_map_columns;
+ dnn->num_rotate_columns = num_feature_map_rows;
+ } else {
+ dnn->do_rotate_input = false;
+ }
}
connectOutput(layer, ptr_outputs, num_data_bytes_out);
- // rotate
- auto TransposeMatrix = [](uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
- std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
- for (uint32_t i = 0; i < num_rows; i++) {
- for (uint32_t j = 0; j < num_cols; j++) {
- ie_memcpy(&temp_buffer.front() + (j * num_rows + i) * element_size,
- temp_buffer.size() - (i * num_cols + j) * element_size,
- ptr_matrix + (i * num_cols + j) * element_size,
- element_size);
- }
- }
- return temp_buffer;
- };
-
- std::vector<uint8_t > transposedWeights;
+ std::vector<uint8_t> transposedWeights;
for (uint32_t k = 0; k < convolution._out_depth; k++) {
- uint8_t* ptr_filt_current
- = convolution._weights->cbuffer().as<uint8_t*>() + k * num_columns_in * convolution._kernel[X_AXIS] * convolution.precision.size();
- auto transposedPart = TransposeMatrix(ptr_filt_current, convolution.precision.size(), num_columns_in, convolution._kernel[X_AXIS]);
+ uint8_t* ptr_filt_current
+ = convolution._weights->cbuffer().as<uint8_t*>() +
+ k * in_channels * convolution._kernel[X_AXIS] * convolution.precision.size();
+ auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, convolution._kernel[X_AXIS]);
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
}
+ if (transposedWeights.size() != convolution._weights->byteSize()) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "weights was transposed incorrectly. "
+ << transposedWeights.size() << ' '
+ << convolution._weights->byteSize();
+ }
if (num_conv_kernel_padding == 0) {
- gnamem->readonly().push_local_ptr(ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64);
+ gnamem->readonly().push_local_ptr(ptr_weights,
+ transposedWeights.data(),
+ convolution._weights->byteSize(),
+ 64);
} else {
- auto elementsIn = convolution._kernel_x * num_feature_map_columns + num_conv_kernel_padding;
- auto paddedWeights = elementsIn * convolution._out_depth;
+ auto paddedWeights = (single_conv_kernel_size + num_conv_kernel_padding) * convolution._out_depth;
auto paddedWeightsSize = paddedWeights * convolution.precision.size();
- auto elements_in_row = convolution._kernel_x * num_feature_map_columns;
- gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
- size_t offset = 0;
- for (int i = 0; i < convolution._out_depth && size >= offset; i++) {
- ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
- transposedWeights.data() + elements_in_row * i * convolution.precision.size(),
- elements_in_row* convolution.precision.size());
-
- offset += elementsIn * convolution.precision.size();
+ auto initializer = [=](void* data, std::size_t size) {
+ if (paddedWeightsSize > size) {
+ THROW_GNA_LAYER_EXCEPTION(layer) << "size is less than paddedWeightsSize";
}
- }, 64);
+ std::size_t offset = 0;
+ std::vector<uint8_t> padding_zeros(num_conv_kernel_padding * convolution.precision.size(), 0);
+ uint8_t* dstPtr = reinterpret_cast<uint8_t*>(data);
+ for (uint32_t i = 0; i < convolution._out_depth; i++) {
+ ie_memcpy(dstPtr + offset,
+ size - offset,
+ transposedWeights.data() + single_conv_kernel_size * i * convolution.precision.size(),
+ single_conv_kernel_size * convolution.precision.size());
+ offset += single_conv_kernel_size * convolution.precision.size();
+ ie_memcpy(dstPtr + offset,
+ size - offset,
+ padding_zeros.data(),
+ padding_zeros.size());
+ offset += padding_zeros.size();
+ }
+ };
+ gnamem->readonly().push_initializer(ptr_weights,
+ paddedWeightsSize,
+ initializer,
+ 64);
}
if (convolution._biases) {
convolution._biases->byteSize(),
64);
} else {
- gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+ gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
}
}
num_rows = FROM_IR_DIM(inputs, 1);
}
+ if (dnn->new_num_conv_columns) {
+ num_rows = dnn->new_num_conv_columns;
+ dnn->new_num_conv_columns = 0;
+ }
+
// TODO: solve this by layer level transformations
auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
std::memset(concatLayer.second.gna_ptr, 0, concatLayer.second.reserved_size);
}
}
+
+void GNAGraphCompiler::printTensorDesc(const std::string& name, const InferenceEngine::TensorDesc& desc) {
+ gnalog() << name << " layout: " << desc.getLayout() << " shape: ";
+ for (size_t i = 0; i < desc.getDims().size(); i++) {
+ if (i > 0) {
+ gnalog() << 'x';
+ }
+ gnalog() << desc.getDims()[i];
+ }
+ gnalog() << "\n";
+}
+
+void GNAGraphCompiler::printConvolutionLayer(const InferenceEngine::ConvolutionLayer& layer) {
+ const char x = 'x';
+
+ gnalog() << "ConvolutionLayer '"
+ << layer.name
+ << "' Kernel: "
+ << layer._kernel_x << x << layer._kernel_y
+ << " Padding: "
+ << layer._padding_x << x << layer._padding_y
+ << " Stride: "
+ << layer._stride_x << x << layer._stride_y
+ << " Dilation: "
+ << layer._dilation_x << x << layer._dilation_y
+ << " Auto Padding: '"
+ << layer._auto_pad << "'";
+ gnalog() << "\n";
+ printTensorDesc("Input", layer.input()->getTensorDesc());
+ printTensorDesc("Output", layer.outData.front()->getTensorDesc());
+}
+
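+// Reinterprets ptr_matrix as a row-major [num_rows x num_cols] matrix of raw
+// element_size-byte elements and returns its transpose as a flat buffer; used
+// above to reorder convolution kernel weights for GNA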
+std::vector<uint8_t>
+GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
+ std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
+ for (uint32_t i = 0; i < num_rows; i++) {
+ for (uint32_t j = 0; j < num_cols; j++) {
+ ie_memcpy(&temp_buffer.front() + (j * num_rows + i) * element_size,
+ temp_buffer.size() - (j * num_rows + i) * element_size,
+ ptr_matrix + (i * num_cols + j) * element_size,
+ element_size);
+ }
+ }
+ return temp_buffer;
+}
+
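+// Maps a layout to FROM_IR_DIM indices (counted from the last dimension) so
+// callers can always read dimensions in N, C, H, W order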
+std::vector<std::size_t> GNAGraphCompiler::getFromIRDimsOrderNCHW(InferenceEngine::Layout layout) {
+ std::vector<std::size_t> order;
+ switch (layout) {
+ case Layout::NHWC:
+ order = { 4, 1, 3, 2 };
+ break;
+ case Layout::NCHW:
+ default:
+ order = { 4, 3, 2, 1 };
+ break;
+ }
+ return order;
+}
#include <vector>
#include "ie_layers.h"
+#include <ie_data.h>
+#include <ie_common.h>
#include "descriptions/gna_input_desc.hpp"
#include "descriptions/gna_flags.hpp"
#include "cpp_interfaces/base/ie_plugin_base.hpp"
intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);
+ static void printTensorDesc(const std::string& name, const InferenceEngine::TensorDesc& desc);
+ static void printConvolutionLayer(const InferenceEngine::ConvolutionLayer& layer);
+ static std::vector<uint8_t> transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
+ static std::vector<std::size_t> getFromIRDimsOrderNCHW(InferenceEngine::Layout layout);
+
public:
GNAPluginNS::backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
/**
* @brief remove given layer from topology, currently only layers with one input data and one output data supported
*/
-inline void CNNNetworkRemoveLayer(CNNLayerPtr layer) {
+inline void CNNNetworkRemoveLayer(CNNLayerPtr layer, bool checkDims = true) {
if (!layer) {
THROW_IE_EXCEPTION << "Cannot remove layer pointed to NULL";
}
}
// if dimensions of input layer not equal target dimensions - shape infer or reshape layer required, so skipping those cases
auto osp = layer->outData.front();
- if (isp->getDims() != osp->getDims()) {
+ if (checkDims && isp->getDims() != osp->getDims()) {
THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" its input layer("
<< isp->getName() << ") and output(" << osp->getName() << ") have incompatible dimensions";
}
header.nOutputs = outputs.size();
header.nRotateRows = nRotateRows;
header.nRotateColumns = nRotateColumns;
+ header.doRotateInput = doRotateInput;
writeBits(header, os);
*/
uint32_t nRotateRows = 0u;
uint32_t nRotateColumns = 0u;
+ bool doRotateInput = false;
uint32_t nInputs = 0u;
uint32_t nOutputs = 0u;
std::vector<RuntimeEndPoint> outputs;
uint32_t nRotateRows = 0;
uint32_t nRotateColumns = 0;
+ bool doRotateInput = false;
MemoryType states, *pstates = nullptr;
ModelHeader modelHeader;
}
#endif
- GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns) {
+ GNAModelSerial & SetInputRotation(uint32_t nRotateRows, uint32_t nRotateColumns, bool doRotateInput) {
this->nRotateColumns = nRotateColumns;
this->nRotateRows = nRotateRows;
+ this->doRotateInput = doRotateInput;
return *this;
}
if (policy.PermutePolicy != Policy::Permute::DISABLED) {
passes->registerPass<ReversePermutationsPass>();
}
+ if (policy.NHWCToNCHWPolicy != Policy::NHWCToNCHW::DISABLED) {
+ passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
+ }
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<InsertCopyLayerPass>();
passes->registerPass<InsertDiagonalLayerPass>();
}
}
+ do_rotate_input = dnn->do_rotate_input;
num_rotate_rows = dnn->num_rotate_rows;
num_rotate_columns = dnn->num_rotate_columns;
is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]);
bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
- if (((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
+ if (do_rotate_input && ((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
!= (inputsDesc->getOrientation(input.first) == kDnnInterleavedOrientation))
&& !isOneChannel) {
RotateFeatures(reinterpret_cast<uint8_t *>(inputsDesc->getPtrInputsGlobal(input.first)[idx]),
exportOutputDims[exportOutputDims.size() - 1],
outputDesc.num_bytes_per_element,
sizeof(float));
- } else if (outputBlob->getTensorDesc().getLayout() != Layout::CN) {
- THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN. But was "
- << outputBlob->getTensorDesc().getLayout();
}
if (gnadevice) {
outputsDesc[0].orientation = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers - 1]);
#endif
+ do_rotate_input = header.doRotateInput;
num_rotate_rows = header.nRotateRows;
num_rotate_columns = header.nRotateColumns;
outputsDesc,
inputsDataMap,
outputsDataMap)
- .SetInputRotation(dnn->num_rotate_rows, dnn->num_rotate_columns);
+ .SetInputRotation(dnn->num_rotate_rows, dnn->num_rotate_columns, dnn->do_rotate_input);
for (auto && memoryConnection : graphCompiler.memory_connection) {
serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size);
#if GNA_LIB_VER == 2
uint32_t activeLayerIndex = 0xffffffff;
#endif
+ bool do_rotate_input = false;
uint32_t num_rotate_rows = 0;
uint32_t num_rotate_columns = 0;
uint32_t *ptr_active_indices = nullptr;
ENABLED,
FAST
} ConcatAlignmentPolicy = ConcatAlignment::FAST;
+
+ /**
+ * Policy to support the --disable_nhwc_to_nchw option in MO
+ */
+ enum class NHWCToNCHW {
+ DISABLED,
+ REMOVE_LAST,
+ REMOVE_ALL
+ } NHWCToNCHWPolicy = NHWCToNCHW::REMOVE_ALL;
};
inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
}
}
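+
+// Removes Permute layers that only translate between NHWC and NCHW around a
+// Convolution and relabels the convolution I/O as NHWC instead, e.g.
+//   ... -> Permute(0, 3, 1, 2) -> Convolution -> Permute(0, 2, 3, 1) -> ...
+// becomes
+//   ... -> Convolution (NHWC input/output) -> ...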
+void RemovePermutationsNHWCToNCHWPass::run() {
+ std::list<CNNLayerPtr> permutationsToRemove;
+
+ for (auto& l : *pLayers) {
+ if (!LayerInfo(l).isConvolution()) {
+ continue;
+ }
+
+ if (getInputTo(l->outData.front()).empty()) {
+ continue;
+ }
+ auto next = getInputTo(l->outData.front()).begin()->second;
+ auto prev = CNNNetPrevLayer(l);
+
+ if (!LayerInfo(next).isPermute() || !LayerInfo(prev).isPermute()) {
+ continue;
+ }
+
+ if (getPassManager()->getPolicy().NHWCToNCHWPolicy == Policy::NHWCToNCHW::REMOVE_ALL) {
+ permutationsToRemove.push_back(prev);
+ }
+ permutationsToRemove.push_back(next);
+ }
+
+ for (auto&& toRemove : permutationsToRemove) {
+ gnalog() << toRemove->type << " layer '" << toRemove->name << "' will be removed" << '\n';
+
+ auto next = getInputTo(toRemove->outData.front()).begin()->second;
+ if (LayerInfo(next).isConvolution()) {
+ next->input()->setDims(toRemove->input()->getDims());
+ next->input()->setLayout(Layout::NHWC);
+ auto layerBeforePermute = CNNNetPrevLayer(toRemove);
+ layerBeforePermute->outData[0]->setLayout(Layout::NHWC);
+
+ auto& convolution = dynamic_cast<ConvolutionLayer&>(*next);
+ if (convolution._kernel_y != 1) {
+ THROW_GNA_LAYER_EXCEPTION(next) << "this case is not implemented yet";
+ }
+ auto in_channels = next->input()->getDims()[3];
+ convolution._kernel_y = in_channels;
+ }
+ auto prev = CNNNetPrevLayer(toRemove);
+ if (LayerInfo(prev).isConvolution()) {
+ prev->outData[0]->setDims(toRemove->outData[0]->getDims());
+ prev->outData[0]->setLayout(Layout::NHWC);
+ }
+ CNNNetworkRemoveLayer(toRemove, false);
+ }
+}
+
void InsertIdentityLayerPass::run() {
int numOfIdentityLayers = 0;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
DECL_PASS(ReversePermutations);
/**
+ * @brief Pass to support the --disable_nhwc_to_nchw option in MO
+ * @param layers
+ */
+DECL_PASS(RemovePermutationsNHWCToNCHW);
+
+/**
 * @brief search for a specific pattern in the graph (6 layers are replaced by a single one)
*/
DECL_PASS(SubstitutePRelu);
num_inputs_band_stride = component->op.conv1D.num_feature_maps * component->op.conv1D.num_feature_map_columns;
uint32_t num_filter_coefficients = component->op.conv1D.num_filter_coefficients;
- if ((component->num_rows_in != 1) || (component->num_rows_out != 1)
- || (component->num_columns_out != num_filter_outputs * component->op.conv1D.num_filters)) {
- THROW_GNA_EXCEPTION << "Bad problem dimensions in CNNFilter32!";
+ std::string layer_name;
+#ifdef PLOT
+ layer_name = " In layer '" + std::string(component->original_layer_name) + "'";
+#endif
+ if (component->num_rows_in != 1 || component->num_rows_out != 1) {
+ THROW_GNA_EXCEPTION << "Bad number of rows in CNNFilter32!" << layer_name;
+ }
+ if (component->num_columns_out < num_filter_outputs * component->op.conv1D.num_filters) {
+ THROW_GNA_EXCEPTION << "Bad num_columns_out in CNNFilter32!" << layer_name;
}
for (uint32_t j = 0; j < num_filter_outputs; j++) {
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <memory>
+#include <tuple>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<
+ InferenceEngine::Precision, // Network Precision
+ std::string, // Target Device
+ std::map<std::string, std::string> // Configuration
+> removePermutationsPassParams;
+
+namespace LayerTestsDefinitions {
+
+class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<removePermutationsPassParams>,
+ public LayerTestsUtils::LayerTestsCommon {
+ public:
+ static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) {
+ InferenceEngine::Precision netPrecision;
+ std::string targetDevice;
+ std::map<std::string, std::string> configuration;
+ std::tie(netPrecision, targetDevice, configuration) = obj.param;
+
+ std::ostringstream result;
+ result << "netPRC=" << netPrecision.name() << "_";
+ result << "targetDevice=" << targetDevice << "_";
+ for (auto const& configItem : configuration) {
+ result << "_configItem=" << configItem.first << "_" << configItem.second;
+ }
+ return result.str();
+ }
+
+ protected:
+ void SetUp() override {
+ // Reshape ([1, 336] -> [1, 1, 168, 2])
+ // |
+ // Permute (order: [0, 3, 1, 2])
+ // |
+ // Convolution (weights: [2, 12, 1, 8])
+ // |
+ // Permute (order: [0, 2, 3, 1])
+ // |
+ // Reshape ([1, 1, 161, 12] -> [1, 1932])
+ InferenceEngine::Precision netPrecision;
+ std::tie(netPrecision, targetDevice, configuration) = this->GetParam();
+ auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+ auto params = ngraph::builder::makeParams(ngPrc, { {1, 336} });
+
+ std::vector<size_t> outFormShapes1 = { 1, 1, 168, 2 };
+ auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, outFormShapes1);
+ auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
+
+ auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
+ ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
+ permute1->set_friendly_name("permute1");
+
+ auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+ ngraph::op::PadType::VALID, 12);
+
+ auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv1,
+ ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
+ permute2->set_friendly_name("permute2");
+
+ std::vector<size_t> outFormShapes2 = { 1, 1932 };
+ auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes2);
+ auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
+
+ ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reshape2) };
+ function = std::make_shared<ngraph::Function>(results, params, "RemovePermutationPass");
+ }
+};
+
 TEST_P(RemovePermutationsNHWCToNCHWPassTest, CompareWithRefImpl) {
 Run();
 }
+
+ const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+ };
+
+ const std::vector<std::map<std::string, std::string>> configs = {
+ {
+ {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+ {"GNA_SCALE_FACTOR_0", "1638.4"}
+ }
+ };
+
+ INSTANTIATE_TEST_CASE_P(PermutationPass, RemovePermutationsNHWCToNCHWPassTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_GNA),
+ ::testing::ValuesIn(configs)),
+ RemovePermutationsNHWCToNCHWPassTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
+
// Executable Network GetMetric
//
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
+ DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
::testing::Values("GNA" /*, "MULTI:GNA", "HETERO:GNA" */));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
+ DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
::testing::Values("GNA" /*, "MULTI:GNA", "HETERO:GNA" */));
// TODO: this metric is not supported by the plugin
DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
::testing::Values("GNA", "MULTI:GNA", "HETERO:GNA"));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
+ DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
::testing::Values("GNA"/*, "MULTI:GNA", "HETERO:GNA" */));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
+ DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
::testing::Values("GNA", /* "MULTI:GNA", */ "HETERO:GNA"));
//
// Executable Network GetConfig / SetConfig
//
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkGetConfigTest, IEClassExecutableNetworkGetConfigTest,
+ DISABLED_IEClassExecutableNetworkGetConfigTest, IEClassExecutableNetworkGetConfigTest,
::testing::Values("GNA"));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkSetConfigTest, IEClassExecutableNetworkSetConfigTest,
+ DISABLED_IEClassExecutableNetworkSetConfigTest, IEClassExecutableNetworkSetConfigTest,
::testing::Values("GNA"));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkSupportedConfigTest, IEClassExecutableNetworkSupportedConfigTest,
+ DISABLED_IEClassExecutableNetworkSupportedConfigTest, IEClassExecutableNetworkSupportedConfigTest,
::testing::Combine(::testing::Values("GNA"),
::testing::Values(std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW),
std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW),
std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT),
std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO))));
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkUnsupportedConfigTest, IEClassExecutableNetworkUnsupportedConfigTest,
+ DISABLED_IEClassExecutableNetworkUnsupportedConfigTest, IEClassExecutableNetworkUnsupportedConfigTest,
::testing::Combine(::testing::Values("GNA"),
::testing::Values(std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_FP32),
std::make_pair(GNA_CONFIG_KEY(SCALE_FACTOR), "5"),
ASSERT_THROW(exeNetwork.SetConfig({{configKey, configValue}}), InferenceEngineException);
}
+// TODO: Convolution with 3D input is not supported on GNA
INSTANTIATE_TEST_CASE_P(
- IEClassExecutableNetworkSetConfigFromFp32Test, IEClassExecutableNetworkSetConfigFromFp32Test,
+ DISABLED_IEClassExecutableNetworkSetConfigFromFp32Test, IEClassExecutableNetworkSetConfigFromFp32Test,
::testing::Combine(::testing::Values("GNA"),
::testing::Values(std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW),
std::make_pair(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW),
INSTANTIATE_TEST_CASE_P(
IEClassHeteroExecutableNetworlGetMetricTest, IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK,
::testing::Values("GNA"));
-} // namespace
\ No newline at end of file
+} // namespace