From f0b10bf07110d633c66ac186e0bfdb57bc6dbd0e Mon Sep 17 00:00:00 2001 From: Eugene Smirnov Date: Mon, 21 Sep 2020 14:22:14 +0300 Subject: [PATCH] [GNA] fake quantize single layer tests for GNA plugin (#2060) * fake quantize single layer test for GNA plugin * implemented fakequantize for fp32 case as an activation function * added proper seed randomisation within single test run * [GNA] [FAKEQUANTIZE] fixed ref-fp32 implementation on GNA to use nearbyint instead of roundf * [GNA] [FAKEQUANTIZE] restored random seed * [GNA][FAKEQUANTIZE] disabled 4d and integer tests for FakeQuantize * [GNA][FAKEQUANTIZE] updated ngraph FakeQuantize builder to accept seed * [GNA][FAKEQUANTIZE] aligned FP calculation order on GNA with reference ngraph - this, however, gives a larger error * [CPU] build of FakeQuantize tests restored * [TESTS][FAKEQUANTIZE] ignore extra inferRequests for disabled tests * [GNA] Fixed legacy unit test failures that appeared due to extra check for possible segfault in import frames * [GNA] adapted fuse multiple identities for FakeQuantize layer * [GNA] fp32 runtime code review --- .../src/gna_plugin/backend/am_intel_dnn.cpp | 90 ++++------ .../src/gna_plugin/backend/am_intel_dnn.hpp | 2 - inference-engine/src/gna_plugin/backend/dnn.cpp | 173 ------------------- inference-engine/src/gna_plugin/backend/dnn.hpp | 15 -- .../src/gna_plugin/backend/dnn_components.cpp | 2 +- .../src/gna_plugin/backend/dnn_types.h | 15 +- .../src/gna_plugin/descriptions/gna_input_desc.cpp | 30 +++- .../src/gna_plugin/descriptions/gna_input_desc.hpp | 4 + .../src/gna_plugin/gna_graph_compiler.cpp | 73 +++++++- .../src/gna_plugin/gna_graph_compiler.hpp | 1 + .../src/gna_plugin/gna_graph_tools.hpp | 3 - .../src/gna_plugin/gna_model_serial.cpp | 1 + inference-engine/src/gna_plugin/gna_plugin.cpp | 25 ++- .../src/gna_plugin/layers/gna_layer_info.hpp | 7 + .../src/gna_plugin/layers/gna_layer_type.hpp | 4 +- .../src/gna_plugin/optimizer/gna_pass_manager.cpp | 12 +- inference-engine/src/gna_plugin/runtime/cnn.cpp | 2 - .../src/gna_plugin/runtime/gna_float_runtime.cpp | 88 ++++++++++ .../src/gna_plugin/runtime/gna_float_runtime.hpp | 41 +++++ .../gna_plugin/runtime/gna_float_runtime_op.cpp | 184 +++++++++++++++++++++ inference-engine/src/gna_plugin/runtime/pwl.cpp | 30 +++- .../single_layer_tests/fake_quantize.cpp | 12 +- .../single_layer_tests/fake_quantize.cpp | 68 ++++++++ .../include/single_layer_tests/fake_quantize.hpp | 23 ++- .../src/single_layer_tests/fake_quantize.cpp | 96 ++++++++++- .../ie_test_utils/common_test_utils/data_utils.hpp | 20 ++- .../functional_test_utils/blob_utils.hpp | 5 +- .../include/ngraph_functions/builders.hpp | 7 +- .../include/ngraph_functions/utils/data_utils.hpp | 26 ++- .../tests/ngraph_functions/src/fake_quantize.cpp | 23 +-- .../unit/engines/gna/gna_matcher.cpp | 3 +- 31 files changed, 753 insertions(+), 332 deletions(-) create mode 100644 inference-engine/src/gna_plugin/runtime/gna_float_runtime.cpp create mode 100644 inference-engine/src/gna_plugin/runtime/gna_float_runtime.hpp create mode 100644 inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp create mode 100644 inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/fake_quantize.cpp diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp index b8f77e3..43b0fa3 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp @@ -380,65 +380,6 @@ void 
GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn } } -void GNAPluginNS::backend::AMIntelDNN::Propagate() { - for (uint32_t i = 0; i < component.size(); i++) { - intel_dnn_component_t *comp = &component[i]; - uint32_t *ptr_active_outputs = nullptr; - uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation) - ? comp->num_rows_out : comp->num_columns_out; - - if (i == component.size() - 1) { // active list applies to last component - ptr_active_outputs = ptr_active_outputs_; - num_active_outputs = num_active_outputs_; - } else if (i == component.size() - 2) { // also applies to last two components when last is PWL - if ((component[i].operation == kDnnAffineOp) && (component[i + 1].operation == kDnnPiecewiselinearOp)) { - ptr_active_outputs = ptr_active_outputs_; - num_active_outputs = num_active_outputs_; - } - } - - switch (comp->operation) { - case kDnnAffineOp :ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs); - break; - case kDnnDiagonalOp:ApplyDiagonalTransform(comp); - break; - case kDnnRecurrentOp: - if ((i < component.size() - 1) && (component[i + 1].operation == kDnnPiecewiselinearOp)) { - intel_dnn_component_t *comp_pwl = &component[i + 1]; - for (uint32_t j = 0; j < comp->num_rows_in; j++) { - void *ptr_feedbacks = - reinterpret_cast(reinterpret_cast(comp->op.recurrent.ptr_feedbacks) + j * comp_pwl->num_columns_out); - ApplyRecurrentTransform(comp, j, ptr_feedbacks); - // PrintOutputs(i); - ApplyPiecewiseLinearTransform(comp_pwl, compute_precision_, num_active_outputs, j); - } - i++; // skip next component - } else { - fprintf(stderr, "Missing PiecewiseLinear component after Recurrent component in Propagate!\n"); - throw -1; - } - break; - case kDnnConvolutional1dOp:ApplyConvolutional1DTransform(comp); - break; - case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, compute_precision_, num_active_outputs); - break; - case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, compute_precision_); - break; - case kDnnInterleaveOp:ApplyTranspose(comp); - break; - case kDnnDeinterleaveOp:ApplyTranspose(comp); - break; - case kDnnCopyOp:ApplyCopy(comp); - break; - default:fprintf(stderr, "Bad operation in Propagate!\n"); - throw -1; - break; - } - // PrintOutputs(i); fflush(stdout); - } -} - - float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) { return comp.output_scale_factor; } @@ -529,11 +470,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename) graph << ", label=<\n" " \n"; -#ifdef PLOT if (components[k].original_layer_name != nullptr) { graph << " \n"; } -#endif graph << " \n"; if (IS_AFFINE(k)) { graph << " \n"; @@ -1191,6 +1130,35 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << " " << std::dec << sizeof(int16_t) << "\n"; out_file << " " << std::dec << sizeof(int16_t) << "\n"; out_file << " " << std::dec << sizeof(int32_t) << "\n"; + switch (func_id) { + case kActRelu: + case kActLeakyRelu: + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.lrelu.negative_slope << "\n"; + break; + case kActPow : + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.exponent << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.scale << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.offset << "\n"; + 
break; + case kActFakeQuantize : + out_file << " " << + std::dec << component[i].op.pwl.func_id.args.fakeQuantize.levels << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_low << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_high << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_low << "\n"; + out_file << " " << + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_high << "\n"; + break; + default: + break; + } if (logging_precision == kDnnFloat) { out_file << std::setprecision(12) << std::scientific << " " << 1.0 << "\n"; out_file << " " << std::dec << 0 << "\n"; diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp index 036381a..9073388 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp @@ -266,8 +266,6 @@ public: } - void Propagate(); - float OutputScaleFactor(uint32_t component_index) { return OutputScaleFactor(component[component_index]); } diff --git a/inference-engine/src/gna_plugin/backend/dnn.cpp b/inference-engine/src/gna_plugin/backend/dnn.cpp index 830d965..5fb03d5 100644 --- a/inference-engine/src/gna_plugin/backend/dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/dnn.cpp @@ -27,179 +27,6 @@ #include "runtime/cnn.h" -void GNAPluginNS::backend::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - - auto transform = &component->op.affine; - int m = component->num_rows_out; - int n = component->num_columns_in; - int k = component->num_rows_in; - int lda = component->num_rows_in; - int ldb = component->num_columns_in; - int ldc = component->num_columns_out; - - auto A = reinterpret_cast(transform->ptr_weights); - auto B = reinterpret_cast(component->ptr_inputs); - auto C = reinterpret_cast(component->ptr_outputs); - auto bias = reinterpret_cast(transform->ptr_biases); - if (list == nullptr) { - for (uint32_t i = 0; i < m; i++) { - for (uint32_t j = 0; j < n; j++) { - C[i * ldc + j] = bias[i]; - } - } - cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, A, lda, B, ldb, 1.0, C, ldc); - } else { - for (int l = 0; l < listsize; l++) { - int i = list[l]; - for (uint32_t j = 0; j < n; j++) { - C[l * ldc + j] = bias[i]; - } - } - cblas_sgemm_subset(CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - m, - n, - k, - 1.0, - A, - lda, - B, - ldb, - 1.0, - C, - ldc, - list, - listsize); - } -} - -void GNAPluginNS::backend::ApplyDiagonalTransform(intel_dnn_component_t *component) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - - auto transform = &component->op.affine; - int m = component->num_rows_out; - int n = component->num_columns_in; - int ldb = component->num_columns_in; - int ldc = component->num_columns_out; - - auto A = reinterpret_cast(transform->ptr_weights); - auto B = reinterpret_cast(component->ptr_inputs); - auto C = reinterpret_cast(component->ptr_outputs); - auto bias = reinterpret_cast(transform->ptr_biases); - for (uint32_t i = 0; i < m; i++) { - for (uint32_t j = 0; j < n; 
j++) { - C[i * ldc + j] = bias[i]; - } - } - for (uint32_t j = 0; j < n; j++) { - float *Bcol = B + j * ldb; - float *Ccol = C + j * ldc; - cblas_ssbmv1(CblasRowMajor, CblasLower, m, 0, 1.0, A, 1, Bcol, 1, 1.0, Ccol, 1); - } -} - -void GNAPluginNS::backend::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - - intel_recurrent_t *transform = &component->op.recurrent; - int k1 = component->num_columns_in; - int k2 = component->num_columns_out; - int n = k2; - - if (component->op.recurrent.ptr_feedbacks == nullptr) { - THROW_GNA_EXCEPTION << "nullptr feedback pointer"; - } - auto A1 = reinterpret_cast(component->ptr_inputs) + row * component->num_columns_in; - auto A2 = reinterpret_cast(ptr_feedbacks); - auto X = reinterpret_cast(transform->ptr_weights); - auto B = reinterpret_cast(transform->ptr_biases); - auto C = reinterpret_cast(component->ptr_outputs) + row * component->num_columns_out; - sgemv_split(n, k1, k2, A1, A2, X, B, C); -} - -void GNAPluginNS::backend::ApplyConvolutional1DTransform(intel_dnn_component_t *component) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - CNNFilter32(component); -} - -void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, - intel_dnn_number_type_t number_type, - uint32_t listsize) { - if (kDnnFloat != number_type) { - THROW_GNA_EXCEPTION << "Bad number type: " << number_type; - } - PwlApply32(component, listsize); -} - -void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, - intel_dnn_number_type_t number_type, - uint32_t listsize, - uint32_t num_row) { - if (kDnnFloat != number_type) { - THROW_GNA_EXCEPTION << "Bad number type: " << number_type; - } - PwlApply32(component, num_row, num_row, 0, listsize - 1); -} - -void GNAPluginNS::backend::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - CNNMaxPool(component, number_type); -} - -void GNAPluginNS::backend::ApplyTranspose(intel_dnn_component_t *component) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - - int m = component->num_rows_in; - int n = component->num_columns_in; - int lda = component->num_columns_in; - int ldb = component->num_columns_out; - // B = Transpose(A) where A is mxn and B is nxm - auto A = reinterpret_cast(component->ptr_inputs); - auto B = reinterpret_cast(component->ptr_outputs); - for (uint32_t row = 0; row < m; row++) { - for (uint32_t col = 0; col < n; col++) { - B[col * ldb + row] = A[row * lda + col]; - } - } -} - -void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) { - if (4 != component->num_bytes_per_input) { - THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; - } - - auto src = reinterpret_cast(component->ptr_inputs); - auto dst = reinterpret_cast(component->ptr_outputs); - int32_t m = component->op.copy.num_copy_rows; - int32_t n = component->op.copy.num_copy_columns; - int32_t lda = component->num_columns_in; - int32_t ldb = component->num_columns_out; - if (m > component->num_rows_in) { - THROW_GNA_EXCEPTION << "Error: attempt to copy more columns 
than matrix has"; - } - auto A = reinterpret_cast(src); - auto B = reinterpret_cast(dst); - for (uint32_t row = 0; row < m; row++) { - for (uint32_t col = 0; col < n; col++) { - B[row * ldb + col] = A[row * lda + col]; - } - } -} - bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) { bool isCompatible = true; diff --git a/inference-engine/src/gna_plugin/backend/dnn.hpp b/inference-engine/src/gna_plugin/backend/dnn.hpp index 599849d..e82c634 100644 --- a/inference-engine/src/gna_plugin/backend/dnn.hpp +++ b/inference-engine/src/gna_plugin/backend/dnn.hpp @@ -49,21 +49,6 @@ namespace GNAPluginNS { namespace backend { -void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize); -void ApplyDiagonalTransform(intel_dnn_component_t *component); -void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks); -void ApplyConvolutional1DTransform(intel_dnn_component_t *component); -void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, - intel_dnn_number_type_t number_type, - uint32_t listsize); -void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, - intel_dnn_number_type_t number_type, - uint32_t listsize, - uint32_t num_row); -void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type); -void ApplyTranspose(intel_dnn_component_t *component); -void ApplyCopy(intel_dnn_component_t *component); - void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int); bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2); void ClearScoreError(intel_score_error_t *error); diff --git a/inference-engine/src/gna_plugin/backend/dnn_components.cpp b/inference-engine/src/gna_plugin/backend/dnn_components.cpp index fd590a3..d5e25a9 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_components.cpp +++ b/inference-engine/src/gna_plugin/backend/dnn_components.cpp @@ -18,9 +18,9 @@ intel_dnn_component_t & backend::DnnComponents::addComponent(const std::string l components.emplace_back(layerName, intel_dnn_component_t()); auto ¤tComponent = components.back().second; #ifdef PLOT - currentComponent.original_layer_name = components.back().first.c_str(); std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl; #endif + currentComponent.original_layer_name = components.back().first.c_str(); return currentComponent; } diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index 73a2d06..39c8bc1 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -27,6 +27,7 @@ enum DnnActivationType : uint8_t { kActNegHalfLog, kActSoftSign, kActPow, + kActFakeQuantize, kActNumType }; @@ -43,7 +44,14 @@ struct DnnActivation { float offset; } pow; struct { - float reserved[3]; + int32_t levels; + float input_low; + float input_high; + float output_low; + float output_high; + } fakeQuantize; + struct { + float reserved[5]; }; } args; operator DnnActivationType () const noexcept { @@ -75,7 +83,8 @@ static const char *intel_dnn_activation_name[kActNumType] = { "kActNegHalfLog", "kActCustom", "kActSoftSign", - "kActPow" + "kActPow", + "kActFakeQuantize" }; typedef enum DnnSoftmaxType { @@ -232,9 +241,7 @@ typedef struct { void 
*ptr_outputs; float output_scale_factor; float input_scale_factor; -#ifdef PLOT const char * original_layer_name = nullptr; -#endif } intel_dnn_component_t; typedef struct { diff --git a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp index d933da7..a8104e8 100644 --- a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp +++ b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.cpp @@ -4,11 +4,35 @@ #include #include +#include +#include +#include #include "gna_input_desc.hpp" #include "gna_plugin_log.hpp" -std::vector& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::string& name) { +using namespace InferenceEngine; +using namespace GNAPluginNS; + +size_t InputDesc::minBytesRequiredForStoreInput(CNNLayerPtr layer) { + auto quantized = getInjectedData(layer); + size_t precision_bytes; + if (quantized) { + precision_bytes = 2; + } else { + precision_bytes = 4; + } + if (!LayerInfo(layer).isInput()) { + THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput expected to work on an \"Input\" layer"; + } + if (layer->outData.size() != 1) { + THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput invalid outData for the layer"; + } + auto dims = layer->outData.front()->getTensorDesc().getDims(); + return details::product(dims.begin(), dims.end()) * precision_bytes; +} + +std::vector& InputDesc::getPtrInputsGlobal(const std::string& name) { if (ptr_inputs_global_id.find(name) == ptr_inputs_global_id.end()) { ptr_inputs_global_storage.push_front({}); ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin(); @@ -16,14 +40,14 @@ std::vector& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::strin return *ptr_inputs_global_id[name]; } -intel_dnn_orientation_t GNAPluginNS::InputDesc::getOrientation(const std::string& name) { +intel_dnn_orientation_t InputDesc::getOrientation(const std::string& name) { if (orientation_in.find(name) == orientation_in.end()) { THROW_GNA_EXCEPTION << "Can't find orientation for input name '" << name << "'"; } return orientation_in[name]; } -float GNAPluginNS::InputDesc::getScaleFactor(const std::size_t index) { +float InputDesc::getScaleFactor(const std::size_t index) { if (index >= inputScaleFactors.size()) { THROW_GNA_EXCEPTION << "Can't find scale factor for index = " << index; } diff --git a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp index 96c9029..f80931b 100644 --- a/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp +++ b/inference-engine/src/gna_plugin/descriptions/gna_input_desc.hpp @@ -9,6 +9,8 @@ #include #include #include +#include + #include "backend/dnn_types.h" namespace GNAPluginNS { @@ -17,6 +19,8 @@ struct InputDesc { /// order of scale factors matches inputs order in original topology std::vector inputScaleFactors; std::map bytes_allocated_for_input; + size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr); + std::unordered_map>::iterator> ptr_inputs_global_id; std::list> ptr_inputs_global_storage; diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 84e0c70..219dcdb 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -1480,6 +1480,14 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) } } +void 
GNAGraphCompiler::FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr layer) { + // in FP32 mode let's use a special form of activation that satisfies the fakeQuantize formula + if (gnaFlags->sw_fp32) { + PWLPrimitive(layer); + return; + } +} + void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { auto* generic = dynamic_cast(layer.get()); std::string type; @@ -1558,7 +1566,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { {"neglog", kActNegLog}, {"neghalflog", kActNegHalfLog}, {"identity", kActIdentity}, - {"softsign", kActSoftSign} + {"softsign", kActSoftSign}, + {"fakequantize", kActFakeQuantize} }; auto it = supportedActivations.find(type); @@ -1573,6 +1582,42 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { activation_type.args.lrelu.negative_slope = 0.0f; } + if (it->second == kActFakeQuantize) { + // get params from const input + auto GetParamFromInputAsFloat = [](CNNLayerPtr input, size_t idx) { + if (input->insData.size() <= idx) { + THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << " input"; + } + auto iLayerData = input->insData[idx].lock(); + if (!iLayerData) { + THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference data weak-pointer"; + } + auto iLayer = getCreatorLayer(iLayerData).lock(); + if (!iLayer) { + THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer"; + } + if (!LayerInfo(iLayer).isConst()) { + THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: expected to be of type const, but was: " << iLayer->type; + } + + if (!iLayer->blobs.count("custom")) { + THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot get custom blob"; + } + auto data = iLayer->blobs["custom"]; + if (data->getTensorDesc().getPrecision() != Precision::FP32) { + THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot cast custom blob to type FP32, since it is of type: " << data->getTensorDesc().getPrecision(); + } + + return data->cbuffer().as()[0]; + }; + + activation_type.args.fakeQuantize.levels = layer->GetParamAsInt("levels"); + activation_type.args.fakeQuantize.input_low = GetParamFromInputAsFloat(layer, 1); + activation_type.args.fakeQuantize.input_high = GetParamFromInputAsFloat(layer, 2); + activation_type.args.fakeQuantize.output_low = GetParamFromInputAsFloat(layer, 3); + activation_type.args.fakeQuantize.output_high = GetParamFromInputAsFloat(layer, 4); + } + string actName = "unknown"; #ifdef PLOT @@ -1776,7 +1821,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) { {{"Crop"}, CREATE(CropPrimitive)}, {{"Copy"}, CREATE(CopyPrimitive)}, {{"TensorIterator"}, SKIP}, - {{"LSTMCell"}, SKIP} + {{"LSTMCell"}, SKIP}, + {{"FakeQuantize"}, CREATE(FakeQuantizePrimitive)} // TODO: fakequantize layer should be properly converted to GNA scale factors for integer case }; auto it = LayersBuilder::getStorage().find(layer->type); if (it != LayersBuilder::getStorage().end()) { @@ -1914,10 +1960,17 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p if (included == concat_connection.end()) { gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64); + size_t concatInputIdx = 0; for (auto &&inputLayer : concatLayerInfoItem.concatInputLayers) { - if (InferenceEngine::details::CaselessEq() - (inputLayer.name, "input")) { - inputDesc->bytes_allocated_for_input[inputLayer.name] = inputLayer.tensorSize; + // skipping 
non-functional and reshape layers, as in that case the input might not be connected to anything + auto realConcatInputs = CNNNetGetPrevLayersSkip(concat, [](CNNLayerPtr l) { + return !LayerInfo(l).isNonFunctional() && !LayerInfo(l).isSplit(); + }, concatInputIdx++); + + for (auto rInput : realConcatInputs) { + if (LayerInfo(rInput.first).isInput()) { + inputDesc->bytes_allocated_for_input[rInput.first->name] += inputLayer.tensorSize; + } } } concatLayerInfoItem.input_allocated = true; @@ -1960,7 +2013,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // real input not a memory input if (LayerInfo(prevLayer).isInput()) { if (0 == inputDesc->bytes_allocated_for_input[prevLayer->name]) { - // real allocation pointer will be kept in ptr not in ptf_inputs_global + // if the requested allocation is less than the real input tensor size - we need to extend the request + auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer); + if (num_data_bytes_in < minInput) { + gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to " << ALIGN(minInput, 8); + num_data_bytes_in = ALIGN(minInput, 8); + } + + // real allocation pointer will be kept in ptr not in ptr_inputs_global if (offset < 0) { gnamem->push_value(ptr, static_cast(0), @@ -1972,7 +2032,6 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, num_data_bytes_in, 64); } - inputDesc->bytes_allocated_for_input[prevLayer->name] = num_data_bytes_in; } if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputDesc->bytes_allocated_for_input[prevLayer->name], 64)) { diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp index 22a2ada..8f310ab 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp @@ -120,6 +120,7 @@ public: void SplitPrimitive(InferenceEngine::CNNLayerPtr); void SlicePrimitive(InferenceEngine::CNNLayerPtr); void PWLPrimitive(InferenceEngine::CNNLayerPtr); + void FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr); void CopyPrimitive(InferenceEngine::CNNLayerPtr); void Reset(); diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp index a185174..4ef15f4 100644 --- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp @@ -185,9 +185,6 @@ inline std::pair CNNNetCheckNextLayerSkipCer */ template inline std::vector CNNNetGetAllNextLayersSkipCertain(Layer layer, int oDataIdx, const std::function &shouldSkip) { - // TODO: need to have generic function that creates slice of the graph : starting from given layer - // and skipped all non functional - ending up into functional one - std::list currentSet; std::vector resultSet; diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp index 977610d..c74d1c7 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.cpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp @@ -696,6 +696,7 @@ void GNAModelSerial::ImportInputs(std::istream &is, is.read(reinterpret_cast(&input), sizeof(input)); inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast(reinterpret_cast (basePtr) + input.descriptor_offset)); inputsDesc->orientation_in[name] = input.orientation; + inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count; auto inputDims = 
InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup}); diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 820fc70..81167ab 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -36,6 +36,7 @@ #include "memory/gna_allocator.hpp" #include "memory/gna_memory_state.hpp" #include "gna_model_serial.hpp" +#include "runtime/gna_float_runtime.hpp" #if GNA_LIB_VER == 2 #include @@ -903,15 +904,28 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer auto dims = input.second->getTensorDesc().getDims(); + auto importedElements = is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]; + auto importedFrames = dims[0]; + auto targetGroups = is2D ? dims[dims.size() - 2] : dims[0]; // TODO: no proper support for groups yet + + auto importedElementSizeBytes = gnaFlags->sw_fp32 ? 4 : 2; + auto importedBytes = importedElements * importedFrames * importedElementSizeBytes; + + if (inputsDesc->bytes_allocated_for_input[input.first] < importedBytes) { + THROW_GNA_EXCEPTION << "Cannot import input frames for: " << input.first + << ", allocated size: " << inputsDesc->bytes_allocated_for_input[input.first] + << ", but input blob size: " << importedBytes; + } + ImportFrames(inputsDesc->getPtrInputsGlobal(input.first)[idx], input.second->cbuffer().as(), input.second->getTensorDesc().getPrecision(), gnaFlags->sw_fp32 ? 1.0f : inputsDesc->getScaleFactor(inputNum), inputsDesc->getOrientation(input.first), - dims[0], - is2D ? dims[dims.size() - 2] : dims[0], - is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3], - is2D ? 
dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]); + importedFrames, + targetGroups, + importedElements, + importedElements); bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1; if (do_rotate_input && ((inputLayout == Layout::NC || inputLayout == Layout::NCHW) @@ -929,7 +943,8 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer } if (!gnadevice) { - dnn->Propagate(); + auto runtime = runtime::FP(dnn); + runtime.infer(); if (freeNnet != nnets.end()) { std::get<1>(*freeNnet) = 1; } diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp index fc3c44e..a83ef22 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp @@ -49,6 +49,10 @@ class LayerInfo { explicit LayerInfo(InferenceEngine::CNNLayer * layer) : layer(layer) { } + bool hasMultipleInputs() const noexcept { + IS_VALID(); + return layer->insData.size() > 1; + } bool has16BOutput() const noexcept { IS_VALID(); static InferenceEngine::details::caseless_set layersWith16BOutputs = {"memory", "input", "split", "slice", "concat", "copy", "const"}; @@ -200,6 +204,9 @@ class LayerInfo { bool isConcat() const noexcept { return isOfType("concat"); } + bool isFakeQuantize() const noexcept { + return isOfType("fakequantize"); + } bool isNonFunctional() const noexcept { return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze"); } diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp index 8456999..ec12dac 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp @@ -48,6 +48,7 @@ enum LayerType { LSTMCell, TensorIterator, SoftSign, + FakeQuantize, NO_TYPE }; @@ -84,7 +85,8 @@ static const InferenceEngine::details::caseless_map(l.get()); - auto concat = dynamic_cast(l.get()); - - if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput()) + if (LayerInfo(l).hasMultipleInputs()) { continue; + } + if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput()) { + continue; + } gnalog() << "CNNNetPrevLayer skip non functional from :: " << l->name; auto isFunctional = [](CNNLayerPtr ptr) { return !LayerInfo(ptr).isNonFunctional(); }; @@ -1310,7 +1311,7 @@ void FuseMultipleIdentitiesPass::run() { return LayerInfo(candidate.first).isLink(); }), prevLayersReached.end()); - if (prevLayersReached.size() != 1 && eltwise == nullptr && concat == nullptr) { + if (prevLayersReached.size() != 1) { std::stringstream layers; for (auto && prevLayer : prevLayersReached) { layers << prevLayer.first->name; @@ -1361,7 +1362,6 @@ void FuseMultipleIdentitiesPass::run() { } int PassManager::run(int index) { -// #define PLOT #ifdef PLOT auto dumpNetworkAfterPass = [&index, this] (std::shared_ptr pass) { std::string name = std::string("gna_passes_") + (index < 10 ? 
"0" : "") + std::to_string(index) + "_" + pass->getName(); diff --git a/inference-engine/src/gna_plugin/runtime/cnn.cpp b/inference-engine/src/gna_plugin/runtime/cnn.cpp index 0f371e5..47d2970 100644 --- a/inference-engine/src/gna_plugin/runtime/cnn.cpp +++ b/inference-engine/src/gna_plugin/runtime/cnn.cpp @@ -22,9 +22,7 @@ void CNNFilter32(intel_dnn_component_t *component) { uint32_t num_filter_coefficients = component->op.conv1D.num_filter_coefficients; std::string layer_name; -#ifdef PLOT layer_name = " In layer '" + std::string(component->original_layer_name) + "'"; -#endif if (component->num_rows_in != 1 || component->num_rows_out != 1) { THROW_GNA_EXCEPTION << "Bad number of rows in CNNFilter32!" << layer_name; } diff --git a/inference-engine/src/gna_plugin/runtime/gna_float_runtime.cpp b/inference-engine/src/gna_plugin/runtime/gna_float_runtime.cpp new file mode 100644 index 0000000..1ba0ce7 --- /dev/null +++ b/inference-engine/src/gna_plugin/runtime/gna_float_runtime.cpp @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include +#include +#include +#include "gna_float_runtime.hpp" + +using namespace GNAPluginNS; +using namespace GNAPluginNS::runtime; + + +void FP::infer() { + if (!dnn) { + THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] not initialized"; + } + + for (uint32_t i = 0; i < dnn->component.size(); i++) { + intel_dnn_component_t *comp = &dnn->component[i]; + uint32_t *ptr_active_outputs = nullptr; + uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation) + ? comp->num_rows_out : comp->num_columns_out; + + if (i == dnn->component.size() - 1) { // active list applies to last component + ptr_active_outputs = dnn->ptr_active_outputs(); + num_active_outputs = dnn->num_active_outputs(); + } else if (i == dnn->component.size() - 2) { // also applies to last two components when last is PWL + if ((dnn->component[i].operation == kDnnAffineOp) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) { + ptr_active_outputs = dnn->ptr_active_outputs(); + num_active_outputs = dnn->num_active_outputs(); } + } + + switch (comp->operation) { + case kDnnAffineOp : { + ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs); + break; + } + case kDnnDiagonalOp: { + ApplyDiagonalTransform(comp); + break; + } + case kDnnRecurrentOp: { + if ((i < dnn->component.size() - 1) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) { + intel_dnn_component_t *comp_pwl = &dnn->component[i + 1]; + for (uint32_t j = 0; j < comp->num_rows_in; j++) { + void *ptr_feedbacks = + reinterpret_cast(reinterpret_cast(comp->op.recurrent.ptr_feedbacks) + + j * comp_pwl->num_columns_out); + ApplyRecurrentTransform(comp, j, ptr_feedbacks); + ApplyPiecewiseLinearTransform(comp_pwl, kDnnFloat, num_active_outputs, j); + } + i++; // skip next component + } else { + THROW_GNA_EXCEPTION << "Missing PiecewiseLinear component after Recurrent component in Propagate!"; + } + break; + } + case kDnnConvolutional1dOp: { + ApplyConvolutional1DTransform(comp); + break; + } + case kDnnPiecewiselinearOp: { + ApplyPiecewiseLinearTransform(comp, kDnnFloat, num_active_outputs); + break; + } + case kDnnMaxPoolOp: { + ApplyMaxPoolTransform(comp, kDnnFloat); + break; + } + case kDnnInterleaveOp: { + ApplyTranspose(comp); + break; + } + case kDnnDeinterleaveOp: { + ApplyTranspose(comp); + break; + } + case kDnnCopyOp: { + ApplyCopy(comp); + break; + } + default: + THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] Bad operation " << 
comp->operation; + } + } +} \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/runtime/gna_float_runtime.hpp b/inference-engine/src/gna_plugin/runtime/gna_float_runtime.hpp new file mode 100644 index 0000000..ce0457c --- /dev/null +++ b/inference-engine/src/gna_plugin/runtime/gna_float_runtime.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include + +namespace GNAPluginNS { +namespace runtime { +/** + * @brief floating-point runtime for the GNA plugin; in most cases it uses the same GNA primitive descriptions as the integer runtime, but executes them on CPU + */ +class FP { + std::shared_ptr dnn; + public: + FP(std::shared_ptr dnn) : dnn(dnn) { + } + virtual void infer(); + + /** + * atomic operations for floating-point inference + */ + static void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize); + static void ApplyDiagonalTransform(intel_dnn_component_t *component); + static void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks); + static void ApplyConvolutional1DTransform(intel_dnn_component_t *component); + static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, + intel_dnn_number_type_t number_type, + uint32_t listsize); + static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, + intel_dnn_number_type_t number_type, + uint32_t listsize, + uint32_t num_row); + static void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type); + static void ApplyTranspose(intel_dnn_component_t *component); + static void ApplyCopy(intel_dnn_component_t *component); +}; + +} // namespace runtime + +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp b/inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp new file mode 100644 index 0000000..1ea9df7 --- /dev/null +++ b/inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp @@ -0,0 +1,184 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gna_float_runtime.hpp" +#include "pwl.h" +#include "cnn.h" +#include "floatmath.h" + +using namespace GNAPluginNS; +using namespace GNAPluginNS::runtime; + +void FP::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + + auto transform = &component->op.affine; + int m = component->num_rows_out; + int n = component->num_columns_in; + int k = component->num_rows_in; + int lda = component->num_rows_in; + int ldb = component->num_columns_in; + int ldc = component->num_columns_out; + + auto A = reinterpret_cast(transform->ptr_weights); + auto B = reinterpret_cast(component->ptr_inputs); + auto C = reinterpret_cast(component->ptr_outputs); + auto bias = reinterpret_cast(transform->ptr_biases); + if (list == nullptr) { + for (uint32_t i = 0; i < m; i++) { + for (uint32_t j = 0; j < n; j++) { + C[i * ldc + j] = bias[i]; + } + } + cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, A, lda, B, ldb, 1.0, C, ldc); + } else { + for (int l = 0; l < listsize; l++) { + int i = list[l]; + for (uint32_t j = 0; j < n; j++) { + C[l * ldc + j] = bias[i]; + } + } + cblas_sgemm_subset(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + m, + n, + k, + 1.0, + A, + lda, + B, + ldb, + 1.0, + C, + ldc, + 
list, + listsize); + } +} + +void FP::ApplyDiagonalTransform(intel_dnn_component_t *component) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + + auto transform = &component->op.affine; + int m = component->num_rows_out; + int n = component->num_columns_in; + int ldb = component->num_columns_in; + int ldc = component->num_columns_out; + + auto A = reinterpret_cast(transform->ptr_weights); + auto B = reinterpret_cast(component->ptr_inputs); + auto C = reinterpret_cast(component->ptr_outputs); + auto bias = reinterpret_cast(transform->ptr_biases); + for (uint32_t i = 0; i < m; i++) { + for (uint32_t j = 0; j < n; j++) { + C[i * ldc + j] = bias[i]; + } + } + for (uint32_t j = 0; j < n; j++) { + float *Bcol = B + j * ldb; + float *Ccol = C + j * ldc; + cblas_ssbmv1(CblasRowMajor, CblasLower, m, 0, 1.0, A, 1, Bcol, 1, 1.0, Ccol, 1); + } +} + +void FP::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + + intel_recurrent_t *transform = &component->op.recurrent; + int k1 = component->num_columns_in; + int k2 = component->num_columns_out; + int n = k2; + + if (component->op.recurrent.ptr_feedbacks == nullptr) { + THROW_GNA_EXCEPTION << "nullptr feedback pointer"; + } + auto A1 = reinterpret_cast(component->ptr_inputs) + row * component->num_columns_in; + auto A2 = reinterpret_cast(ptr_feedbacks); + auto X = reinterpret_cast(transform->ptr_weights); + auto B = reinterpret_cast(transform->ptr_biases); + auto C = reinterpret_cast(component->ptr_outputs) + row * component->num_columns_out; + sgemv_split(n, k1, k2, A1, A2, X, B, C); +} + +void FP::ApplyConvolutional1DTransform(intel_dnn_component_t *component) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + CNNFilter32(component); +} + +void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, + intel_dnn_number_type_t number_type, + uint32_t listsize) { + if (kDnnFloat != number_type) { + THROW_GNA_EXCEPTION << "Bad number type: " << number_type; + } + PwlApply32(component, listsize); +} + +void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component, + intel_dnn_number_type_t number_type, + uint32_t listsize, + uint32_t num_row) { + if (kDnnFloat != number_type) { + THROW_GNA_EXCEPTION << "Bad number type: " << number_type; + } + PwlApply32(component, num_row, num_row, 0, listsize - 1); +} + +void FP::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + CNNMaxPool(component, number_type); +} + +void FP::ApplyTranspose(intel_dnn_component_t *component) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + + int m = component->num_rows_in; + int n = component->num_columns_in; + int lda = component->num_columns_in; + int ldb = component->num_columns_out; + // B = Transpose(A) where A is mxn and B is nxm + auto A = reinterpret_cast(component->ptr_inputs); + auto B = reinterpret_cast(component->ptr_outputs); + for (uint32_t row = 0; row < m; row++) { + for (uint32_t col = 0; col < n; col++) { + B[col * ldb + row] = A[row * lda + col]; + } + } +} + 
+void FP::ApplyCopy(intel_dnn_component_t *component) { + if (4 != component->num_bytes_per_input) { + THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input; + } + + auto src = reinterpret_cast(component->ptr_inputs); + auto dst = reinterpret_cast(component->ptr_outputs); + int32_t m = component->op.copy.num_copy_rows; + int32_t n = component->op.copy.num_copy_columns; + int32_t lda = component->num_columns_in; + int32_t ldb = component->num_columns_out; + if (m > component->num_rows_in) { + THROW_GNA_EXCEPTION << "Error: attempt to copy more rows than matrix has"; + } + auto A = reinterpret_cast(src); + auto B = reinterpret_cast(dst); + for (uint32_t row = 0; row < m; row++) { + for (uint32_t col = 0; col < n; col++) { + B[row * ldb + col] = A[row * lda + col]; + } + } +} diff --git a/inference-engine/src/gna_plugin/runtime/pwl.cpp b/inference-engine/src/gna_plugin/runtime/pwl.cpp index d3d8c77..c2e8ace 100644 --- a/inference-engine/src/gna_plugin/runtime/pwl.cpp +++ b/inference-engine/src/gna_plugin/runtime/pwl.cpp @@ -1046,9 +1046,33 @@ void PwlApply32(intel_dnn_component_t *component, } } break; + case kActFakeQuantize: { + auto input_low = transform->func_id.args.fakeQuantize.input_low; + auto input_high = transform->func_id.args.fakeQuantize.input_high; + auto output_low = transform->func_id.args.fakeQuantize.output_low; + auto output_high = transform->func_id.args.fakeQuantize.output_high; + auto levels = transform->func_id.args.fakeQuantize.levels; + // TODO: this special modification for speed-up of compute gives a different result than the straight FQ formula, + // but it is used in the reference ngraph FakeQuantize implementation, so we need to honor it for a while + float scaleInput = (input_high - input_low) / (levels-1); + float scaleOutputs = (output_high - output_low) / (levels-1); + + for (uint32_t i = num_row_start; i <= num_row_end; i++) { + for (uint32_t j = num_col_start; j <= num_col_end; j++) { + auto x = ptr_in[i * num_columns + j]; + if (x < std::min(input_low, input_high)) { + ptr_out[i * num_columns + j] = output_low; + } else if (x > std::max(input_low, input_high)) { + ptr_out[i * num_columns + j] = output_high; + } else { + ptr_out[i * num_columns + j] = nearbyint((x - input_low) / scaleInput) * scaleOutputs + output_low; + } + } + } + break; + } case kActCustom: - // break; - default:fprintf(stderr, "Unknown piecewise linear function type!\n"); - throw -1; + default: + THROW_GNA_EXCEPTION << component->original_layer_name << ", Unknown piecewise linear function type: " << transform->func_id.type; } } diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp index d4f5e41..c14e648 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp @@ -20,9 +20,16 @@ const std::vector> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6 const std::vector> constShapes = {{1}}; const std::vector levels = {16, 255, 256}; +const std::pair> config = {}; +const std::vector fqArgs = {}; +const std::vector inputParams = {}; + + const auto fqParams = ::testing::Combine( ::testing::ValuesIn(levels), - ::testing::ValuesIn(constShapes) + ::testing::ValuesIn(constShapes), + ::testing::Values(fqArgs), + ::testing::Values(inputParams) ); 
INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest, @@ -30,7 +37,8 @@ INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest, fqParams, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(config)), FakeQuantizeLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/fake_quantize.cpp new file mode 100644 index 0000000..bad19b0 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "single_layer_tests/fake_quantize.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, +}; + +using ConfigType = std::map; +const ConfigType configFP32 = { + {"GNA_DEVICE_MODE", "GNA_SW_FP32"}, +}; +const ConfigType configInt16 = { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I16"}, + {"GNA_SCALE_FACTOR_0", "327.67"} +}; +const ConfigType configInt8 = { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I8"}, + {"GNA_SCALE_FACTOR_0", "327.67"} +}; + +/** + * @brief specific quantisation mode to be used internally + */ +const std::vector> gnaQuantModes = { + {"sw_fp32", configFP32}, +// TODO: support FakeQuantize in integer mode +// {"sw_exact_i16", configInt16}, +// {"sw_exact_i8", configInt8}, +}; + +// TODO: uncomment once fixed proper 4d import for GNA-plugin issue: 38806 +const std::vector> inputShapes = {{1, 1, 1, 1}, /*{3, 10, 5, 6}*/}; +const std::vector> constShapes = {{1}}; +const std::vector levels = {16, 255, 256}; + +const std::vector> fqArgs = {{0, 10, 2, 5}, {}}; +const std::vector> inputParams = {{-10, 10, 0.1}, {}}; + +const auto fqParams = ::testing::Combine( + ::testing::ValuesIn(levels), + ::testing::ValuesIn(constShapes), + ::testing::ValuesIn(fqArgs), + ::testing::ValuesIn(inputParams) +); + +INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest, + ::testing::Combine( + fqParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(gnaQuantModes)), + FakeQuantizeLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp index 661f83b..a23db9b 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp @@ -14,14 +14,18 @@ #include "ngraph_functions/utils/ngraph_helpers.hpp" typedef std::tuple< - size_t, // levels - std::vector // const inputs shape + size_t, // levels + std::vector, // const inputs shape + std::vector, // fake quantize inputLow, inputHigh, outputLow, outputHigh or empty for random + std::vector // input generator data: low, high, resolution > fqSpecificParams; typedef std::tuple< 
fqSpecificParams, - InferenceEngine::Precision, // Net precision - InferenceEngine::SizeVector, // Input shapes - LayerTestsUtils::TargetDevice // Device name + InferenceEngine::Precision, // Net precision + InferenceEngine::SizeVector, // Input shapes + LayerTestsUtils::TargetDevice, // Device name + + std::pair> // Additional backend configuration and an alias name for it > fqLayerTestParamsSet; namespace LayerTestsDefinitions { @@ -30,9 +34,16 @@ class FakeQuantizeLayerTest : public testing::WithParamInterface obj); - + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; protected: void SetUp() override; + void UpdateSeed(); + + protected: + float inputDataMin = 0.0; + float inputDataMax = 10.0; + float inputDataResolution = 1.0; + int32_t seed = 1; }; } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp index e16eab4..dd8b623 100644 --- a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp @@ -18,6 +18,17 @@ #include "single_layer_tests/fake_quantize.hpp" +// seed selected using current clock time +#define USE_CLOCK_TIME 1 +// seed starts from the default value and is incremented each time by a big number like 9999 +#define USE_INCREMENTAL_SEED 2 + +/** + * redefine this seed to reproduce an issue with a given seed, which can be read from the gtest logs + */ +#define BASE_SEED USE_CLOCK_TIME +#define NGRAPH_SEED USE_CLOCK_TIME + namespace LayerTestsDefinitions { std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo obj) { @@ -25,10 +36,13 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo> config; + std::tie(fqParams, netPrecision, inputShapes, targetDevice, config) = obj.param; size_t levels; std::vector constShape; - std::tie(levels, constShape) = fqParams; + std::vector fqDirectArgs; + std::vector inputArg; + std::tie(levels, constShape, fqDirectArgs, inputArg) = fqParams; std::ostringstream result; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; @@ -36,29 +50,101 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo inputShape; + std::pair> config; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(fqParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + std::tie(fqParams, netPrecision, inputShape, targetDevice, config) = this->GetParam(); InferenceEngine::SizeVector kernel, stride, dilation; size_t levels; std::vector constShape; - std::tie(levels, constShape) = fqParams; + std::vector fqDirectArg; + std::vector inputArg; + std::tie(levels, constShape, fqDirectArg, inputArg) = fqParams; + if (inputArg.size() == 3) { + inputDataMin = inputArg[0]; + inputDataMax = inputArg[1]; + inputDataResolution = inputArg[2]; + } auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); - auto fq = std::dynamic_pointer_cast(ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape)); + UpdateSeed(); + + std::shared_ptr fakeQNode; + if (fqDirectArg.empty()) { + int32_t ngraphSeed = seed; + if (NGRAPH_SEED != USE_CLOCK_TIME) { + ngraphSeed = NGRAPH_SEED; + 
} + std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m" + << "ngraphSeed = " << ngraphSeed << std::endl; + fakeQNode = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape, ngraphSeed); + } else { + fakeQNode = ngraph::builder::makeFakeQuantize( + paramOuts[0], + ngPrc, + levels, + constShape, + {fqDirectArg[0]}, + {fqDirectArg[1]}, + {fqDirectArg[2]}, + {fqDirectArg[3]}); + } + + + auto fq = std::dynamic_pointer_cast(fakeQNode); ngraph::ResultVector results{std::make_shared(fq)}; function = std::make_shared(results, params, "fakeQuantize"); + + configuration = config.second; +} + +InferenceEngine::Blob::Ptr FakeQuantizeLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution, seed); +} + +void FakeQuantizeLayerTest::UpdateSeed() { + if (BASE_SEED == USE_CLOCK_TIME) { + seed = std::chrono::system_clock::now().time_since_epoch().count(); + } else if (BASE_SEED == USE_INCREMENTAL_SEED) { + seed += 9999; + } else { + seed = BASE_SEED; + } + std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m" + << "seed = " << seed << std::endl; } TEST_P(FakeQuantizeLayerTest, CompareWithRefs) { Run(); + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + + if (BASE_SEED != USE_CLOCK_TIME && + BASE_SEED != USE_INCREMENTAL_SEED) { + return; + } + + size_t nIterations = (inputDataMax - inputDataMin) / inputDataResolution; + for (; nIterations != 0; nIterations--) { + UpdateSeed(); + Infer(); + Validate(); + } } } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp index 1cd4d53..c6eeab1 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp @@ -110,7 +110,7 @@ static void fill_data_bbox(float *data, size_t size, int height, int width, floa * - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc. 
*/ template -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1) { +void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, const int seed = 1) { using dataType = typename InferenceEngine::PrecisionTrait::value_type; testing::internal::Random random(1); random.Generate(range); @@ -144,8 +144,7 @@ void inline fill_data_consistently(InferenceEngine::Blob::Ptr &blob, const uint3 } template -void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k, - const int seed = 1) { +void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k, const int seed = 1) { using dataType = typename InferenceEngine::PrecisionTrait::value_type; std::default_random_engine random(seed); // 1/k is the resolution of the floating point numbers @@ -199,13 +198,20 @@ void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const float } template<> -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) { - fill_data_random_float(blob, range, start_from, k); +void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, + const uint32_t range, + int32_t start_from, + const int32_t k, + const int seed) { + fill_data_random_float(blob, range, start_from, k, seed); } template<> -void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) { - fill_data_random_float(blob, range, start_from, k); +void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, + const uint32_t range, + int32_t start_from, + const int32_t k, const int seed) { + fill_data_random_float(blob, range, start_from, k, seed); } } // namespace CommonTestUtils diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp b/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp index cbb28cd..fc7d98b 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp @@ -455,11 +455,12 @@ InferenceEngine::Blob::Ptr inline createAndFillBlobWithFloatArray(const Inferenc InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::TensorDesc &td, const uint32_t range = 10, const int32_t start_from = 0, - const int32_t resolution = 1) { + const int32_t resolution = 1, + const int seed = 1) { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td); blob->allocate(); switch (td.getPrecision()) { -#define CASE(X) case X: CommonTestUtils::fill_data_random(blob, range, start_from, resolution); break; +#define CASE(X) case X: CommonTestUtils::fill_data_random(blob, range, start_from, resolution, seed); break; CASE(InferenceEngine::Precision::FP32) CASE(InferenceEngine::Precision::FP16) CASE(InferenceEngine::Precision::U8) diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp index 302f3c1..ee0e1f0 100644 --- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp @@ -26,14 +26,14 @@ makeParams(const element::Type &type, const std::vector std::shared_ptr 
diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp
index 302f3c1..ee0e1f0 100644
--- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp
+++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp
@@ -26,14 +26,14 @@ makeParams(const element::Type &type, const std::vector<std::vector<size_t>> &s
 
 template<typename T>
 std::shared_ptr<Node>
 makeConstant(const element::Type &type, const std::vector<size_t> &shape, const std::vector<T> &data, bool random = false,
-             uint32_t upTo = 10, uint32_t startFrom = 1) {
+             uint32_t upTo = 10, uint32_t startFrom = 1, const int seed = 1) {
     std::shared_ptr<ngraph::Node> weightsNode;
 
 #define makeNode(TYPE) \
         case TYPE: \
             weightsNode = std::make_shared<ngraph::opset1::Constant>( \
                     type, shape, \
-                    random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom) : \
+                    random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom, seed) : \
                              NGraphFunctions::Utils::castVector<T, typename ngraph::helpers::nGraphTypesTrait<TYPE>::value_type>(data)); \
             break;
     switch (type) {
@@ -274,7 +274,8 @@ std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<Node> &in,
 std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node> &in,
                                                const element::Type &type,
                                                std::size_t levels,
-                                               std::vector<size_t> constShapes);
+                                               std::vector<size_t> constShapes,
+                                               const int32_t seed = 1);
 
 std::shared_ptr<ngraph::Node> makeCumSum(const ngraph::Output<ngraph::Node> &in,
                                          const ngraph::Output<ngraph::Node> &axis,
diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp
index ebcf456..c8d0507 100644
--- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp
+++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp
@@ -17,11 +17,14 @@ namespace Utils {
 
 template<ngraph::element::Type_t dType>
 std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> inline
-generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
+generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
     std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> res;
-    std::mt19937 gen(
-        static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
+    if (seed == 1) {
+        seed = static_cast<int32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+    }
+
+    std::mt19937 gen(seed);
     // chose values between this range to avoid type overrun (e.g. in case of I8 precision)
     std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
 
@@ -32,11 +35,14 @@ generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
     return res;
 }
 
-std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
+std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
     std::vector<ngraph::float16> res;
-    std::mt19937 gen(
-        static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
+    if (seed == 1) {
+        seed = static_cast<int32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+    }
+
+    std::mt19937 gen(seed);
     // chose values between this range to avoid type overrun (e.g. in case of I8 precision)
     std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
 
@@ -46,11 +52,13 @@ std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t u
     return res;
 }
 
-std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
+std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
     std::vector<ngraph::bfloat16> res;
-    std::mt19937 gen(
-        static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
+    if (seed == 1) {
+        seed = static_cast<int32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+    }
+    std::mt19937 gen(seed);
     // chose values between this range to avoid type overrun (e.g. in case of I8 precision)
     std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
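One caveat worth flagging in the generateVector() family above: seed == 1 is both the default and the "fall back to wall clock" sentinel, so passing 1 explicitly does not pin the RNG. A sketch of the intended contract (signatures as declared in the hunks above; the include path is an assumption):

    #include <cassert>
    #include "ngraph_functions/utils/data_utils.hpp"  // assumed include path

    void seededVectorsAgree() {
        // Same non-default seed => same mt19937 stream => identical data.
        auto a = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(8, 10, 1, 1234);
        auto b = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(8, 10, 1, 1234);
        assert(a == b);  // with the default seed (1) the two would generally differ
    }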
diff --git a/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp b/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp
index aab3c67..3915780 100644
--- a/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp
+++ b/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp
@@ -32,18 +32,19 @@ std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Nod
 std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node> &in,
                                                const ngraph::element::Type &type,
                                                std::size_t levels,
-                                               std::vector<size_t> constShapes) {
+                                               std::vector<size_t> constShapes,
+                                               const int32_t seed) {
     size_t constDataSize = ngraph::shape_size(constShapes);
     std::vector<float> inputLowData, inputHighData, outputLowData, outputHighData;
-    inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+    inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
     if (levels != 2) {
-        inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
-        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
-        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+        inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
+        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
+        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
     } else {
         inputHighData = inputLowData;
-        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
-        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
+        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
 
         for (int i = 0; i < constDataSize; i++) {
             if (outputLowData[i] > outputHighData[i]) {
@@ -70,10 +71,10 @@ std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Nod
         inputHighData[i] = std::max(inputLowData[i], inputHighData[i]);
     }
 
-    auto inputLowNode = makeConstant(type, constShapes, inputLowData, inputLowData.empty());
-    auto inputHighNode = makeConstant(type, constShapes, inputHighData, inputHighData.empty());
-    auto outputLowNode = makeConstant(type, constShapes, outputLowData, outputLowData.empty());
-    auto outputHighNode = makeConstant(type, constShapes, outputHighData, outputHighData.empty());
+    auto inputLowNode = makeConstant(type, constShapes, inputLowData, inputLowData.empty(), 10, 1, seed);
+    auto inputHighNode = makeConstant(type, constShapes, inputHighData, inputHighData.empty(), 10, 1, seed);
+    auto outputLowNode = makeConstant(type, constShapes, outputLowData, outputLowData.empty(), 10, 1, seed);
+    auto outputHighNode = makeConstant(type, constShapes, outputHighData, outputHighData.empty(), 10, 1, seed);
 
     auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels);
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
index fb63c3f..ed787e5 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
@@ -224,7 +224,8 @@ void GNAPropagateMatcher :: match() {
             ASSERT_NO_THROW_IE_EXCEPTION(network = CNNNetwork(_env.ngraph_model));
             ASSERT_NO_FATAL_FAILURE(loadCNNNetwork(network));
 #ifdef GNA_DEBUG
-            network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
+            // TODO: crash on activation tests so far on addOutput call
+            // network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
 #endif
         } else if (!_env.importedModelFileName.empty()) {
-- 
2.7.4
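For anyone trying the new overload locally, a sketch of how the seeded FakeQuantize builder is expected to be driven; only makeFakeQuantize and its seed parameter come from this patch, the surrounding names and values are illustrative:

    #include "ngraph_functions/builders.hpp"  // assumed include path

    std::shared_ptr<ngraph::Node> buildSeededFq(const ngraph::Output<ngraph::Node> &in) {
        const std::size_t levels = 16;
        const std::vector<size_t> constShapes = {1};  // per-tensor quantization ranges
        const int32_t seed = 1234;                    // any value != 1 pins the RNG
        // All four range constants derive from this one seed, so the resulting
        // FakeQuantize node is identical across test runs.
        return ngraph::builder::makeFakeQuantize(in, ngraph::element::f32, levels, constShapes, seed);
    }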
" << l << "
IR " << components[k].original_layer_name << "
dims" << components[k].num_rows_in << "x" << components[k].num_rows_out<< "
wscale" << components[k].op.affine.weight_scale_factor<< "