.gdb_history
.vimspector.json
doc/
+!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
/model-optimizer/!CMakeLists.txt
/model-optimizer/*.mapping
/model-optimizer/*.dat
-/model-optimizer/*.svg
\ No newline at end of file
+/model-optimizer/*.svg
+
+# ngraph
+ngraph/src/CPackConfig.cmake
+ngraph/src/CPackSourceConfig.cmake
+ngraph/src/VERSION
+ngraph/src/gtest/
+ngraph/src/json/
+ngraph/src/ngraphConfig.cmake
+ngraph/src/ngraphConfigVersion.cmake
+ngraph/src/protobuf/
+ngraph/src/src/
+ngraph/src/test/
macro(ie_cpack)
set(CPACK_GENERATOR "TGZ")
+ string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
if(WIN32)
set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
- string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
else()
set(CPACK_PACKAGE_NAME inference-engine)
- string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
endif()
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
COMPONENT python_samples)
endif()
+# install speech demo files
+
+if(SPEECH_LIBS_AND_DEMOS)
+    # Declare the component first (ie_cpack_add_component is a project helper),
+    # then install into it.
+    ie_cpack_add_component(speech_demo_files REQUIRED)
+
+    # Both directories are installed relative to the install prefix itself (".").
+    install(DIRECTORY
+                ${TEMP}/deployment_tools
+                ${TEMP}/data_processing
+            DESTINATION .
+            COMPONENT speech_demo_files)
+endif()
+
#
# Developer package
#
# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
- message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
+ message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
-string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
+string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
cython_add_module(${PYX_NAME} ${PYX_FILE})
+ add_dependencies(${TARGET_NAME} ${PYX_NAME})
target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
import pytest
import warnings
import threading
+from datetime import datetime
from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
def test_async_infer_wait_time(device):
ie_core = ie.IECore()
net = ie_core.read_network(test_net_xml, test_net_bin)
- exec_net = ie_core.load_network(net, device, num_requests=1)
+ exec_net = ie_core.load_network(net, device, num_requests=2)
img = read_image()
request = exec_net.requests[0]
request.async_infer({'data': img})
- request.wait(100)
+ start_time = datetime.utcnow()
+ status = request.wait(ie.WaitMode.RESULT_READY)
+ assert status == ie.StatusCode.OK
+ time_delta = datetime.utcnow() - start_time
+ latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
+ timeout = max(100, latency_ms)
+ request = exec_net.requests[1]
+ request.async_infer({'data': img})
+ max_repeat = 10
+ status = ie.StatusCode.REQUEST_BUSY
+ i = 0
+ while i < max_repeat and status != ie.StatusCode.OK:
+ status = request.wait(timeout)
+ i += 1
+ assert status == ie.StatusCode.OK
res = request.output_blobs['fc_out'].buffer
assert np.argmax(res) == 2
del exec_net
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size.";
+/// @brief message for quantization bits
+static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
+
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);
+/// @brief Define flag for quantization bits (default 16)
+DEFINE_int32(qb, 16, gna_qb_message);
+
/**
* @brief This function show a help message
*/
std::cout << " -dump_config " << dump_config_message << std::endl;
std::cout << " -load_config " << load_config_message << std::endl;
#endif
+ std::cout << " -qb " << gna_qb_message << std::endl;
}
#include <inference_engine.hpp>
#include <vpu/vpu_plugin_config.hpp>
#include <cldnn/cldnn_config.hpp>
+#include <gna/gna_config.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
}
} else if (device == "MYRIAD") {
device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
+ } else if (device == "GNA") {
+ if (FLAGS_qb == 8)
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
+ else
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";
+
+ if (isFlagSetInCommandLine("nthreads"))
+ device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
}
}
#include "details/caseless.hpp"
#include <details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
if (network.getFunction()) {
const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
- return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) != nullptr;
+ return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
+ std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
};
CNNNetwork net(network.getFunction());
auto nGraphFunc = net.getFunction();
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
+#include "weights_converter.hpp"
+#include "layer_transform.hpp"
namespace GNAPluginNS {
namespace frontend {
}
};
+/**
+ * @brief Converts an FP32 blob into a newly allocated blob with element type T.
+ *
+ * Each source element is multiplied by @p scale_factor and saturated to the
+ * representable range of T before it is stored.
+ *
+ * @tparam T element type of the resulting blob
+ * @param fp32_blob source blob; its buffer is read as FP32 values
+ * @param precision precision written into the tensor descriptor of the result
+ * @param scale_factor multiplier applied to every element before conversion
+ * @return blob with the dims and layout of @p fp32_blob holding the converted values
+ */
+template <typename T>
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+    auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
+        fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
+    prec_blob->allocate();
+
+    // numeric_limits<T>::min() is the smallest *positive* value for floating-point T,
+    // so the lower saturation bound must be lowest() (identical to min() for integers).
+    const T upper_bound = std::numeric_limits<T>::max();
+    const T lower_bound = std::numeric_limits<T>::lowest();
+
+    size_t i = 0;
+    for (auto& precValue : *prec_blob) {
+        auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
+        // Inclusive comparisons: a bound of a wide integer type may round up when
+        // promoted to float, and a value equal to that rounded bound must not reach
+        // the static_cast below, where it would be out of range for T.
+        if (f32Value >= upper_bound) {
+            precValue = upper_bound;
+        } else if (f32Value <= lower_bound) {
+            precValue = lower_bound;
+        } else {
+            precValue = static_cast<T>(f32Value);
+        }
+    }
+
+    return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
+}
+
+/**
+ * @brief Runtime dispatcher over the templated fp32_to_precision_blob<T>.
+ *
+ * Selects the element type that corresponds to @p precision (FP32, I32, I16 or I8)
+ * and forwards all arguments unchanged; any other precision raises a GNA exception.
+ *
+ * @param fp32_blob source blob forwarded to the typed conversion
+ * @param precision requested precision of the result
+ * @param scale_factor multiplier forwarded to the typed conversion
+ * @return the converted blob
+ */
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+    InferenceEngine::Blob::Ptr converted;
+    switch (precision) {
+    case InferenceEngine::Precision::I8:
+        converted = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
+        break;
+    case InferenceEngine::Precision::I16:
+        converted = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
+        break;
+    case InferenceEngine::Precision::I32:
+        converted = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
+        break;
+    case InferenceEngine::Precision::FP32:
+        converted = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
+        break;
+    default:
+        THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
+    }
+    return converted;
+}
+
template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();
+ if (cnnLayer->type == "Const") {
+ if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
+ }
+ auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
+ auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
+ auto const_blob = cnnLayer->blobs["custom"];
+ if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
+ cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
+ }
+ }
+
return true;
}
};
return true;
}
+ if (cnnLayer->type == "Const") {
+ auto blob = cnnLayer->blobs["custom"];
+ if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ blob = make_fp32_blob(blob);
+ }
+ auto max_val = std::numeric_limits<float>::min();
+ auto min_val = std::numeric_limits<float>::max();
+
+ auto flt_buf = blob->buffer().as<float*>();
+ auto size = blob->size();
+
+ for (int i=0; i < size; i++) {
+ auto val = flt_buf[i];
+ if (val > max_val) max_val = val;
+ if (val < min_val) min_val = val;
+ }
+
+ auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
+ auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;
+
+ // TODO: Investigate what should be the scale in such cases (31910)
+ if (std::isinf(scale_val)) {
+ quant->_dst_quant.scale = quant->_src_quant.scale;
+ } else {
+ quant->_dst_quant.scale = scale_val;
+ }
+
+ return ScaleFactorUpdateResult();
+ }
+
if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);
+
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);
switch (eltwiseLayer->_operation) {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
+ case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
}
// this path might result in significant data loss
+ quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;
#include "quantized_layer_params.hpp"
#include "precision_utils.h"
+/**
+ * @brief Builds an FP32 copy of a blob whose buffer holds FP16 values.
+ *
+ * The result keeps the dims and layout of the source tensor descriptor; every
+ * element is converted with PrecisionUtils::f16tof32.
+ *
+ * @param fp16_blob source blob; its buffer is read as FP16 values
+ * @return newly allocated FP32 blob
+ */
+inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
+    using Fp16Type = InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type;
+
+    auto converted = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
+        fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
+    converted->allocate();
+
+    int idx = 0;
+    for (auto& dst : *converted) {
+        const auto half = fp16_blob->buffer().template as<Fp16Type*>()[idx];
+        dst = InferenceEngine::PrecisionUtils::f16tof32(half);
+        ++idx;
+    }
+
+    return static_cast<InferenceEngine::Blob::Ptr>(converted);
+}
+
inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
- auto tmp =
- InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
- blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
- tmp->allocate();
- int i = 0;
- for (auto& f32Value : *tmp) {
- auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
- f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
- }
- newBlobs[blob.first] = tmp;
+ auto fp32_blob = make_fp32_blob(blob.second);
+ newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
+ InferenceEngine::BlobMap newBlobs;
+ for (auto& blob_pair : lp->blobs) {
+ auto blob_name = blob_pair.first;
+ auto blob_ptr = blob_pair.second;
+ if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ auto new_blob = make_fp32_blob(blob_ptr);
+ newBlobs[blob_name] = new_blob;
+ } else {
+ newBlobs[blob_name] = blob_ptr;
+ }
+ }
+
return true;
}
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
}
- auto constBlob = constLayer->blobs["custom"];
+ auto const_blob = constLayer->blobs["custom"];
- void* ptr_for_const_blob = &ptr_for_const_blob;
- connectOutput(constLayer, ptr_for_const_blob, constBlob->size());
-
- const_connections[constLayer->name] = ptr_for_const_blob;
+ const_connections[constLayer->name] = &const_connections[constLayer->name];
+ void* ptr_for_const_blob = &const_connections[constLayer->name];
+ connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
// dont see practical use case when bind storage type need to be different that allocation type
- gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
- ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
+ gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
+ ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
if (cropLayer == nullptr) {
return;
}
- if (cropLayer->axis.size() > 1) {
+
+ IE_ASSERT(!layer->insData.empty());
+ auto inputs = layer->insData.begin()->lock();
+
+ IE_ASSERT(!cropLayer->axis.empty());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
+
+ std::vector<int> axis, dim, offset;
+ for (int n = 0; n < cropLayer->axis.size(); n++) {
+ uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
+ // Exclude crop layer components that do nothing
+ if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
+ continue;
+ }
+ axis.push_back(cropLayer->axis[n]);
+ dim.push_back(cropLayer->dim[n]);
+ offset.push_back(cropLayer->offset[n]);
+ }
+
+ if (axis.size() > 1) {
THROW_GNA_EXCEPTION <<
- "Crop layer does not support the number of cropped dimensions = "
- << cropLayer->axis.size() << ".";
+ "Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
+ << axis.size() << ".";
}
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
- size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
- size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
+ size_t cropOffset = offset.front() * cropLayer->precision.size();
+ size_t cropOutputSize = dim.front() * cropLayer->precision.size();
if (ALIGN64(cropOffset) == cropOffset) {
// leave crop as it is
} else {
gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
IE_ASSERT(!layer->outData.empty());
- IE_ASSERT(!layer->insData.empty());
auto outputs = *layer->outData.begin();
- auto inputs = layer->insData.begin()->lock();
// only 1D crops supported
- if (cropLayer->axis.size() != 1) {
+ if (axis.size() != 1) {
THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
}
// TODO: add unit tests for 4d crops blobs
- uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
uint32_t num_columns_in = 1;
- uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
void* ptr_inputs = nullptr;
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
- FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
+ FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
(quantized == nullptr) ?
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
int biasesLayerIdx = 1;
if (quantized) {
- if (eltwise._operation == EltwiseLayer::Sum) {
+ switch (eltwise._operation) {
+ case InferenceEngine::EltwiseLayer::Sum:
+ case InferenceEngine::EltwiseLayer::Sub:
+ {
if (inputs4Bytes->getPrecision().size() != 4) {
std::swap(inputs4Bytes, inputs2Bytes);
biasesLayerIdx = 0;
}
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
- } else {
+ break;
+ }
+ case InferenceEngine::EltwiseLayer::Prod:
+ {
// for mul both inputs should be 2 bytes precision
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
+ break;
+ }
+ default:
+ THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
}
}
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
switch (eltwise._operation) {
+ case EltwiseLayer::Sub:
+     // Subtraction uses negated identity weights: -1.0 in the float path, and the
+     // negated weights scale in the quantized path.
+     if (quantized == nullptr) {
+         gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
+     } else {
+         auto scaledIdentity = -quantized->_weights_quant.scale;
+
+         // The negated scale has to be saturated at INT16_MIN; clamping only
+         // against INT16_MAX never triggers for a negative value and would let a
+         // large scale overflow the int16 conversion.
+         auto saturatedIdentity = std::max(std::min(scaledIdentity, static_cast<float>(INT16_MAX)),
+                                           static_cast<float>(INT16_MIN));
+         auto quantizedIdentity = FLOAT_TO_INT16(saturatedIdentity);
+
+         gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
+     }
+     connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
+     break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
- key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
- gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[CONFIG_KEY(PERF_COUNT)] =
return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sum;
}
+ bool isEltwiseSub() const noexcept {
+     IS_VALID();
+     if (isEltwise()) {
+         // isEltwise() has already validated the dynamic_cast below, so the
+         // pointer is dereferenced without a separate null check
+         // coverity[var_deref_op]
+         return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
+                InferenceEngine::EltwiseLayer::Sub;
+     }
+     return false;
+ }
+
bool isEltwiseMul() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
auto prev1 = PrevFunctionalLayer(l, 1);
switch (eltwise->_operation) {
+ case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
// for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
// for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights
- if (eltwise->_operation != EltwiseLayer::Sum)
+ if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
continue;
auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
auto blobs = node.child("blobs");
if (!blobs.empty()) {
for (pugi::xml_node blob = blobs.first_child(); !blob.empty(); blob = blob.next_sibling()) {
- size_t size = GetUIntAttr(blob, "size", 0);
+ size_t size = GetUInt64Attr(blob, "size", 0);
uint64_t offset = GetUInt64Attr(blob, "offset", 0);
Precision precision(Precision::U8);
const std::string& preStr = GetStrAttr(blob, "precision", "");
std::vector<float> activations_beta = getParameters<float>(dn, "activations_beta", {});
float clip = GetFloatAttr(dn, "clip", 0.f);
return std::make_shared<ngraph::op::LSTMCell>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5],
- GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
+ GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
activations, activations_alpha, activations_beta, clip);
}
if (dn.empty())
THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;
- size_t offset = GetUIntAttr(dn, "offset");
- size_t size = GetUIntAttr(dn, "size");
+ size_t offset = GetUInt64Attr(dn, "offset");
+ size_t size = GetUInt64Attr(dn, "size");
if (!weights || weights->cbuffer() == nullptr)
THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! "
class XmlDeserializer : public ngraph::AttributeVisitor {
public:
explicit XmlDeserializer(const pugi::xml_node& node): node(node) {}
- void on_attribute(const std::string& name, std::string& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
- value = val;
+ value.set(val);
}
- void on_attribute(const std::string& name, bool& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
std::transform(val.begin(), val.end(), val.begin(), [](char ch) {
bool is_false = false_names.find(val) != false_names.end();
if (!is_true && !is_false) return;
- value = is_true;
+ value.set(is_true);
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override {
std::string val;
CNNLayerPtr create();
- void on_attribute(const std::string& name, std::string& value) override {
- params[name] = value;
- }
-
- void on_attribute(const std::string& name, bool& value) override {
- params[name] = value ? "true" : "false";
+ void on_adapter(const std::string& name, ::ngraph::ValueAccessor<bool> &value) override {
+ params[name] = value.get() ? "true" : "false";
}
void addSpecificCreator(const std::vector<std::string>& forTypes, const CreatorFor& creator) {
res->params = params;
return res;
});
+
+ addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node,
+ const std::map<std::string, std::string> params) -> CNNLayerPtr {
+ LayerParams attrs = {node->get_friendly_name(), "TopK",
+ details::convertPrecision(node->get_output_element_type(0))};
+ auto res = std::make_shared<TopKLayer>(attrs);
+ res->params = params;
+ return res;
+ });
}
CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
std::make_shared<Builder::NodeConverter<::ngraph::op::Sign>>(),
std::make_shared<Builder::NodeConverter<::ngraph::op::Sinh>>(),
std::make_shared<Builder::NodeConverter<::ngraph::op::SquaredDifference>>(),
- std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Select>>(),
std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Softmax>>(),
std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Split>>(),
std::make_shared<Builder::NodeConverter<::ngraph::op::VariadicSplit>>(),
#include <string>
#include <vector>
#include <mutex>
+#include <algorithm>
#include <cnn_network_ngraph_impl.hpp>
#include "blob_factory.hpp"
return remainingConstLayers;
}
+// Layer types that are excluded from constant inference. Const-qualified because
+// the table is only ever searched (std::find), never modified.
+static const std::vector<std::string> skipConstInfer = {
+    "FakeQuantize",
+    "Quantize",
+    "CumSum"  // Const inference function for CumSum is not implemented!
+};
+
const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::vector<CNNLayerPtr>& sortedLayers) {
std::map<std::string, bool> mapConstLayers;
// collect all const layers, which inputs are const layers.
// Layers with "Shape" and "Const" type are Const by definition
if (layer->type == "Shape" || layer->type == "Const") {
mapConstLayers[layer->name] = false;
- } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) {
+ } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) {
bool isAllInputsConst = true;
for (auto const& data : layer->insData) {
auto creator = data.lock()->getCreatorLayer().lock();
};
for (const auto& layer : sortedLayers) {
- if (layer->type == "FakeQuantize" || layer->type == "Quantize") {
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) {
continue;
}
auto implPtr = holder.getConstInferImpl(layer->type);
if (!implPtr && !isForShape)
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name +
"` Layer with `" + layer->type + "` Type on constant propagation";
if (!isForShape) {
auto outputBlobs = getOutputBlobs(layer->outData);
auto inp = getInputBlobs(layer->insData, isForShape);
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
implPtr->infer(inp, layer->params, layer->blobs, outputBlobs);
for (int i = 0; i < layer->outData.size(); i++) {
std::string dataName = layer->outData[i]->getName();
}
template <>
-CNNLayer::Ptr NodeConverter<ngraph::op::v1::Select>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
- LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))};
-
- auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
- auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Select>(layer);
- if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
-
- auto broadcast = castedLayer->get_auto_broadcast().m_type;
- if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) {
- res->params["auto_broadcast"] = "numpy";
- } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) {
- res->params["auto_broadcast"] = "none";
- }
-
- return res;
-}
-
-template <>
CNNLayer::Ptr NodeConverter<ngraph::op::DetectionOutput>::createLayer(
const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "DetectionOutput",
set(TARGET_NAME "MKLDNNPlugin")
+if(ENABLE_LTO)  # opt-in: the LTO helper below runs only when ENABLE_LTO is set
+ ie_enable_lto()  # NOTE(review): presumably a project-provided macro that turns on LTO - confirm where it is defined
+endif()
+
if (WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
endif()
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
)
foreach(LAYER ${LAYERS})
memory::primitive_desc fetch() const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc = mkldnn_primitive_desc_iterator_fetch(get());
memory::primitive_desc src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
memory::primitive_desc dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
memory::primitive_desc weights_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
template <typename T>
void getPrimitiveDescriptor(T& pdesc) const {
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
memory::primitive_desc cdescpd;
auto inputDesc = getInputDesc();
auto outputDesc = getOutputDesc();
if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
- (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
+ (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 &&
+ (inputDesc.getPrecision() != outputDesc.getPrecision() ||
+ inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc())))
THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
THROW_IE_EXCEPTION << "Cannot get input descriptor!";
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mkldnn_layers_dispatcher.hpp"
-#include <details/ie_exception.hpp>
-#include "nodes/list.hpp"
-#include <memory>
-
-using namespace InferenceEngine;
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) {
- if (!mngr)
- THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty.";
-
- auto defaultExtensions = std::make_shared<Extensions::Cpu::MKLDNNExtensions>();
- mngr->AddExtension(defaultExtensions);
-}
-
-} // namespace MKLDNNPlugin
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "mkldnn_extension_mngr.h"
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr);
-
-} // namespace MKLDNNPlugin
size_t MKLDNNMemory::GetSize() const {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType()));
+ return GetElementsCount() * itemSize;
+}
+size_t MKLDNNMemory::GetElementsCount() const {
auto desc = GetDescriptor();
std::vector<int> dims(desc.data.layout_desc.blocking.padding_dims,
desc.data.layout_desc.blocking.padding_dims + desc.data.ndims);
- return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>()) * itemSize;
+ return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>());
}
void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) {
case f::OhIw16o4i:
case f::OIhw4i16o4i:
case f::OhIw8o4i:
+ case f::IOhw16o16i:
ndims = 4; break;
// DHW
case f::ncdhw:
case memory::OhIw8o4i: return "OhIw8o4i";
case memory::OhIw16o4i: return "OhIw16o4i";
case memory::OIhw4i16o4i: return "OIhw4i16o4i";
+ case memory::IOhw16o16i: return "IOhw16o16i";
case memory::oidhw: return "oidhw";
case memory::dhwio: return "dhwio";
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIhw8o8i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIhw16o16i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::IOhw16o16i:
+ order = {1, 0, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OIdhw8i8o:
order = {0, 1, 2, 3, 4, 1, 0};
blkDims = dims;
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIdhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 0};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIdhw16o16i:
+ order = {0, 1, 2, 3, 4, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw4o4i:
- order = {0, 1, 2, 3, 4, 2, 1};
+ order = {0, 1, 2, 3, 4, 1, 2};
blkDims = dims;
blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0);
blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0);
blkDims.push_back(8);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw16i16o:
order = {0, 1, 2, 3, 4, 2, 1};
blkDims = dims;
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw16o16i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OhIw8o4i:
order = {0, 2, 1, 3, 0, 1};
blkDims = dims;
} else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
mkldnnFormat = memory::format::OIhw16i16o;
}
+ } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 8 && blkdDims[5] == 8) {
+ mkldnnFormat = memory::format::OIhw8o8i;
+ } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::OIhw16o16i;
+ }
+ } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::IOhw16o16i;
+ }
} else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) {
if (blkdDims[4] == 8) {
mkldnnFormat = memory::format::Ohwi8o;
mkldnnFormat = memory::format::OIdhw16i16o;
}
} else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
+ if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::OIdhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::OIdhw16o16i;
+ }
+ } else if (order.size() == 7 &&
order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
if (blkdDims[5] == 8) {
mkldnnFormat = memory::format::OdhIw8o4i;
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) {
if (blkdDims[6] == 4) {
- mkldnnFormat = memory::format::gOIhw4o4i;
+ mkldnnFormat = memory::format::gOIhw4i4o;
} else if (blkdDims[6] == 8) {
mkldnnFormat = memory::format::gOIhw8i8o;
} else if (blkdDims[6] == 16) {
mkldnnFormat = memory::format::gOIhw16i16o;
}
} else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
+ if (blkdDims[6] == 4) {
+ mkldnnFormat = memory::format::gOIhw4o4i;
+ } else if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::gOIhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::gOIhw16o16i;
+ }
+ } else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
if (blkdDims[5] == 8 && blkdDims[6] == 4) {
mkldnnFormat = memory::format::gOhIw8o4i;
}
size_t GetSize() const;
+ size_t GetElementsCount() const;
mkldnn::memory::format GetFormat() const {
return static_cast<mkldnn::memory::format>(prim->get_primitive_desc().desc().data.format);
auto config = selected_pd->getConfig();
if (!isInitConfig(config)) {
for (size_t i = 0; i < config.inConfs.size(); i++) {
- config.inConfs[i].desc = getConfiguredInputDesc(config, i);
+ // The TensorDescriptor constructor, which is called inside getConfiguredInputDesc, incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i));
}
for (size_t i = 0; i < config.outConfs.size(); i++) {
- config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
+ // The TensorDescriptor constructor, which is called inside getConfiguredOutputDesc, incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i));
}
+
initDescriptor(config);
} else if (getType() != RNNSeq && getType() != RNNCell) {
initDescriptor(config);
if (srcDescs.empty() || selectedDescs.empty())
return false;
for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
- if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
+ if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() &&
+ srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() &&
+ srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) &&
+ srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
return false;
}
return true;
#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
-#include "mkldnn_layers_dispatcher.hpp"
#include "mkldnn_weights_cache.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <threading/ie_executor_manager.hpp>
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
+#include <nodes/list.hpp>
#include "convert_function_to_cnn_network.hpp"
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
Engine::Engine() {
_pluginName = "CPU";
- addDefaultExtensions(extensionManager);
+ extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}
Engine::~Engine() {
const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
- std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
+ std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
+ std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "argmax_imp.hpp"
argmax_conf conf;
};
-REG_FACTORY_FOR(ImplFactory<ArgMaxImpl>, ArgMax);
+REG_FACTORY_FOR(ArgMaxImpl, ArgMax);
} // namespace Cpu
} // namespace Extensions
#include "argmax_imp.hpp"
+#include <cstring>
#include <algorithm>
#include <string>
#include <vector>
vmask_type vmask;
int s_index = i0 * dim * after_num + ib1 * block_size;
- memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));
+ std::memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));
auto vswap_func = [&](int index1, int index2) {
vtmp = vmax_values[index1];
#include <ie_iextension.h>
#include "ie_util_internal.hpp"
-#include "list.hpp"
+#include "nodes/list.hpp"
#include <string>
#include <vector>
IE_SUPPRESS_DEPRECATED_END
+/**
+ * Registers a layer-implementation factory on the given extension instance.
+ *
+ * @tparam __prim      factory type; it is constructed with `new __prim(layer)`.
+ * @param extInstance  extension object whose AddExt() receives the registration.
+ * @param __type       layer type name the factory is registered under.
+ */
+template <typename __prim>
+inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) {
+    IE_SUPPRESS_DEPRECATED_START
+    const auto createFactory = [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+        return new __prim(layer);
+    };
+    extInstance->AddExt(__type, createFactory);
+    IE_SUPPRESS_DEPRECATED_END
+}
+
+// Defines a free function named <__prim><__type> (tokens pasted together) which registers
+// ImplFactory<__prim> on the passed MKLDNNExtensions under the stringified __type.
+#define REG_FACTORY_FOR(__prim, __type)                             \
+    void __prim ## __type(MKLDNNExtensions * extInstance) {         \
+        extRegister<ImplFactory<__prim>>(extInstance, #__type);     \
+    }
+
+
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
std::vector<size_t> _crops_end;
};
-REG_FACTORY_FOR(ImplFactory<BatchToSpaceImpl>, BatchToSpace);
+REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
const size_t BROADCAST_SHAPE = 1;
};
-REG_FACTORY_FOR(ImplFactory<BroadcastImpl>, Broadcast);
+REG_FACTORY_FOR(BroadcastImpl, Broadcast);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
bool with_bins = false;
};
-REG_FACTORY_FOR(ImplFactory<BucketizeImpl>, Bucketize);
+REG_FACTORY_FOR(BucketizeImpl, Bucketize);
} // namespace Cpu
} // namespace Extensions
#if defined(HAVE_AVX512F)
namespace AVX512F {
+ static inline __m512 _mm_uni_any_ps() {  // Filler float vector; callers in this namespace pass it as the 'src' operand of masked intrinsics.
+ return __m512{};  // Value-initialized; with the all-ones mask ((__mmask16)-1) used by those callers no lane is taken from it.
+ }
+
+ static inline __m512i _mm_uni_any_epi32() {  // Integer counterpart of _mm_uni_any_ps(), used the same way by _mm_uni_set1_epi32().
+ return __m512i{};  // Value-initialized filler vector.
+ }
+
static inline __m512 _mm_uni_loadu_ps(const float* psrc) {
- return _mm512_loadu_ps(psrc);
+ return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc);
}
static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) {
return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1)));
}
+ static inline __m512i _mm_uni_set1_epi32(int value) {
+ return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value);
+ }
+
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) {
- return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1);
+ return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1);
}
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) {
return _mm512_add_epi32(vec0, vec1);
}
- static inline __m512i _mm_uni_set1_epi32(int value) {
- return _mm512_set1_epi32(value);
- }
-
static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) {
return _mm512_sll_epi32(vec, _mm_set1_epi64x(value));
}
}
static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) {
- return _mm512_cvtepi32_ps(vec);
+ return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec);
}
} // namespace AVX512F
#elif defined(HAVE_AVX2)
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
std::string precision;
};
-REG_FACTORY_FOR(ImplFactory<ConvertImpl>, Convert);
+REG_FACTORY_FOR(ConvertImpl, Convert);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
}
};
-REG_FACTORY_FOR(ImplFactory<CTCGreedyDecoderImpl>, CTCGreedyDecoder);
+REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder);
} // namespace Cpu
} // namespace Extensions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "list.hpp"
+#include "base.hpp"
+
+#include <string>
+#include <vector>
+#include "ie_parallel.hpp"
+#include "ie_precision.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class CumSumImpl: public ExtLayerBase {
+ enum { CUM_SUM_DATA, AXIS, numOfInputs };
+ enum { N, C, D, H, W, numOfDims };
+ bool exclusive;
+ bool reverse;
+ size_t axis = 0;
+ std::vector<size_t> shape5d;
+
+public:
+ explicit CumSumImpl(const CNNLayer* layer) {  // Validates the layer description and registers a single supported configuration in confs.
+ try {
+ layerName = layer->name;
+ if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)  // 'axis' input is optional: 1 or 2 inputs, exactly 1 output
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";
+
+ const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
+ const auto &dataShape = dataTensor.getDims();
+ if (dataShape.size() < 1 || dataShape.size() > 5) {  // ranks 1..5 only, matching the 5-D padded shape used by the kernel
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
+ }
+
+ exclusive = layer->GetParamAsBool("exclusive", false);
+ reverse = layer->GetParamAsBool("reverse", false);
+
+ const auto& dataPrecision = dataTensor.getPrecision();
+ if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
+ dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();
+
+ if (layer->insData.size() == numOfInputs) {  // the checks below apply only when the 'axis' input is connected
+ const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
+ const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
+ if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();
+
+ const auto axisTensorRank = axisTensor.getDims().size();
+ if (axisTensorRank != 0)  // only a rank-0 (scalar) 'axis' tensor is accepted
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
+ }
+
+ if (dataShape != layer->outData[0]->getTensorDesc().getDims())
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";
+
+ shape5d = get5dShape(dataShape);  // data shape padded with trailing 1s up to numOfDims entries
+
+ LayerConfig config;
+ for (size_t i = 0; i < layer->insData.size(); i++) {
+ DataConfig inConfig;
+ inConfig.inPlace = -1;
+ inConfig.constant = false;
+
+ Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision();
+ if (inPrecision == Precision::BF16)  // BF16 inputs are described as FP32 in the configuration
+ inPrecision = Precision::FP32;
+ const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
+ inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
+
+ config.inConfs.push_back(inConfig);
+ }
+ DataConfig outConfig;
+ outConfig.inPlace = -1;
+ outConfig.constant = false;
+ Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();  // output precision follows the 'data' input
+ if (outPrecision == Precision::BF16)
+ outPrecision = Precision::FP32;
+ const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
+ outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
+
+ config.outConfs.push_back(outConfig);
+
+ config.dynBatchSupport = false;
+ confs.push_back(config);
+ } catch (InferenceEngine::details::InferenceEngineException &ex) {  // validation failures are recorded in errorMsg rather than propagated
+ errorMsg = ex.what();
+ }
+ }
+
+    /**
+     * Computes the cumulative sum of the 'data' input into outputs[0].
+     *
+     * If the optional 'axis' input is connected, the axis is re-read from it on every call.
+     * The method is noexcept, while reading the axis can throw (invalid value or precision);
+     * every exception is therefore caught here and turned into a GENERAL_ERROR status
+     * instead of escaping and terminating the process.
+     *
+     * @param inputs   input blobs: 'data' and, optionally, 'axis'.
+     * @param outputs  output blobs; only outputs[0] is written.
+     * @param resp     optional response descriptor that receives the error text.
+     * @return OK on success, GENERAL_ERROR otherwise.
+     */
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        // Copies the message into resp->msg (when provided) and always null-terminates it,
+        // since std::string::copy does not append a terminator itself.
+        const auto reportError = [resp](const std::string &message) {
+            if (resp) {
+                const size_t copied = message.copy(resp->msg, sizeof(resp->msg) - 1);
+                resp->msg[copied] = '\0';
+            }
+            return GENERAL_ERROR;
+        };
+
+        try {
+            if (inputs.size() == numOfInputs)
+                axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]);
+
+            const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision();
+            switch (dataPrecision) {
+                case Precision::I8   : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::U8   : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::I16  : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::I32  : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::I64  : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                case Precision::U64  : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+                default : {
+                    return reportError("CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name());
+                }
+            }
+        } catch (const InferenceEngine::details::InferenceEngineException &ex) {
+            return reportError(ex.what());
+        } catch (...) {
+            // No message is built here so that this handler itself cannot throw.
+            return GENERAL_ERROR;
+        }
+        return OK;
+    }
+
+private:
+    /**
+     * Resolves typed element pointers for the input and output blobs (including their
+     * padding offsets) and forwards to the cumSum instantiation that matches the layer's
+     * 'reverse' and 'exclusive' flags.
+     */
+    template <typename dataType>
+    void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) {
+        const auto &srcBlocking = _input->getTensorDesc().getBlockingDesc();
+        const auto &dstBlocking = _output->getTensorDesc().getBlockingDesc();
+
+        const dataType *src = _input->cbuffer().as<const dataType *>() + srcBlocking.getOffsetPadding();
+        dataType *dst = _output->buffer().as<dataType *>() + dstBlocking.getOffsetPadding();
+        // Step, in elements, between neighbouring items along the accumulation axis.
+        const size_t axisStride = srcBlocking.getStrides()[axis];
+
+        if (reverse && exclusive) {
+            cumSum<true, true, dataType>(src, dst, axisStride);
+        } else if (reverse) {
+            cumSum<true, false, dataType>(src, dst, axisStride);
+        } else if (exclusive) {
+            cumSum<false, true, dataType>(src, dst, axisStride);
+        } else {
+            cumSum<false, false, dataType>(src, dst, axisStride);
+        }
+    }
+
+    /**
+     * Computes the cumulative sum along the member 'axis' for every 1-D slice of shape5d.
+     *
+     * @tparam reverse    accumulate from the last element towards the first.
+     * @tparam exclusive  each output excludes the input element at the same position
+     *                    (the first written output is 0).
+     * @param input   first element of the source data.
+     * @param output  first element of the destination data.
+     * @param offset  step, in elements, between consecutive items along the axis.
+     */
+    template <bool reverse, bool exclusive, typename dataType>
+    void cumSum(const dataType *input, dataType *output, const size_t &offset) {
+        const size_t axisLength = shape5d[axis];
+        // Nothing to accumulate for an empty axis. Without this guard the unsigned
+        // 'axisLength - 1' below would wrap around and index far outside the buffers.
+        if (axisLength == 0)
+            return;
+
+        // Extents of the four dimensions that are iterated in parallel (all but 'axis').
+        std::vector<size_t> iterationRange(numOfDims - 1);
+        size_t j = 0;
+        for (size_t i = 0; i < shape5d.size(); i++) {
+            if (i == axis)
+                continue;
+            iterationRange[j++] = shape5d[i];
+        }
+        parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) {
+            // Rebuild the 5-D coordinate of the slice start by re-inserting 0 at the axis position.
+            std::vector<size_t> forStartOffset{ir0, ir1, ir2, ir3};
+            forStartOffset.insert(forStartOffset.begin() + axis, 0);
+            const size_t startOffset = getStartOffset(forStartOffset);
+
+            const dataType *inputStart = input + startOffset;
+            dataType *outputStart = output + startOffset;
+            const size_t last = axisLength - 1;
+
+            if (reverse) {
+                outputStart[last * offset] = exclusive ? dataType(0) : inputStart[last * offset];
+                // Counts i = last-1 ... 0 using unsigned arithmetic only.
+                for (size_t i = last; i-- > 0;) {
+                    const size_t contributor = exclusive ? i + 1 : i;
+                    outputStart[i * offset] = inputStart[contributor * offset] + outputStart[(i + 1) * offset];
+                }
+            } else {
+                outputStart[0] = exclusive ? dataType(0) : inputStart[0];
+                for (size_t i = 1; i < axisLength; i++) {
+                    const size_t contributor = exclusive ? i - 1 : i;
+                    outputStart[i * offset] = inputStart[contributor * offset] + outputStart[(i - 1) * offset];
+                }
+            }
+        });
+    }
+
+    /**
+     * Converts a 5-D coordinate (indexed N, C, D, H, W) into a linear element offset,
+     * treating shape5d as a dense shape with W varying fastest.
+     */
+    size_t getStartOffset(std::vector<size_t> &forStartOffset) {
+        // Horner-style evaluation of n*C*D*H*W + c*D*H*W + d*H*W + h*W + w.
+        size_t linear = forStartOffset[N];
+        linear = linear * shape5d[C] + forStartOffset[C];
+        linear = linear * shape5d[D] + forStartOffset[D];
+        linear = linear * shape5d[H] + forStartOffset[H];
+        linear = linear * shape5d[W] + forStartOffset[W];
+        return linear;
+    }
+
+    /**
+     * Reads the axis value from the first element of the 'axis' blob (I32 or I64) and
+     * normalizes it against the rank of the 'data' blob.
+     *
+     * Negative values count from the end, so the accepted range is [-rank, rank - 1].
+     * Throws an Inference Engine exception for any other precision or an out-of-range value.
+     */
+    size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) {
+        const auto& axisPrecision = _axis->getTensorDesc().getPrecision();
+        const int64_t rank = static_cast<int64_t>(_data->getTensorDesc().getDims().size());
+
+        int64_t requested = 0;
+        if (axisPrecision == Precision::I32) {
+            requested = static_cast<int64_t>(_axis->cbuffer().as<const int32_t *>()[0]);
+        } else if (axisPrecision == Precision::I64) {
+            requested = _axis->cbuffer().as<const int64_t *>()[0];
+        } else {
+            THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name();
+        }
+
+        const bool inRange = requested >= -rank && requested < rank;
+        if (!inRange)
+            THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << requested;
+
+        return requested < 0 ? requested + rank : requested;
+    }
+
+    /**
+     * Returns a copy of dims extended with trailing 1s to exactly numOfDims entries.
+     * The constructor only passes shapes of rank 1..5 to this helper.
+     */
+    std::vector<size_t> get5dShape(const SizeVector& dims) {
+        std::vector<size_t> padded(dims.begin(), dims.end());
+        padded.resize(numOfDims, 1);
+        return padded;
+    }
+
+private:
+ std::string layerName;
+};
+
+REG_FACTORY_FOR(CumSumImpl, CumSum);
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t ownStrides[CNTR_SIZE];
};
-REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
+REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cfloat>
}
}
-REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
+REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cassert>
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
+REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
const size_t FILL_VALUE = 1;
};
-REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
+REG_FACTORY_FOR(FillImpl, Fill);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
};
-REG_FACTORY_FOR(ImplFactory<GatherImpl>, Gather);
+REG_FACTORY_FOR(GatherImpl, Gather);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
InferenceEngine::Precision precision;
};
-REG_FACTORY_FOR(ImplFactory<GatherTreeImpl>, GatherTree);
+REG_FACTORY_FOR(GatherTreeImpl, GatherTree);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
float bias = 1.0f;
};
-REG_FACTORY_FOR(ImplFactory<GRNImpl>, GRN);
+REG_FACTORY_FOR(GRNImpl, GRN);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <string>
#include <vector>
}
};
-REG_FACTORY_FOR(ImplFactory<InterpImpl>, Interp);
+REG_FACTORY_FOR(InterpImpl, Interp);
} // namespace Cpu
} // namespace Extensions
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "nodes/list.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+// Expands one table entry into the declaration of its registration function,
+// i.e. the function whose name is the two tokens pasted together.
+#define DECLARE_NODE_REGISTRAR(__prim, __type) \
+    void __prim ## __type(MKLDNNExtensions * extInstance)
+
+// Expands one table entry into a call of its registration function on this instance.
+#define INVOKE_NODE_REGISTRAR(__prim, __type) \
+    __prim ## __type(this)
+
+// First pass over the node table: declare every registration function.
+#define MKLDNN_EXTENSION_NODE(__prim, __type) DECLARE_NODE_REGISTRAR(__prim, __type)
+#include "list_tbl.hpp"
+#undef MKLDNN_EXTENSION_NODE
+
+// Second pass over the same table: the constructor calls every registration function,
+// passing this instance to each of them.
+MKLDNNExtensions::MKLDNNExtensions() {
+#define MKLDNN_EXTENSION_NODE(__prim, __type) INVOKE_NODE_REGISTRAR(__prim, __type)
+#include "list_tbl.hpp"
+#undef MKLDNN_EXTENSION_NODE
+}
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
#include <memory>
#include <algorithm>
-// WA for xbyak.h
-#ifdef _WIN32
-# ifndef _WINSOCKAPI_
-# define _WINSOCKAPI_
-# endif
-# ifndef _WINSOCK2API_
-# define _WINSOCK2API_
-# endif
-#endif
-#include <cpu_isa_traits.hpp>
-
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
class MKLDNNExtensions : public IExtension {
public:
+ MKLDNNExtensions();
+
StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
- collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list);
+ collectTypes(types, size, extensionsHolder->list);
return OK;
}
StatusCode
getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override {
- auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list;
+ auto& factories = extensionsHolder->list;
if (factories.find(cnnLayer->type) == factories.end()) {
std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
delete this;
}
- static void AddExt(std::string name, ext_factory factory) {
- auto extensionsHolder = GetExtensionsHolder();
- if (extensionsHolder != nullptr)
- extensionsHolder->list[name] = factory;
- }
-
- static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
- static std::shared_ptr<ExtensionsHolder> localHolder;
- if (localHolder == nullptr) {
- localHolder = std::make_shared<ExtensionsHolder>();
- }
- return localHolder;
+ void AddExt(std::string name, ext_factory factory) {
+ extensionsHolder->list[name] = factory;
}
private:
+ std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>();
+
template<class T>
void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) {
types = new char *[factories.size()];
IE_SUPPRESS_DEPRECATED_END
-template<typename Ext>
-class ExtRegisterBase {
-public:
- explicit ExtRegisterBase(const std::string& type) {
- IE_SUPPRESS_DEPRECATED_START
- MKLDNNExtensions::AddExt(type,
- [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
- return new Ext(layer);
- });
- IE_SUPPRESS_DEPRECATED_END
- }
-};
-
-#define REG_FACTORY_FOR(__prim, __type) \
-static ExtRegisterBase<__prim> __reg__##__type(#__type)
-
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef MKLDNN_EXTENSION_NODE  // X-macro table: the including file defines MKLDNN_EXTENSION_NODE(prim, type) before including it
+# warning "MKLDNN_EXTENSION_NODE is not defined"
+# define MKLDNN_EXTENSION_NODE(__prim, __type)  // fallback: every table entry below expands to nothing
+#endif
+
+MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
+MKLDNN_EXTENSION_NODE(MathImpl, Abs);
+MKLDNN_EXTENSION_NODE(MathImpl, Acos);
+MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Asin);
+MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Atan);
+MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
+MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
+MKLDNN_EXTENSION_NODE(MathImpl, Cos);
+MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Erf);
+MKLDNN_EXTENSION_NODE(MathImpl, Floor);
+MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid);
+MKLDNN_EXTENSION_NODE(MathImpl, Log);
+MKLDNN_EXTENSION_NODE(MathImpl, Neg);
+MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal);
+MKLDNN_EXTENSION_NODE(MathImpl, Selu);
+MKLDNN_EXTENSION_NODE(MathImpl, Sign);
+MKLDNN_EXTENSION_NODE(MathImpl, Sin);
+MKLDNN_EXTENSION_NODE(MathImpl, Sinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Softplus);
+MKLDNN_EXTENSION_NODE(MathImpl, Softsign);
+MKLDNN_EXTENSION_NODE(MathImpl, Tan);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
+MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
+MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
+MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
+MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
+MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
+MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
+MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
+MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
+MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
+MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
+MKLDNN_EXTENSION_NODE(FillImpl, Fill);
+MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
+MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
+MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace);
+MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate);
+MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
+MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
+MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
+MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
+MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
+MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
+MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
+MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
+MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth);
+MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
+MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
+MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
+MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
+MKLDNN_EXTENSION_NODE(PadImpl, Pad);
+MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
+MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
+MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
+MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
+MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
+MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
+MKLDNN_EXTENSION_NODE(RangeImpl, Range);
+MKLDNN_EXTENSION_NODE(SelectImpl, Select);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare);
+MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
+MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
+MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
+MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
bool is_last_dim = false;
};
-REG_FACTORY_FOR(ImplFactory<LogSoftmaxImpl>, LogSoftmax);
+REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
float gamma = 0.0f;
};
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);
+REG_FACTORY_FOR(MathImpl, Abs);
+REG_FACTORY_FOR(MathImpl, Acos);
+REG_FACTORY_FOR(MathImpl, Acosh);
+REG_FACTORY_FOR(MathImpl, Asin);
+REG_FACTORY_FOR(MathImpl, Asinh);
+REG_FACTORY_FOR(MathImpl, Atan);
+REG_FACTORY_FOR(MathImpl, Atanh);
+REG_FACTORY_FOR(MathImpl, Ceil);
+REG_FACTORY_FOR(MathImpl, Cos);
+REG_FACTORY_FOR(MathImpl, Cosh);
+REG_FACTORY_FOR(MathImpl, Erf);
+REG_FACTORY_FOR(MathImpl, Floor);
+REG_FACTORY_FOR(MathImpl, HardSigmoid);
+REG_FACTORY_FOR(MathImpl, Log);
+REG_FACTORY_FOR(MathImpl, Neg);
+REG_FACTORY_FOR(MathImpl, Reciprocal);
+REG_FACTORY_FOR(MathImpl, Selu);
+REG_FACTORY_FOR(MathImpl, Sign);
+REG_FACTORY_FOR(MathImpl, Sin);
+REG_FACTORY_FOR(MathImpl, Sinh);
+REG_FACTORY_FOR(MathImpl, Softplus);
+REG_FACTORY_FOR(MathImpl, Softsign);
+REG_FACTORY_FOR(MathImpl, Tan);
} // namespace Cpu
} // namespace Extensions
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
const_mkldnn_primitive_desc_t const_bndesc =
mkldnn_primitive_desc_query_pd(primitive_desc.get(),
mkldnn::convert_to_c(weights_pd), 0);
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_layers_internal.hpp>
+
+// WA for xbyak.h
+#ifdef _WIN32
+# ifndef _WINSOCKAPI_
+# define _WINSOCKAPI_
+# endif
+# ifndef _WINSOCK2API_
+# define _WINSOCK2API_
+# endif
+#endif
#include "cpu_isa_traits.hpp"
using namespace mkldnn;
getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
createDescriptor({in_candidate}, {out_candidate});
} else {
- inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
- outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
+ inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
+ outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
- if (getParentEdges().size() != 1)
+ if (getParentEdges().empty() || getParentEdges().size() > 3)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
auto * deconvLayer = dynamic_cast<DeconvolutionLayer*>(getCnnLayer().get());
if (deconvLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert deconvolution layer.";
- if (deconvLayer->_weights == nullptr) {
+ if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) {
THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name
<< " used in MKLDNN node: " << getName() << "\n"
<< "Use the second argumemt of InferenceEngine::Core::ReadNetwork"
withGroups = (deconvLayer->_group > 1);
isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth &&
deconvLayer->_group == deconvLayer->input()->getDims()[1];
- withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0);
+
+ bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3;
if (withBiases) {
- biases = deconvLayer->_biases;
+ Blob::Ptr biases;
+
+ if (getParentEdges().size() == 3) {
+ auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer();
+ if (biasLayer->type != "Const")
+ THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases";
+ biases = biasLayer->blobs["custom"];
+ } else {
+ biases = deconvLayer->_biases;
+ }
+
// WA: we add bias as depthwise post op
- setBiasAsPostOp();
+ setBiasAsPostOp(biases);
}
/* Original layout format for deconv weights is iohw (from Caffe).
weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]);
}
- internalBlobs.push_back(createInternalBlob(weightDims, true));
+ if (getParentEdges().size() == 1)
+ internalBlobs.push_back(createInternalBlob(weightDims, true));
invertVectorCopyUtoI(deconvLayer->_stride, stride);
for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {
}
}
-void MKLDNNDeconvolutionNode::setBiasAsPostOp() {
+void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) {
mkldnn::post_ops ops;
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biases->size(), 16))});
prim.reset(new convolution_backward_data(prim_desc,
getParentEdgeAt(0)->getMemory().GetPrimitive(),
- internalBlobMemory[0]->GetPrimitive(),
+ getWeights(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
}
}
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
- InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
- if (desc.getLayout() == InferenceEngine::Layout::ANY)
+ InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc())
+ : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
+
+ if (desc.getLayout() == InferenceEngine::Layout::ANY) {
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getParentEdgeAt(idx)->getDims().ToSizeVector(),
desc.getLayout()));
- else
- return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
- getParentEdgeAt(idx)->getDims().ToSizeVector(),
- desc.getBlockingDesc()));
+ } else {
+ if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(),
+ desc.getBlockingDesc().getOrder().end()) + 1) {
+ auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector();
+ auto new_dims = weightsDims.ToSizeVector();
+
+ auto td = InferenceEngine::TensorDesc(desc.getPrecision(),
+ new_dims,
+ desc.getBlockingDesc());
+ if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) {
+ td.setLayout(BLOCKED);
+ }
+ return MKLDNNMemoryDesc(td);
+ } else {
+ return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
+ getParentEdgeAt(idx)->getDims().ToSizeVector(),
+ desc.getBlockingDesc()));
+ }
+ }
}
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
getChildEdgeAt(idx)->getDims().ToSizeVector(),
desc.getBlockingDesc()));
}
+
+// Selects the memory primitive used as deconvolution weights: the memory of the
+// parent edge at index 1 when more than one parent edge is connected, otherwise
+// the first internal blob.
+const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const {
+    if (getParentEdges().size() > 1) {
+        return getParentEdgeAt(1)->getMemory().GetPrimitive();
+    }
+    return internalBlobMemory[0]->GetPrimitive();
+}
+
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
return false;
}
+ // Reports the number of descriptor inputs as the number of connected parent edges.
+ // The `desc` argument is not consulted.
+ size_t descInputNumbers(MKLDNNDescriptor desc) override {
+ const auto parentEdgeCount = getParentEdges().size();
+ return static_cast<size_t>(parentEdgeCount);
+ }
+
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
private:
- bool withBiases = false;
bool withGroups = false;
bool isDW = false;
size_t groupNum = 1;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingR;
MKLDNNDims weightsDims;
- InferenceEngine::Blob::Ptr biases;
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;
mkldnn::primitive_attr attr;
std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
- void setBiasAsPostOp();
+ void setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases);
+
+ const mkldnn::memory& getWeights() const;
};
} // namespace MKLDNNPlugin
setPostOps(attr, true);
Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision();
+ inputPrecision = inputPrecision == Precision::BF16 ? Precision(Precision::FP32) : inputPrecision;
Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision();
+ outputPrecision = outputPrecision == Precision::BF16 ? Precision(Precision::FP32) : outputPrecision;
if (!fusedWith.empty()) {
auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer();
// MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats.
// Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw)
// as grouped weights planar formats (e.g. goihw) since they have same physical memory layout.
- if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
+ if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
+ MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
try {
mkldnn::memory::dims newDims = dst_blocked->GetDims();
- mkldnn::memory::format newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
- src_blocked->GetDims().size() == 5 ? memory::goidhw :
- src_blocked->GetFormat();
+ mkldnn::memory::format newFormat;
+ newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
+ src_blocked->GetDims().size() == 5 ? memory::goidhw :
+ src_blocked->GetFormat();
auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat);
src_blocked->Create(newDesc, srcPtr, false);
createReorder();
- } catch (const std::exception&) {
+ } catch (...) {
THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
}
+ // MKLDNN doesn't support direct reorders between planar data formats that have different ranks but the same number of elements.
+ // The code block below detects these cases and substitutes the src dims with the dst ones.
+ } else if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
+ MKLDNNMemory::GetPlainFormat(dst_blocked->GetDims()) == dst_blocked->GetFormat() &&
+ src_blocked->GetElementsCount() == dst_blocked->GetElementsCount()) {
+ try {
+ auto newDesc = mkldnn::memory::desc(dst_blocked->GetDims(), src_blocked->GetDataType(), dst_blocked->GetFormat());
+ src_blocked->Create(newDesc, srcPtr, false);
+
+ createReorder();
+ } catch (...) {
+ THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
+ }
+ } else {
+ THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
}
- // TODO: should't we throw exception in this case?
}
}
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
bool sort_result_descending = true;
};
-REG_FACTORY_FOR(ImplFactory<NonMaxSuppressionImpl>, NonMaxSuppression);
+REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "ie_parallel.hpp"
Precision input_precision;
};
-REG_FACTORY_FOR(ImplFactory<OneHotImpl>, OneHot);
+REG_FACTORY_FOR(OneHotImpl, OneHot);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
});
}
-REG_FACTORY_FOR(ImplFactory<PadImpl>, Pad);
+REG_FACTORY_FOR(PadImpl, Pad);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
std::vector<int> shift_;
};
-REG_FACTORY_FOR(ImplFactory<PowerFileImpl>, PowerFile);
+REG_FACTORY_FOR(PowerFileImpl, PowerFile);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <vector>
int _num_priors = 0;
};
-REG_FACTORY_FOR(ImplFactory<PriorBoxImpl>, PriorBox);
+REG_FACTORY_FOR(PriorBoxImpl, PriorBox);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <vector>
float offset_;
};
-REG_FACTORY_FOR(ImplFactory<PriorBoxClusteredImpl>, PriorBoxClustered);
+REG_FACTORY_FOR(PriorBoxClusteredImpl, PriorBoxClustered);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>
};
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronPriorGridGeneratorImpl>, ExperimentalDetectronPriorGridGenerator);
+REG_FACTORY_FOR(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "proposal_imp.hpp"
bool store_prob; // store blob with proposal probabilities
};
-REG_FACTORY_FOR(ImplFactory<ProposalImpl>, Proposal);
+REG_FACTORY_FOR(ProposalImpl, Proposal);
} // namespace Cpu
} // namespace Extensions
#include "proposal_imp.hpp"
+#include <cstring>
#include <cmath>
#include <string>
#include <vector>
const float* x1 = boxes + 2 * num_proposals;
const float* y1 = boxes + 3 * num_proposals;
- memset(is_dead, 0, num_boxes * sizeof(int));
+ std::memset(is_dead, 0, num_boxes * sizeof(int));
#if defined(HAVE_AVX2)
__m256 vc_fone = _mm256_set1_ps(coordinates_offset);
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
+#include <cstring>
#include <cassert>
#include <cmath>
#include <string>
const float* x1 = boxes + 2 * num_proposals;
const float* y1 = boxes + 3 * num_proposals;
- memset(is_dead, 0, num_boxes * sizeof(int));
+ std::memset(is_dead, 0, num_boxes * sizeof(int));
#if defined(HAVE_AVX2)
__m256 vc_fone = _mm256_set1_ps(coordinates_offset);
std::vector<int> roi_indices_;
};
-REG_FACTORY_FOR(ImplFactory<ONNXCustomProposalImpl>, ExperimentalDetectronGenerateProposalsSingleImage);
+REG_FACTORY_FOR(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
#include <vector>
float trans_std_;
};
-REG_FACTORY_FOR(ImplFactory<PSROIPoolingImpl>, PSROIPooling);
+REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
});
return OK;
}
-REG_FACTORY_FOR(ImplFactory<RangeImpl>, Range);
+REG_FACTORY_FOR(RangeImpl, Range);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
}
}
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceAnd);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL1);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL2);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSum);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSumExp);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMax);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMean);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMin);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceOr);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceProd);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSum);
-REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSumSquare);
+REG_FACTORY_FOR(ReduceImpl, ReduceAnd);
+REG_FACTORY_FOR(ReduceImpl, ReduceL1);
+REG_FACTORY_FOR(ReduceImpl, ReduceL2);
+REG_FACTORY_FOR(ReduceImpl, ReduceLogSum);
+REG_FACTORY_FOR(ReduceImpl, ReduceLogSumExp);
+REG_FACTORY_FOR(ReduceImpl, ReduceMax);
+REG_FACTORY_FOR(ReduceImpl, ReduceMean);
+REG_FACTORY_FOR(ReduceImpl, ReduceMin);
+REG_FACTORY_FOR(ReduceImpl, ReduceOr);
+REG_FACTORY_FOR(ReduceImpl, ReduceProd);
+REG_FACTORY_FOR(ReduceImpl, ReduceSum);
+REG_FACTORY_FOR(ReduceImpl, ReduceSumSquare);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "common/defs.h"
#include "common/softmax.h"
}
};
-REG_FACTORY_FOR(ImplFactory<RegionYoloImpl>, RegionYolo);
+REG_FACTORY_FOR(RegionYoloImpl, RegionYolo);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <vector>
int stride;
};
-REG_FACTORY_FOR(ImplFactory<ReorgYoloImpl>, ReorgYolo);
+REG_FACTORY_FOR(ReorgYoloImpl, ReorgYolo);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t work_amount_dst;
};
-REG_FACTORY_FOR(ImplFactory<ReverseSequenceImpl>, ReverseSequence);
+REG_FACTORY_FOR(ReverseSequenceImpl, ReverseSequence);
} // namespace Cpu
} // namespace Extensions
// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
//
-#include "list.hpp"
#include "base.hpp"
#include <cassert>
#include <cmath>
int nw = 0;
};
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl>, ExperimentalDetectronROIFeatureExtractor);
+REG_FACTORY_FOR(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
const size_t SCATTER_UPDATES = 2;
};
-REG_FACTORY_FOR(ImplFactory<ScatterImpl>, ScatterUpdate);
+REG_FACTORY_FOR(ScatterImpl, ScatterUpdate);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <string>
}
};
-REG_FACTORY_FOR(ImplFactory<SelectImpl>, Select);
+REG_FACTORY_FOR(SelectImpl, Select);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t ownStrides[CNTR_SIZE];
};
-REG_FACTORY_FOR(ImplFactory<ShuffleChannelsImpl>, ShuffleChannels);
+REG_FACTORY_FOR(ShuffleChannelsImpl, ShuffleChannels);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
std::vector<simpler_nms_anchor> anchors_;
};
-REG_FACTORY_FOR(ImplFactory<SimplerNMSImpl>, SimplerNMS);
+REG_FACTORY_FOR(SimplerNMSImpl, SimplerNMS);
} // namespace Cpu
} // namespace Extensions
#include "base.hpp"
#include "ie_parallel.hpp"
-#include "list.hpp"
#include <cmath>
#include <string>
std::vector<size_t> _pads_end;
};
-REG_FACTORY_FOR(ImplFactory<SpaceToBatchImpl>, SpaceToBatch);
+REG_FACTORY_FOR(SpaceToBatchImpl, SpaceToBatch);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t ownStrides[CNTR_SIZE];
};
-REG_FACTORY_FOR(ImplFactory<SpaceToDepthImpl>, SpaceToDepth);
+REG_FACTORY_FOR(SpaceToDepthImpl, SpaceToDepth);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t outMaxNumValues = 0;
};
-REG_FACTORY_FOR(ImplFactory<SparseFillEmptyRowsImpl>, SparseFillEmptyRows);
+REG_FACTORY_FOR(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
ReducedOp reduction_op;
};
-REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentMean);
-REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSqrtN);
-REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSum);
+REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentMean);
+REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSqrtN);
+REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSum);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
bool with_default_value = false;
};
-REG_FACTORY_FOR(ImplFactory<SparseToDenseImpl>, SparseToDense);
+REG_FACTORY_FOR(SparseToDenseImpl, SparseToDense);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
Precision input_default_value_precision;
};
-REG_FACTORY_FOR(ImplFactory<ExperimentalSparseWeightedReduceImpl>, ExperimentalSparseWeightedSum);
+REG_FACTORY_FOR(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
}
};
-REG_FACTORY_FOR(ImplFactory<SqueezeImpl>, Squeeze);
+REG_FACTORY_FOR(SqueezeImpl, Squeeze);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
});
}
-REG_FACTORY_FOR(ImplFactory<StridedSliceImpl>, StridedSlice);
+REG_FACTORY_FOR(StridedSliceImpl, StridedSlice);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
}
};
-REG_FACTORY_FOR(ImplFactory<TopKImpl>, TopK);
+REG_FACTORY_FOR(TopKImpl, TopK);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>
int max_rois_num_;
};
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronTopKROIsImpl>, ExperimentalDetectronTopKROIs);
+REG_FACTORY_FOR(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
size_t num_elements = 0;
};
-REG_FACTORY_FOR(ImplFactory<UniqueImpl>, Unique);
+REG_FACTORY_FOR(UniqueImpl, Unique);
} // namespace Cpu
} // namespace Extensions
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include <cmath>
}
};
-REG_FACTORY_FOR(ImplFactory<UnsqueezeImpl>, Unsqueeze);
+REG_FACTORY_FOR(UnsqueezeImpl, Unsqueeze);
} // namespace Cpu
} // namespace Extensions
class ngraph::pass::ConvFusion: public ngraph::pass::GraphRewrite {
public:
ConvFusion() : GraphRewrite() {
- fuse_convolution_with<op::ConvolutionIE, op::v1::Multiply>();
- fuse_convolution_with<op::ConvolutionIE, op::v1::Add>();
- fuse_convolution_with<op::DeconvolutionIE, op::v1::Add>();
+ fuse_convolution_with<op::ConvolutionIE, opset1::Multiply>();
+ fuse_convolution_with<op::ConvolutionIE, opset1::Add>();
+ fuse_convolution_with<op::DeconvolutionIE, opset1::Add>();
}
private:
template <class Conv, class Eltwise>
void ngraph::pass::ConvFusion::fuse_convolution_with() {
- static_assert(std::is_same<Eltwise, ngraph::op::v1::Multiply>() || std::is_same<Eltwise, ngraph::op::v1::Add>(),
- "This transformation works only with ngraph::op::v1::Add and ngraph::op::v1::Multiply");
+ static_assert(std::is_same<Eltwise, ngraph::opset1::Multiply>() || std::is_same<Eltwise, ngraph::opset1::Add>(),
+ "This transformation works only with ngraph::opset1::Add and ngraph::opset1::Multiply");
static_assert(std::is_same<Conv, ngraph::op::ConvolutionIE>() || std::is_same<Conv, ngraph::op::DeconvolutionIE>(),
"This transformation works only with ngraph::op::ConvolutionIE and ngraph::op::DeconvolutionIE");
}
// TODO: check that constant can be scalar and do not match [1, C, 1, 1] layout
- auto constant_shape = m_const->get_shape();
- auto output_shape = m_conv->get_shape();
+ const auto constant_shape = m_const->get_shape();
+ const auto output_pshape = m_conv->get_output_partial_shape(0);
+
+ if (output_pshape.rank().is_dynamic() || output_pshape[1].is_dynamic()) {
+ return false;
+ }
+
+ const auto channel_dim = output_pshape[1].get_length();
+
size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies<size_t>());
- if (constant_size != output_shape[1]) {
+ if (constant_size != channel_dim) {
return false;
}
- std::shared_ptr<ngraph::Node> constant(m_const);
+ Output<Node> constant(m_const);
if (constant_shape.size() > 1) {
- constant = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {output_shape[1]}), true);
+ constant = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {channel_dim}), true);
}
if (m_conv->output(0).get_target_inputs().size() != 1) {
return false;
}
- std::shared_ptr<Node> new_conv, new_weights, new_bias;
- if (std::dynamic_pointer_cast<op::v1::Add>(eltwise)) {
+ Output<Node> new_conv, new_weights, new_bias;
+ if (std::dynamic_pointer_cast<opset1::Add>(eltwise)) {
// Fuse: ConvolutionIE/DeconvolutionIE->Add
if (m_conv->inputs().size() == 2) {
new_bias = constant;
} else {
- new_bias = std::make_shared<op::v1::Add>(constant, m_conv->input_value(2));
+ new_bias = std::make_shared<opset1::Add>(constant, m_conv->input_value(2));
}
new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), m_conv->input_value(1), new_bias});
- } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<op::v1::Multiply>(eltwise)) {
+ } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<opset1::Multiply>(eltwise)) {
// Fuse: ConvolutionIE->Mul
auto weights_shape = m_conv->input(1).get_shape();
Shape const_shape(weights_shape.size(), 1);
const_shape[0] = weights_shape[0];
- auto const_reshape = std::make_shared<op::v1::Reshape>(constant,
+ auto const_reshape = std::make_shared<opset1::Reshape>(constant,
op::Constant::create(element::i64, Shape{const_shape.size()}, const_shape), true);
- new_weights = std::make_shared<op::v1::Multiply> (m_conv->input_value(1), const_reshape);
+ new_weights = std::make_shared<opset1::Multiply> (m_conv->input_value(1), const_reshape);
if (m_conv->inputs().size() == 2) {
new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights});
} else {
- auto bias_reshape = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
- new_bias = std::make_shared<op::v1::Multiply>(bias_reshape, constant);
+ auto bias_reshape = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
+ new_bias = std::make_shared<opset1::Multiply>(bias_reshape, constant);
new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights, new_bias});
}
} else {
return false;
}
- ngraph::copy_runtime_info({m_conv, eltwise}, new_conv);
- new_conv->set_friendly_name(m.get_match_root()->get_friendly_name());
- ngraph::replace_node(m.get_match_root(), new_conv);
+ ngraph::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr());
+ new_conv.get_node_shared_ptr()->set_friendly_name(m.get_match_root()->get_friendly_name());
+ ngraph::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr());
return true;
};
return callback;
NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass)
NGRAPH_PASS(ConvertNMS3, ::ngraph::pass)
NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass)
+NGRAPH_PASS(ConvertShuffleChannels3, ::ngraph::pass)
NGRAPH_PASS(ConvertTopK3, ::ngraph::pass)
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertShuffleChannels3);
+
+} // namespace pass
+} // namespace ngraph
+
+// GraphRewrite-derived pass; all of its setup happens in the constructor,
+// which delegates to convert_shuffle_channels3().
+class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite {
+    // Declared here and defined out of line; called exactly once, from the constructor.
+    void convert_shuffle_channels3();
+
+public:
+    ConvertShuffleChannels3() : GraphRewrite() { convert_shuffle_channels3(); }
+};
}
void op::ConvolutionIE::validate_and_infer_types() {
- const PartialShape& data_batch_pshape = get_input_partial_shape(0);
+ PartialShape data_batch_shape = get_input_partial_shape(0);
element::Type data_batch_et = get_input_element_type(0);
- const PartialShape& filters_pshape = get_input_partial_shape(1);
+ PartialShape filters_shape = get_input_partial_shape(1);
element::Type filters_et = get_input_element_type(1);
- PartialShape result_shape{PartialShape::dynamic()};
-
- // we need to adjust filters_shape to reuse helpers for normal convolution
- if (filters_pshape.is_static() && data_batch_pshape.is_static()) {
- auto filters_shape = filters_pshape.to_shape();
- auto groups = m_group;
- auto data_batch_shape = data_batch_pshape.to_shape();
- data_batch_shape[1] /= groups;
-
- if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
- m_pads_begin.clear();
- m_pads_end.clear();
- infer_auto_padding(
- data_batch_shape,
- Shape(filters_shape.begin() + 2, filters_shape.end()), // Remove {O,I}
- m_strides,
- m_dilations,
- m_auto_pad,
- m_pads_end,
- m_pads_begin);
- }
-
- result_shape =
- infer_convolution_forward(this,
- data_batch_shape,
- Strides(m_strides.size(), 1), // dummy data dilations
- m_pads_begin,
- m_pads_end,
- filters_shape,
- m_strides,
- m_dilations);
- }
element::Type result_et;
NODE_VALIDATION_CHECK(
filters_et,
").");
+ PartialShape result_shape{PartialShape::dynamic()};
+
+ // If the number of groups is greater than 1 and the channel dimension is dynamic, the output shape cannot be calculated
+ if (m_group > 1) {
+ if (data_batch_shape.rank().is_dynamic() || data_batch_shape[1].is_dynamic()) {
+ set_output_type(0, result_et, result_shape);
+ return;
+ } else {
+ // Update channel dimension according to groups count
+ data_batch_shape[1] = data_batch_shape[1].get_length() / m_group;
+ }
+ }
+
+ // we need to adjust filters_shape to reuse helpers for normal convolution
+ if (filters_shape.is_static() && data_batch_shape.is_static()) {
+ if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
+ m_pads_begin.clear();
+ m_pads_end.clear();
+ auto filter_shape = filters_shape.to_shape();
+ filter_shape.erase(filter_shape.begin(), filter_shape.begin() + 2); // Remove {O,I}
+ infer_auto_padding(data_batch_shape.to_shape(),
+ filter_shape,
+ m_strides,
+ m_dilations,
+ m_auto_pad,
+ m_pads_end,
+ m_pads_begin);
+ }
+ }
+
+ result_shape = infer_convolution_forward(this,
+ data_batch_shape,
+ Strides(m_strides.size(), 1), // dummy data dilations
+ m_pads_begin,
+ m_pads_end,
+ filters_shape,
+ m_strides,
+ m_dilations);
+
set_output_type(0, result_et, result_shape);
}
interpolate_attrs.pads_end = std::vector<size_t>{0};
std::vector<size_t> useless_axes;
- for (const auto & axis : interpolate_axes)
- if (input_shape[axis] == out_spatial_shape[axis] && axis < 2)
- // keeping only those not spatial dimensions that are going to be changed
- useless_axes.push_back(axis);
+ size_t axis_idx = 0;
+ for (auto axis = 0; axis < input_shape.size(); ++axis) {
+ if (interpolate_axes.count(axis)) {
+ if (input_shape[axis] == out_spatial_shape[axis_idx] && axis < 2)
+ // keeping only those non-spatial dimensions that are going to be changed
+ useless_axes.push_back(axis);
+ ++axis_idx;
+ }
+ }
+
std::reverse(useless_axes.begin(), useless_axes.end());
for (const auto & axis : useless_axes) {
interpolate_axes.erase(axis);
#include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp"
#include "transformations/convert_opset3_to_opset2/convert_nms3.hpp"
#include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp"
#include "transformations/convert_opset3_to_opset2/convert_topk3.hpp"
#include <memory>
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+using namespace ngraph;
+
+// Registers a matcher that rewrites opset3::ShuffleChannels into a chain of opset2 operations:
+// Reshape (split the shuffled axis into {group, -1}) -> Transpose (swap the two new dims) -> Reshape (back to
+// the original shape). The original shape is taken at runtime via ShapeOf, only the input rank must be static.
+void ngraph::pass::ConvertShuffleChannels3::convert_shuffle_channels3() {
+    auto data_label = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1, 1});
+    auto shuffle_pattern = std::make_shared<::opset3::ShuffleChannels>(data_label);
+
+    // Returns true when the matched node has been replaced, false when the graph is left untouched.
+    ngraph::graph_rewrite_callback callback = [](pattern::Matcher &m) {
+        auto shuffle_channels = std::dynamic_pointer_cast<::opset3::ShuffleChannels>(m.get_match_root());
+        if (!shuffle_channels) {
+            return false;
+        }
+
+        auto data = shuffle_channels->input_value(0);
+        auto data_rank = data.get_partial_shape().rank();
+        // the decomposition depends on the position of the axis inside the shape, so the rank must be known
+        if (data_rank.is_dynamic()) {
+            return false;
+        }
+
+        // axis constant shared by all ReduceProd nodes created below
+        auto zero_axis = ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0});
+        auto axis = shuffle_channels->get_axis();
+        int64_t group = static_cast<int64_t>(shuffle_channels->get_group());
+        int64_t rank = data_rank.get_length();
+        auto input_shape = std::make_shared<::opset2::ShapeOf>(data);
+        // normalize a negative axis
+        if (axis < 0) {
+            axis += rank;
+        }
+
+        const bool axis_is_first = (axis == 0);
+        const bool axis_is_last = !axis_is_first && (axis + 1 == rank);
+
+        // The shape is split into {dims before axis, axis dim, dims after axis}; the "before"/"after" part is
+        // dropped when the axis is the first/last dimension, so that no split of size 0 is requested.
+        std::vector<int64_t> split_lengths;
+        if (axis_is_first) {
+            split_lengths = {1, rank - 1};
+        } else if (axis_is_last) {
+            split_lengths = {rank - 1, 1};
+        } else {
+            split_lengths = {axis, 1, rank - axis - 1};
+        }
+
+        // split the runtime shape (output of ShapeOf) into the parts described above
+        auto shape_parts = std::make_shared<::opset2::VariadicSplit>(
+                input_shape->output(0),
+                ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0}),
+                ::opset2::Constant::create(element::i64, Shape({split_lengths.size()}), split_lengths));
+
+        // collapses one part of the split shape into a single dimension (product of its elements)
+        const auto collapse_part = [&](size_t part_idx) {
+            return std::make_shared<::opset2::ReduceProd>(shape_parts->output(part_idx), zero_axis, true);
+        };
+
+        // The shuffled dimension itself always becomes the pair {group, -1} ...
+        ::OutputVector reshape_dims = {
+                ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{group}),
+                ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{-1})};
+
+        // ... surrounded by the collapsed "before" and/or "after" parts. The transpose order swaps the
+        // {group, -1} pair and keeps the collapsed parts in place.
+        std::vector<int64_t> transpose_order;
+        if (axis_is_first) {
+            reshape_dims.push_back(collapse_part(1));
+            transpose_order = {1, 0, 2};
+        } else if (axis_is_last) {
+            reshape_dims.insert(reshape_dims.begin(), collapse_part(0));
+            transpose_order = {0, 2, 1};
+        } else {
+            reshape_dims.insert(reshape_dims.begin(), collapse_part(0));
+            reshape_dims.push_back(collapse_part(2));
+            transpose_order = {0, 2, 1, 3};
+        }
+
+        // reshape the tensor to the intermediate shape
+        auto intermediate_shape = std::make_shared<::opset2::Concat>(reshape_dims, 0);
+        auto reshape = std::make_shared<::opset2::Reshape>(data, intermediate_shape, false);
+        // swap the dimensions that appeared after splitting the shuffled dimension into two
+        auto order_const = ::opset2::Constant::create(element::i64,
+                                                      Shape({transpose_order.size()}),
+                                                      transpose_order);
+        auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), order_const);
+        // restore the original shape
+        auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), input_shape->output(0), false);
+
+        // runtime info of the replaced node is copied to the listed new nodes
+        ::NodeVector new_ops = {input_shape, shape_parts, transpose, reshape, reshape_back, intermediate_shape};
+        for (const auto& dim : reshape_dims) {
+            new_ops.insert(new_ops.begin(), dim.get_node_shared_ptr());
+        }
+        reshape_back->set_friendly_name(shuffle_channels->get_friendly_name());
+        ::copy_runtime_info(shuffle_channels, new_ops);
+        ::replace_node(shuffle_channels, reshape_back);
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(shuffle_pattern, "ConvertShuffleChannels3");
+    this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
struct DataInfo final {
std::unordered_map<std::string, int> offset;
+ std::unordered_map<std::string, ie::TensorDesc> descFromPlugin;
int totalSize = 0;
};
const Data& newChild);
void disconnectDatas(const DataToDataAllocation& edge);
+ void disconnectDatas(const DataToShapeAllocation& edge);
//
// Nodes removal
auto ioBufferOffset = data->attrs().get<int>("ioBufferOffset");
IE_ASSERT(ioBufferOffset + data->totalByteSize() <= inputInfo.totalSize);
+ inputInfo.descFromPlugin[data->name()] = data->desc().toTensorDesc();
inputInfo.offset[data->name()] = ioBufferOffset;
} else if (DataUsage::Output == data->usage()) {
IE_ASSERT(outputInfo.offset.count(data->name()) == 0);
auto ioBufferOffset = data->attrs().get<int>("ioBufferOffset");
IE_ASSERT(ioBufferOffset + data->totalByteSize() <= outputInfo.totalSize);
+ outputInfo.descFromPlugin[data->name()] = data->desc().toTensorDesc();
outputInfo.offset[data->name()] = ioBufferOffset;
}
}
};
StridedSliceParams PassImpl::parseInputParams(const Stage& stage) {
- const auto beginInput = stage->input(1);
- const auto endInput = stage->input(2);
- const auto num_input_dims = stage->input(0)->desc().numDims();
+ const auto input = stage->input(0);
+ const auto beginInput = stage->input(1);
+ const auto endInput = stage->input(2);
+ const auto num_input_dims = input->desc().numDims();
StridedSliceParams params;
IE_ASSERT(beginInput->content() != nullptr);
IE_ASSERT(endInput->content() != nullptr);
- auto vectorToDimValues = [](const std::vector<int>& v) {
- auto dims = DimsOrder::fromNumDims(v.size()).toIndices();
- int idx = v.size();
+ const auto numpyIdxVectorToDimValues = [&input](const std::vector<int>& values) {
+ auto dims = DimsOrder::fromNumDims(values.size()).toIndices();
+
+ // IE notation to GT notation
+ std::vector<int> revertedValues(values.size());
+ std::reverse_copy(values.begin(), values.end(), revertedValues.begin());
+
+ int idx = 0;
for (auto& dim : dims) {
- idx--;
- dim.second = v[idx];
+ auto value = revertedValues[idx++];
+ if (value < 0) {
+ value = std::max(input->desc().dim(dim.first) + value + 1, 0);
+ }
+ value = std::min(input->desc().dim(dim.first), value);
+ dim.second = value;
}
+
return dims;
};
- params.begin = vectorToDimValues(
+ params.begin = numpyIdxVectorToDimValues(
std::vector<int>(beginInput->content()->get<int>(),
beginInput->content()->get<int>() + beginInput->desc().dims().get(Dim::C, 0)));
- params.end = vectorToDimValues(
+ params.end = numpyIdxVectorToDimValues(
std::vector<int>(endInput->content()->get<int>(),
endInput->content()->get<int>() + endInput->desc().dims().get(Dim::C, 0)));
if (stage->numInputs() == 4) {
const auto stridesInput = stage->input(3);
IE_ASSERT(stridesInput->content() != nullptr);
- params.strides = vectorToDimValues(
+ params.strides = numpyIdxVectorToDimValues(
std::vector<int>(stridesInput->content()->get<int>(),
stridesInput->content()->get<int>() + stridesInput->desc().dims().get(Dim::C, 0)));
} else {
- params.strides = vectorToDimValues(std::vector<int>(num_input_dims, 1));
+ params.strides = numpyIdxVectorToDimValues(std::vector<int>(num_input_dims, 1));
}
IE_ASSERT(params.begin.size() == num_input_dims);
IE_ASSERT(c != '1') << "VPU doesn't support shrink_axis_mask for StridedSlice";
}
- params.begin_mask = vectorToDimValues(begin_mask_values);
- params.end_mask = vectorToDimValues(end_mask_values);
+ params.begin_mask = numpyIdxVectorToDimValues(begin_mask_values);
+ params.end_mask = numpyIdxVectorToDimValues(end_mask_values);
return params;
}
m_params.strides_dms.set(dim, 1);
}
- auto clip = [](int value, int min, int max) {
- return std::min(std::max(min, value), max);
- };
-
for (const auto& dim : input->desc().dimsOrder().toPermutation()) {
m_params.strides_dms.set(dim, params.strides[dim]);
IE_ASSERT(params.begin_mask[dim] == 1 || params.begin_mask[dim] == 0);
IE_ASSERT(params.end_mask[dim] == 1 || params.end_mask[dim] == 0);
- m_params.begin_dms.set(dim,
- params.begin_mask[dim] ? clip(params.begin[dim], 0, input->desc().dim(dim)) : 0);
- m_params.end_dms.set(dim,
- params.end_mask[dim] ? clip(params.end[dim], 0, input->desc().dim(dim)) : input->desc().dim(dim));
+ m_params.begin_dms.set(dim, params.begin_mask[dim] ? params.begin[dim] : 0);
+ m_params.end_dms.set(dim, params.end_mask[dim] ? params.end[dim] : input->desc().dim(dim));
IE_ASSERT(dim != Dim::N || numDims < 4 || m_params.strides_dms[dim] == 1)
<< "VPU doesn't support batch strides for StridedSlice";
input = intermediateOutputData;
}
+ VPU_INTERNAL_CHECK(input->desc().dims() == output->desc().dims(),
+ "StridedSlice pass: result tensor dims (%v) must be equal to output "
+ "tensor dims (%v)", input->desc().dims(), output->desc().dims());
+
_stageBuilder->addCopyStage(
model,
formatString("%s@copy-output", stage->name()),
void ModelObj::replaceDataToShapeChild(
const DataToShapeAllocation& edge,
const Data& newChild) {
- auto parent = edge->parent();
- auto oldChild = edge->child();
-
- oldChild->_parentDataToShapeEdge = nullptr;
+ edge->_child->_parentDataToShapeEdge = nullptr;
edge->_child = newChild;
VPU_THROW_UNLESS(newChild->_parentDataToShapeEdge == nullptr,
}
}
+// Removes a data-to-shape edge from the model: the child forgets its parent edge, the parent forgets
+// this child edge, and the edge is dropped from the model's list of shape edges.
+void ModelObj::disconnectDatas(const DataToShapeAllocation& edge) {
+    // unlink both ends of the edge
+    edge->child()->_parentDataToShapeEdge = nullptr;
+    edge->parent()->_childDataToShapeEdges.erase(edge);
+
+    // the edge must still be registered in the model before it is removed from the list
+    const auto edgePos = edge->_ptrPosInModel;
+    IE_ASSERT(edgePos != _shapeEdgePtrList.end());
+    _shapeEdgePtrList.erase(edgePos);
+}
+
void ModelObj::disconnectStage(const Stage& stage) {
//
// Check that objects belong to the same Model.
layer->name, layer->type, 1, shape->name());
model->replaceStageOutput(dataProducerEdge, dataOutput);
- if (const auto& dataToShapeEdge = data->parentDataToShapeEdge()) {
- model->replaceDataToShapeChild(dataToShapeEdge, dataOutput);
+ if (auto dataToShapeEdge = data->parentDataToShapeEdge()) {
+ const auto& parent = dataToShapeEdge->parent();
+ VPU_THROW_UNLESS(parent == shape, "Myriad plugin encountered layer of type \"{}\" and name \"{}\" with input #{} (data input with name \"{}\") that "
+ "already has parent in terms of data to shape connection. The parent is expected to be input #{} (shape input with name \"{}\") of the layer, so "
+ "it's a \"{}\" with already connected inputs, but actual parent is other data object with name \"{}\". The case of connected inputs is considered "
+ "as \"{}\" that goes directly to \"{}\" as a result of some optimization (operation between them has been optimized out). Other cases, when some "
+ "input already has a connection, but with other data object are prohibited.",
+ layer->type, layer->name, 0, data->name(), 1, shape->name(), layer->type, parent->name(), layer->type, layer->type);
+ model->disconnectDatas(dataToShapeEdge);
}
model->removeUnusedData(data);
void initialCheckImpl() const override {
const auto& operation = type();
const auto& dataTypeInput0 = input(0)->desc().type();
+ const auto& dataTypeOutput = output(0)->desc().type();
{
auto supportedDataTypesInput0 = EnumSet<DataType>{DataType::FP16};
- if (operation == StageType::Sum || operation == StageType::Greater_equal || operation == StageType::Select ||
+ if (operation == StageType::Sum || operation == StageType::Greater_equal ||
+ operation == StageType::Equal || operation == StageType::Select ||
operation == StageType::Prod || operation == StageType::Max) {
supportedDataTypesInput0.insert(DataType::S32);
}
static_cast<Handle<StageNode>>(this), dataTypeInput0, supportedDataTypesInput0);
}
- if (operation != StageType::Select || dataTypeInput0 == DataType::FP16) {
- assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput0}, {dataTypeInput0}}, {{dataTypeInput0}});
- } else {
+ if (operation == StageType::Select && dataTypeInput0 == DataType::S32) {
auto supportedDataTypesInput1 = EnumSet<DataType>{DataType::FP16, DataType::S32};
const auto& dataTypeInput1 = input(1)->desc().type();
VPU_THROW_UNLESS(supportedDataTypesInput1.count(dataTypeInput1) != 0,
- "Stage node %v types check error: input #1 has type %v, but one of %v is expected",
- static_cast<Handle<StageNode>>(this), dataTypeInput1, supportedDataTypesInput1);
+ "Stage node %v types check error: input #1 has type %v, but one of %v is expected",
+ static_cast<Handle<StageNode>>(this), dataTypeInput1, supportedDataTypesInput1);
assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput1}, {dataTypeInput1}}, {{dataTypeInput1}});
+ } else if (operation == StageType::Greater && dataTypeInput0 != dataTypeOutput) {
+ assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}}, {{DataType::S32}});
+ } else {
+ assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput0}, {dataTypeInput0}}, {{dataTypeInput0}});
}
}
inputs,
outputs);
- auto specialZero = layer->GetParamAsInt("special_zero", 0);
+ auto specialZero = layer->GetParamAsBool("special_zero", false);
outShapeOfReshapeStage->attrs().set<bool>("specialZero", specialZero);
}
namespace MyriadPlugin {
ExecutableNetwork::ExecutableNetwork(
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfig& config) :
_config(config) {
_config.logLevel(),
defaultOutput(_config.pluginLogFilePath()));
- _executor = std::make_shared<MyriadExecutor>(_config.forceReset(), _config.logLevel(), _log);
+ _executor = std::make_shared<MyriadExecutor>(_config.forceReset(), std::move(mvnc), _config.logLevel(), _log);
_device = _executor->openDevice(devicePool, _config);
const auto& compileConfig = config.compileConfig();
}
ExecutableNetwork::ExecutableNetwork(
- ICNNNetwork& network, std::vector<DevicePtr>& devicePool,
+ ICNNNetwork& network,
+ std::shared_ptr<IMvnc> mvnc,
+ std::vector<DevicePtr>& devicePool,
const MyriadConfig& config) :
- ExecutableNetwork(devicePool, config) {
+ ExecutableNetwork(std::move(mvnc), devicePool, config) {
VPU_PROFILE(ExecutableNetwork);
const auto compilerLog = std::make_shared<Logger>(
}
ExecutableNetwork::ExecutableNetwork(std::istream& strm,
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfig& config) :
- ExecutableNetwork(devicePool, config) {
+ ExecutableNetwork(std::move(mvnc), devicePool, config) {
VPU_PROFILE(ExecutableNetwork);
Import(strm, devicePool, config);
}
ExecutableNetwork::ExecutableNetwork(
const std::string& blobFilename,
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfig& config) :
- ExecutableNetwork(devicePool, config) {
+ ExecutableNetwork(std::move(mvnc), devicePool, config) {
VPU_PROFILE(ExecutableNetwork);
std::ifstream blobFile{blobFilename, std::ios::binary};
Import(blobFile, devicePool, config);
typedef std::shared_ptr<ExecutableNetwork> Ptr;
explicit ExecutableNetwork(InferenceEngine::ICNNNetwork &network,
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfig& config);
explicit ExecutableNetwork(std::istream& strm,
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfig& config);
explicit ExecutableNetwork(const std::string &blobFilename,
+ std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfig& config);
const size_t _maxTaskExecutorGetResultCount = 1;
std::queue<std::string> _taskExecutorGetResultIds;
- ExecutableNetwork(std::vector<DevicePtr> &devicePool,
- const MyriadConfig& config);
+ ExecutableNetwork(std::shared_ptr<IMvnc> mvnc,
+ std::vector<DevicePtr> &devicePool,
+ const MyriadConfig& config);
InferenceEngine::ITaskExecutor::Ptr getNextTaskExecutor() {
std::string id = _taskExecutorGetResultIds.front();
static std::mutex device_mutex;
-MyriadExecutor::MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log) {
+MyriadExecutor::MyriadExecutor(bool forceReset, std::shared_ptr<IMvnc> mvnc,
+ const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log), _mvnc(std::move(mvnc)) {
VPU_PROFILE(MyriadExecutor);
- _mvnc = std::make_shared<Mvnc>();
+ VPU_THROW_UNLESS(_mvnc, "mvnc is null");
int ncResetAll = forceReset;
auto status = ncGlobalSetOption(NC_RW_RESET_ALL, &ncResetAll, sizeof(ncResetAll));
if (status != NC_OK) {
return statusOpen;
}
+ ncDeviceOpenParams_t deviceOpenParams = {};
+ deviceOpenParams.watchdogHndl = _mvnc->watchdogHndl();
+ deviceOpenParams.watchdogInterval = config.watchdogInterval().count();
+ deviceOpenParams.customFirmwareDirectory = dirName.c_str();
+
// Open new device with specific path to FW folder
statusOpen = ncDeviceOpen(&device._deviceHandle,
- in_deviceDesc, config.watchdogInterval().count(), dirName.c_str());
+ in_deviceDesc, deviceOpenParams);
if (statusOpen != NC_OK) {
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return statusOpen;
}
reinterpret_cast<void*>(&device._platform), &dataLength);
if (status != NC_OK || dataLength != sizeof(device._platform)) {
_log->warning("Failed to get device platform");
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return status != NC_OK ? status : NC_ERROR; // for dataLength error
}
reinterpret_cast<void*>(&device._protocol), &dataLength);
if (status != NC_OK || dataLength != sizeof(device._protocol)) {
_log->warning("Failed to get device protocol");
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return status != NC_OK ? status : NC_ERROR; // for dataLength error
}
reinterpret_cast<void*>(&device._maxGraphNum), &dataLength);
if (status != NC_OK || dataLength != sizeof(device._maxGraphNum)) {
_log->warning("Failed to get maximum supported number of graphs");
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return status != NC_OK ? status : NC_ERROR; // for dataLength error
}
reinterpret_cast<void*>(&deviceName), &dataLength);
if (status != NC_OK || dataLength > NC_MAX_NAME_SIZE) {
_log->warning("Failed to get name of booted device");
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return status != NC_OK ? status : NC_ERROR; // for dataLength error
} else {
device._name = deviceName;
if (status != NC_OK) {
_log->warning("Failed to set configuration for Power Manager");
- ncDeviceClose(&device._deviceHandle);
+ ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl());
return status;
}
uint32_t frequency;
};)
-void MyriadExecutor::closeDevices(std::vector<DevicePtr> &devicePool) {
+void MyriadExecutor::closeDevices(std::vector<DevicePtr> &devicePool, std::shared_ptr<IMvnc> mvnc) {
VPU_PROFILE(closeDevices);
std::lock_guard<std::mutex> lock(device_mutex);
for (auto &device : devicePool) {
if (device->_deviceHandle != nullptr) {
- auto res = ncDeviceClose(&(device->_deviceHandle));
+ auto res = ncDeviceClose(&(device->_deviceHandle), mvnc->watchdogHndl());
if (res != NC_OK)
printf("ncDeviceClose failed (%d)\n", static_cast<int>(res));
device->_deviceHandle = nullptr;
unsigned int _numStages = 0;
public:
- MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log);
+ MyriadExecutor(bool forceReset, std::shared_ptr<IMvnc> mvnc,
+ const LogLevel& vpuLogLevel, const Logger::Ptr& log);
~MyriadExecutor() = default;
/**
*/
DevicePtr openDevice(std::vector<DevicePtr> &devicePool, const MyriadConfig& config);
- static void closeDevices(std::vector<DevicePtr> &devicePool);
+ static void closeDevices(std::vector<DevicePtr> &devicePool, std::shared_ptr<IMvnc> mvnc);
void allocateGraph(DevicePtr &device,
GraphDesc &graphDesc,
_inputInfo.totalSize, nullptr, 0);
}
+// Copies the data of a dynamically-shaped result from `in` to `out`.
+//
+// in  - blob laid out according to the upper-bound dimensions (rows are padded up to the upper bound)
+// out - blob whose tensor descriptor already holds the actual dimensions; receives densely packed data
+//
+// Both blobs must have the same layout, rank and element size, and every actual dimension must not exceed
+// the corresponding upper-bound dimension; otherwise an exception is thrown instead of reading or writing
+// out of bounds. Only 1D and 2D blobs are supported.
+static void copyBlobAccordingUpperBound(
+        const Blob::Ptr& in,
+        const Blob::Ptr& out) {
+    const auto inLayout = in->getTensorDesc().getLayout();
+    const auto outLayout = out->getTensorDesc().getLayout();
+
+    const auto& inDims = in->getTensorDesc().getDims();
+    const auto& outDims = out->getTensorDesc().getDims();
+
+    IE_ASSERT(inLayout == outLayout);
+    IE_ASSERT(inDims.size() == outDims.size());
+    IE_ASSERT(in->element_size() == out->element_size());
+
+    // source is only read, destination is written: take a read-only and a writable pointer respectively
+    const auto inPtr = in->cbuffer().as<const uint8_t *>();
+    IE_ASSERT(inPtr != nullptr);
+
+    const auto outPtr = out->buffer().as<uint8_t *>();
+    IE_ASSERT(outPtr != nullptr);
+
+    if (inDims.size() == 1) {
+        IE_ASSERT(outDims[0] <= inDims[0]);
+        // data is contiguous: copy only as many bytes as the actual shape of the destination describes
+        std::copy_n(inPtr, out->byteSize(), outPtr);
+    } else if (inDims.size() == 2) {
+        IE_ASSERT(outDims[0] <= inDims[0]);
+        IE_ASSERT(outDims[1] <= inDims[1]);
+
+        // source rows are strided by the upper-bound row length, destination rows are densely packed
+        const size_t inLineSize = inDims[1] * in->element_size();
+        const size_t outLineSize = outDims[1] * out->element_size();
+        for (size_t n = 0; n < outDims[0]; n++) {
+            std::copy_n(inPtr + n * inLineSize, outLineSize, outPtr + n * outLineSize);
+        }
+    } else {
+        VPU_THROW_EXCEPTION << "Copying of blobs with dynamic shape and num dims greater than 2 unsupported yet";
+    }
+}
+
void MyriadInferRequest::GetResult() {
VPU_PROFILE(GetResult);
const auto& ieOutDesc = ieBlob->getTensorDesc();
const auto& ieOutPrc = ieOutDesc.getPrecision();
+
auto ieOutDims = ieOutDesc.getDims();
+
// Eject dynamic output shape (suffix "@shape") and copy it to vector of dimensions in reverse order
const auto& shapeInfo = _outputInfo.offset.find(ieBlobName + "@shape");
+ // if (isDynamic)
if (shapeInfo != _outputInfo.offset.end()) {
- const auto shapeOffset = resultOffset(shapeInfo->first);
- const auto shapePtr = reinterpret_cast<const int32_t*>(resultBuffer.data() + shapeOffset);
+ auto outData = networkOutputs[ieBlobName];
+ const auto& descFromPlugin = _outputInfo.descFromPlugin.find(ieBlobName);
+ VPU_THROW_UNLESS(descFromPlugin != _outputInfo.descFromPlugin.end(),
+ "Can not find tensor descriptor by plugin for {} output", ieBlobName);
+ const auto& dynOutputDesc = descFromPlugin->second;
+
+ if (ieBlob->getTensorDesc().getLayout() != dynOutputDesc.getLayout()) {
+ ieBlob->deallocate();
+ ieBlob->getTensorDesc().reshape(dynOutputDesc.getDims(), dynOutputDesc.getLayout());
+ ieBlob->allocate();
+ outData->reshape(dynOutputDesc.getDims(), dynOutputDesc.getLayout());
+ }
- const auto shapeRank = ieOutDims.size();
+ const auto shapeResultOffset = resultOffset(shapeInfo->first);
+ const auto shapePtr = reinterpret_cast<const int32_t*>(resultBuffer.data() + shapeResultOffset);
+
+ auto shapeRank = dynOutputDesc.getDims().size();
+ ieOutDims.resize(shapeRank);
for (size_t idx = 0; idx < shapeRank; ++idx) {
ieOutDims[idx] = shapePtr[shapeRank - idx - 1];
}
- }
- // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called
- const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, ieOutDims, getVpuLayout(ieBlobName)};
- const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName));
- copyBlob(tmpBlob, ieBlob);
+ outData->setDims(ieOutDims);
+ ieBlob->getTensorDesc().setDims(ieOutDims);
+
+ // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called
+ const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, dynOutputDesc.getDims(), dynOutputDesc.getLayout()};
+ const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName));
+
+ copyBlobAccordingUpperBound(tmpBlob, ieBlob);
+ } else {
+ // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called
+ const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, ieOutDims, getVpuLayout(ieBlobName)};
+ const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName));
+
+ copyBlob(tmpBlob, ieBlob);
+ }
}
}
// Implementation of methods of class Mvnc
//------------------------------------------------------------------------------
+// Creates the watchdog and stores its handle in m_watcdogPtr together with a deleter that calls
+// watchdog_destroy. Throws if the watchdog cannot be created.
+Mvnc::Mvnc() {
+    WatchdogHndl_t* rawHandle = nullptr;
+    const auto createStatus = watchdog_create(&rawHandle);
+    if (createStatus != WD_ERRNO) {
+        THROW_IE_EXCEPTION << "Cannot create watchdog.";
+    }
+
+    // from here on the handle is owned by the smart pointer
+    const auto destroyWatchdog = [](WatchdogHndl_t* handle) {
+        watchdog_destroy(handle);
+    };
+    m_watcdogPtr = WatchdogUniquePtr(rawHandle, destroyWatchdog);
+}
+
std::vector<ncDeviceDescr_t> Mvnc::AvailableDevicesDesc() const {
int deviceCount = 0;
std::vector<ncDeviceDescr_t> availableDevices(NC_MAX_DEVICES);
#pragma once
+#include <mvnc.h>
+#include <watchdog.h>
+
#include <functional>
#include <vector>
#include <memory>
#include <string>
-#include <mvnc.h>
namespace vpu {
namespace MyriadPlugin {
+using WatchdogUniquePtr = std::unique_ptr<WatchdogHndl_t, std::function<void(WatchdogHndl_t*)>>;
+
//------------------------------------------------------------------------------
// class IMvnc
// This is a class interface for accessing devices.
virtual std::vector<ncDeviceDescr_t> AvailableDevicesDesc() const = 0;
virtual std::vector<std::string> AvailableDevicesNames() const = 0;
+ virtual WatchdogHndl_t* watchdogHndl() = 0;
+
// Destructor
virtual ~IMvnc() = default;
};
class Mvnc : public IMvnc {
public:
+ Mvnc();
+ ~Mvnc() override = default;
+
// Operations
std::vector<ncDeviceDescr_t> AvailableDevicesDesc() const override;
std::vector<std::string> AvailableDevicesNames() const override;
+
+ WatchdogHndl_t* watchdogHndl() override {
+ return m_watcdogPtr.get();
+ }
+
+private:
+ WatchdogUniquePtr m_watcdogPtr;
};
} // namespace MyriadPlugin
vpu::DynamicToStaticShape().transform(function);
}
- return std::make_shared<ExecutableNetwork>(*clonedNetwork, _devicePool, parsedConfigCopy);
+ return std::make_shared<ExecutableNetwork>(*clonedNetwork, _mvnc, _devicePool, parsedConfigCopy);
}
void Engine::SetConfig(const std::map<std::string, std::string> &config) {
Engine::Engine(std::shared_ptr<IMvnc> mvnc) :
_mvnc(std::move(mvnc)),
_metrics(std::make_shared<MyriadMetrics>()) {
- if (!_mvnc) {
- THROW_IE_EXCEPTION << "mvnc is invalid";
- }
+ VPU_THROW_UNLESS(_mvnc, "mvnc is null");
_pluginName = "MYRIAD";
const auto executableNetwork =
std::make_shared<ExecutableNetwork>(
- model, _devicePool, parsedConfigCopy);
+ model, _mvnc, _devicePool, parsedConfigCopy);
return InferenceEngine::ExecutableNetwork{IExecutableNetwork::Ptr(
new ExecutableNetworkBase<ExecutableNetworkInternal>(executableNetwork),
explicit Engine(std::shared_ptr<IMvnc> mvnc);
~Engine() override {
- MyriadExecutor::closeDevices(_devicePool);
+ MyriadExecutor::closeDevices(_devicePool, _mvnc);
}
void SetConfig(const std::map<std::string, std::string>& config) override;
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/convert_opset1_to_legacy/conv_bias_fusion.hpp>
+#include <ngraph/pass/visualize_tree.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+using InputShape = ngraph::PartialShape;
+using WeightsShape = ngraph::Shape;
+using EltwiseType = ngraph::NodeTypeInfo;
+using EltwiseShape = ngraph::Shape;
+using IsNegative = bool;
+
+// Parameterized fixture for the ConvFusion pass.
+// Test parameter tuple: (input shape, weights shape, eltwise op type, eltwise constant shape, is-negative flag).
+// SetUp() builds `f` (graph to transform) and `f_ref` (graph expected after the transformation).
+class ConvFusionTests: public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<InputShape, WeightsShape, EltwiseType, EltwiseShape, IsNegative> > {
+public:
+ std::shared_ptr<ngraph::Function> f, f_ref;
+
+ void SetUp() override {
+ const auto& input_shape = std::get<0>(GetParam());
+ const auto& weights_shape = std::get<1>(GetParam());
+ const auto& eltwise_type = std::get<2>(GetParam());
+ const auto& eltwise_shape = std::get<3>(GetParam());
+ const auto& is_negative = std::get<4>(GetParam());
+
+ f = get_initial_function(input_shape, weights_shape, eltwise_type, eltwise_shape);
+
+ // Negative cases use a second copy of the initial graph as the reference,
+ // i.e. the pass is expected to leave the graph as it is.
+ if (is_negative) {
+ f_ref = get_initial_function(input_shape, weights_shape, eltwise_type, eltwise_shape);
+ } else {
+ f_ref = get_reference_function(input_shape, weights_shape, eltwise_type, eltwise_shape);
+ }
+ }
+
+private:
+ // Builds Parameter -> ConvolutionIE (constant weights filled with 1) -> Add/Multiply with a
+ // constant filled with 1.1. Throws ngraph_error for any other eltwise type.
+ std::shared_ptr<ngraph::Function> get_initial_function(const InputShape& input_shape,
+ const WeightsShape& weights_shape,
+ const EltwiseType& eltwise_type,
+ const EltwiseShape& eltwise_shape) {
+ // Everything after the first two input dimensions is treated as spatial.
+ auto spatial_dims = input_shape.rank().get_length() - 2;
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
+ auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
+ auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
+ ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
+
+ auto const_node = ngraph::opset1::Constant::create(ngraph::element::f32, eltwise_shape, {1.1});
+ ngraph::Output<ngraph::Node> eltwise;
+ if (eltwise_type == ngraph::opset1::Add::type_info) {
+ eltwise = std::make_shared<ngraph::opset1::Add>(conv, const_node);
+ } else if (eltwise_type == ngraph::opset1::Multiply::type_info) {
+ eltwise = std::make_shared<ngraph::opset1::Multiply>(conv, const_node);
+ } else {
+ throw ngraph::ngraph_error("Unsupported element type");
+ }
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltwise.get_node_shared_ptr()}, ngraph::ParameterVector{input});
+ }
+
+ // Builds the graph expected after fusion: a single ConvolutionIE with the eltwise constant folded in.
+ // Throws ngraph_error for any eltwise type other than Add/Multiply.
+ std::shared_ptr<ngraph::Function> get_reference_function(const InputShape& input_shape,
+ const WeightsShape& weights_shape,
+ const EltwiseType& eltwise_type,
+ const EltwiseShape& eltwise_shape) {
+ auto spatial_dims = input_shape.rank().get_length() - 2;
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
+ ngraph::Output<ngraph::Node> weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
+ ngraph::Output<ngraph::Node> conv = std::make_shared<ngraph::op::ConvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1),
+ ngraph::Strides(spatial_dims, 1), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
+
+ ngraph::Output<ngraph::Node> const_node;
+ const_node = ngraph::opset1::Constant::create(ngraph::element::f32, eltwise_shape, {1.1});
+ if (eltwise_type == ngraph::opset1::Add::type_info) {
+ // Add: the constant, flattened to 1-D when it is not already rank 1, becomes a third
+ // input of the convolution.
+ if (eltwise_shape.size() != 1) {
+ const_node = ngraph::op::util::reshapeTo(const_node, ngraph::Shape{ngraph::shape_size(eltwise_shape)});
+ }
+ conv = conv.get_node_shared_ptr()->copy_with_new_inputs({input, weights, const_node});
+ } else if (eltwise_type == ngraph::opset1::Multiply::type_info) {
+ // Multiply: the constant is folded into the weights. It is reshaped to
+ // [weights_shape[0], 1, ..., 1] (same rank as the weights) before the multiplication.
+ if (eltwise_shape.size() > 1) {
+ const_node = ngraph::op::util::reshapeTo(const_node, ngraph::Shape{ngraph::shape_size(eltwise_shape)});
+ }
+ ngraph::Shape const_shape(weights_shape.size(), 1);
+ const_shape[0] = weights_shape[0];
+ weights = std::make_shared<ngraph::opset1::Multiply>(weights, ngraph::op::util::reshapeTo(const_node, const_shape));
+ conv = conv.get_node_shared_ptr()->copy_with_new_inputs({input, weights});
+ } else {
+ throw ngraph::ngraph_error("Unsupported element type");
+ }
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv.get_node_shared_ptr()}, ngraph::ParameterVector{input});
+ }
+};
+
+// Runs ConvFusion on the initial graph and compares the result with the reference graph built in SetUp().
+TEST_P(ConvFusionTests, CompareFunctions) {
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvFusion().run_on_function(f);
+ // Re-run validation and type/shape inference on the transformed function before comparing.
+ f->validate_nodes_and_infer_types();
+ // NOTE(review): the rt_info check is commented out with no reason recorded here;
+ // confirm whether it can be re-enabled.
+ // ASSERT_NO_THROW(check_rt_info(f));
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+}
+
+using add = ngraph::opset1::Add;
+using mul = ngraph::opset1::Multiply;
+
+INSTANTIATE_TEST_CASE_P(ConvAddFusion, ConvFusionTests,
+ testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, add::type_info, EltwiseShape{9, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}, add::type_info, EltwiseShape{6, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}, add::type_info, EltwiseShape{5, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}, add::type_info, EltwiseShape{5, 1, 1, 1}, false),
+ std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}, add::type_info, EltwiseShape{6, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}, add::type_info, EltwiseShape{7, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}, add::type_info, EltwiseShape{8, 1, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}, add::type_info, EltwiseShape{9, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}, add::type_info, EltwiseShape{6, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}, add::type_info, EltwiseShape{5, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}, add::type_info, EltwiseShape{5, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 10}, WeightsShape{3, 3, 1}, add::type_info, EltwiseShape{3, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}, add::type_info, EltwiseShape{2, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3}, add::type_info, EltwiseShape{1, 1}, false)));
+
+// Negative Add cases (last tuple element is true, so SetUp() uses the initial graph as the reference).
+// The DISABLED_ prefix keeps gtest from running these by default.
+INSTANTIATE_TEST_CASE_P(DISABLED_ConvAddFusionNegative, ConvFusionTests,
+ testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{2, 1}, true),
+ std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1}, true),
+ std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, add::type_info, EltwiseShape{9, 1, 1, 1, 1}, true)));
+
+INSTANTIATE_TEST_CASE_P(ConvMulFusion, ConvFusionTests,
+ testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, mul::type_info, EltwiseShape{9, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}, mul::type_info, EltwiseShape{6, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}, mul::type_info, EltwiseShape{5, 1, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}, mul::type_info, EltwiseShape{5, 1, 1, 1}, false),
+ std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}, mul::type_info, EltwiseShape{6, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}, mul::type_info, EltwiseShape{7, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}, mul::type_info, EltwiseShape{8, 1, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}, mul::type_info, EltwiseShape{9, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}, mul::type_info, EltwiseShape{6, 1, 1}, false),
+ std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}, mul::type_info, EltwiseShape{5, 1, 1}, false),
+ std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}, mul::type_info, EltwiseShape{5, 1}, false),
+ std::make_tuple(InputShape{DYN, 3, 10}, WeightsShape{3, 3, 1}, mul::type_info, EltwiseShape{3, 1}, false),
+ std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}, mul::type_info, EltwiseShape{2, 1}, false),
+ std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3}, mul::type_info, EltwiseShape{1, 1}, false)));
+
+// Negative Multiply cases (last tuple element is true, so SetUp() uses the initial graph as the
+// reference). The DISABLED_ prefix keeps gtest from running these by default.
+// Inputs are 5-D to match the 5-D weights, mirroring DISABLED_ConvAddFusionNegative: the cases are
+// meant to differ from the positive ones only in the eltwise constant shape, not to feed the
+// convolution an input/weights rank mismatch.
+INSTANTIATE_TEST_CASE_P(DISABLED_ConvMulFusionNegative, ConvFusionTests,
+        testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{2, 1}, true),
+                        std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1}, true),
+                        std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, mul::type_info, EltwiseShape{9, 1, 1, 1, 1}, true)));
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <ngraph/pass/algebraic_simplification.hpp>
+#include <ngraph/pass/visualize_tree.hpp>
+#include <transformations/convert_opset1_to_legacy/convert_convolutions.hpp>
+#include <ngraph_ops/convolution_ie.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+using InputShape = ngraph::PartialShape;
+using WeightsShape = ngraph::Shape;
+
+// Parameterized fixture for the ConvertConvolutions pass.
+// Test parameter tuple: (input shape, weights shape).
+// SetUp() builds `f` (opset1::Convolution graph) and `f_ref` (the same graph expressed with ConvolutionIE).
+class ConvertConvolutionsTest: public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<InputShape, WeightsShape> > {
+public:
+ std::shared_ptr<ngraph::Function> f, f_ref;
+
+ void SetUp() override {
+ const auto& input_shape = std::get<0>(GetParam());
+ const auto& weights_shape = std::get<1>(GetParam());
+
+ f = get_initial_function(input_shape, weights_shape);
+ f_ref = get_reference_function(input_shape, weights_shape);
+ }
+
+private:
+ // Builds Parameter -> opset1::Convolution with constant weights filled with 1.
+ std::shared_ptr<ngraph::Function> get_initial_function(const ngraph::PartialShape & input_shape,
+ const ngraph::Shape & weights_shape) {
+ // Everything after the first two input dimensions is treated as spatial.
+ auto spatial_dims = input_shape.rank().get_length() - 2;
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
+ auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
+ auto conv = std::make_shared<ngraph::opset1::Convolution>(input, weights, ngraph::Strides(spatial_dims, 1),
+ ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::Strides(spatial_dims, 1));
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
+ }
+
+ // Builds the expected graph: Parameter -> ConvolutionIE with the same constant weights.
+ // Note the constructor argument order differs from opset1::Convolution above
+ // (two Strides first, then two CoordinateDiff).
+ std::shared_ptr<ngraph::Function> get_reference_function(const ngraph::PartialShape & input_shape,
+ const ngraph::Shape & weights_shape) {
+ auto spatial_dims = input_shape.rank().get_length() - 2;
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
+ auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
+ auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
+ ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
+ }
+};
+
+// Runs ConvertConvolutions on the initial graph, compares it with the reference graph, and checks
+// that the function's output shape is the same before and after the conversion.
+TEST_P(ConvertConvolutionsTest, CompareFunctions) {
+    // Keep a copy, not a reference: a reference into the function would observe whatever the
+    // output shape becomes later, turning the final assertion into a self-comparison.
+    const ngraph::PartialShape orig_shape = f->get_output_partial_shape(0);
+    ngraph::pass::InitNodeInfo().run_on_function(f);
+    ngraph::pass::ConvertConvolutions().run_on_function(f);
+    // Re-infer shapes so the output shape reflects the converted graph (the other
+    // transformation tests do the same after running their pass).
+    f->validate_nodes_and_infer_types();
+    ASSERT_NO_THROW(check_rt_info(f));
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+    const auto & new_shape = f->get_output_partial_shape(0);
+    ASSERT_TRUE(orig_shape.same_scheme(new_shape)) << "Shape " << orig_shape << " is not equal to " << new_shape;
+}
+
+INSTANTIATE_TEST_CASE_P(ConvertConvolution, ConvertConvolutionsTest,
+ testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}),
+ std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}),
+ std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}),
+ std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}),
+ std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}),
+ std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}),
+ std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}),
+ std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}),
+ std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}),
+ std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}),
+ std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}),
+ std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}),
+ std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}),
+ std::make_tuple(InputShape{DYN, 3, 10}, WeightsShape{3, 3, 1}),
+ std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}),
+ std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3})));
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp>
+#include <transformations/init_node_info.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ngraph;
+
+// Builds Parameter(f32, p) -> opset3::ShuffleChannels(axis, group) named "shc", runs InitNodeInfo
+// and ConvertShuffleChannels3 on it, re-validates the function and returns the transformed function.
+// The leading "::" on ngraph names resolves through the `using namespace ngraph` directive above.
+std::shared_ptr<ngraph::Function> buildInputGraph(int64_t axis, int64_t group, const ::PartialShape& p) {
+ auto input = std::make_shared<::opset3::Parameter>(::element::f32, p);
+ auto shuffle_channels = std::make_shared<::opset3::ShuffleChannels>(input, axis, group);
+ // The tests later check that this friendly name is still on the node feeding the Result.
+ shuffle_channels->set_friendly_name("shc");
+
+ auto f = std::make_shared<::Function>(::NodeVector{shuffle_channels}, ::ParameterVector{input});
+
+ ::pass::InitNodeInfo().run_on_function(f);
+ ::pass::ConvertShuffleChannels3().run_on_function(f);
+ f->validate_nodes_and_infer_types();
+ return f;
+}
+
+// ConvertShuffleChannels3 with axis 0 on input {12, ?, ?, ?} and group 4.
+// Expected decomposition: Reshape to [group, -1, prod(dims 1..3)] -> Transpose {1, 0, 2} ->
+// Reshape back to the original shape. The output node must keep the friendly name "shc".
+TEST(TransformationTests, ConvertShuffleChannelsAxis0) {
+    int64_t group = 4;
+    auto ps = ::PartialShape{12, Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic()};
+    std::shared_ptr<ngraph::Function> f = buildInputGraph(0, group, ps), f_ref(nullptr);
+    ASSERT_NO_THROW(check_rt_info(f));
+
+    auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps);
+
+    auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0});
+    auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0));
+    // Split the shape vector into [shuffled axis] and [3 trailing dims].
+    auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>(
+            original_shape->output(0),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0}),
+            ::opset2::Constant::create(element::i64, Shape({2}), {1, 3}));
+
+    ::OutputVector new_dims = {
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{group}),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{-1}),
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(1), reduce_axis_const, true)};
+
+    auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0);
+    auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false);
+    auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0),
+                                                           ::opset2::Constant::create(element::i64, Shape({3}),
+                                                                                      {1, 0, 2}));
+    auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false);
+
+    f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input});
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+
+    auto result_node_of_converted_f = f->get_output_op(0);
+    auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    // ASSERT_EQ reports the actual name on failure, unlike ASSERT_TRUE on an == expression.
+    ASSERT_EQ(output_node->get_friendly_name(), "shc") << "ConvertShuffleChannels3 should keep output names.\n";
+}
+
+// ConvertShuffleChannels3 with axis 1 on input {?, 12, ?, ?} and group 4.
+// Expected decomposition: Reshape to [dim 0, group, -1, prod(dims 2..3)] -> Transpose {0, 2, 1, 3} ->
+// Reshape back to the original shape. The output node must keep the friendly name "shc".
+TEST(TransformationTests, ConvertShuffleChannelsAxis1) {
+    int64_t group = 4;
+    auto ps = ::PartialShape{Dimension::dynamic(), 12, Dimension::dynamic(), Dimension::dynamic()};
+    std::shared_ptr<ngraph::Function> f = buildInputGraph(1, group, ps), f_ref(nullptr);
+    ASSERT_NO_THROW(check_rt_info(f));
+
+    auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps);
+
+    auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0});
+    auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0));
+    // Split the shape vector into [1 leading dim], [shuffled axis], [2 trailing dims].
+    auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>(
+            original_shape->output(0),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0}),
+            ::opset2::Constant::create(element::i64, Shape({3}), {1, 1, 2}));
+
+    ::OutputVector new_dims = {
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{group}),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{-1}),
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)};
+
+    auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0);
+    auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false);
+    auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0),
+                                                           ::opset2::Constant::create(element::i64, Shape({4}),
+                                                                                      {0, 2, 1, 3}));
+    auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false);
+
+    f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input});
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+
+    auto result_node_of_converted_f = f->get_output_op(0);
+    auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    // ASSERT_EQ reports the actual name on failure, unlike ASSERT_TRUE on an == expression.
+    ASSERT_EQ(output_node->get_friendly_name(), "shc") << "ConvertShuffleChannels3 should keep output names.\n";
+}
+
+// ConvertShuffleChannels3 with axis 2 on input {?, ?, 12, ?} and group 4.
+// Expected decomposition: Reshape to [prod(dims 0..1), group, -1, dim 3] -> Transpose {0, 2, 1, 3} ->
+// Reshape back to the original shape. The output node must keep the friendly name "shc".
+TEST(TransformationTests, ConvertShuffleChannelsAxis2) {
+    int64_t group = 4;
+    auto ps = ::PartialShape{Dimension::dynamic(), Dimension::dynamic(), 12, Dimension::dynamic()};
+    std::shared_ptr<ngraph::Function> f = buildInputGraph(2, group, ps), f_ref(nullptr);
+    ASSERT_NO_THROW(check_rt_info(f));
+
+    auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps);
+
+    auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0});
+    auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0));
+    // Split the shape vector into [2 leading dims], [shuffled axis], [1 trailing dim].
+    auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>(
+            original_shape->output(0),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0}),
+            ::opset2::Constant::create(element::i64, Shape({3}), {2, 1, 1}));
+
+    ::OutputVector new_dims = {
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{group}),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{-1}),
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)};
+
+    auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0);
+    auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false);
+    auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0),
+                                                           ::opset2::Constant::create(element::i64, Shape({4}),
+                                                                                      {0, 2, 1, 3}));
+    auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false);
+
+    f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input});
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+
+    auto result_node_of_converted_f = f->get_output_op(0);
+    auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    // ASSERT_EQ reports the actual name on failure, unlike ASSERT_TRUE on an == expression.
+    ASSERT_EQ(output_node->get_friendly_name(), "shc") << "ConvertShuffleChannels3 should keep output names.\n";
+}
+
+// ConvertShuffleChannels3 with axis -1 on input {?, ?, ?, 12} and group 4.
+// Expected decomposition: Reshape to [prod(dims 0..2), group, -1] -> Transpose {0, 2, 1} ->
+// Reshape back to the original shape. The output node must keep the friendly name "shc".
+TEST(TransformationTests, ConvertShuffleChannelsLastAxis) {
+    int64_t group = 4;
+    auto ps = ::PartialShape{Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), 12};
+    std::shared_ptr<ngraph::Function> f = buildInputGraph(-1, group, ps), f_ref(nullptr);
+    ASSERT_NO_THROW(check_rt_info(f));
+
+    auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps);
+
+    auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0});
+    auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0));
+    // Split the shape vector into [3 leading dims] and [shuffled axis].
+    auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>(
+            original_shape->output(0),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{0}),
+            ::opset2::Constant::create(element::i64, Shape({2}), {3, 1}));
+
+    ::OutputVector new_dims = {
+            std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{group}),
+            ::opset2::Constant::create(element::i64, Shape({1}), std::vector<int64_t>{-1})};
+
+    auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0);
+    auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false);
+    auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0),
+                                                           ::opset2::Constant::create(element::i64, Shape({3}),
+                                                                                      {0, 2, 1}));
+    auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false);
+
+    f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input});
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+
+    auto result_node_of_converted_f = f->get_output_op(0);
+    auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    // ASSERT_EQ reports the actual name on failure, unlike ASSERT_TRUE on an == expression.
+    ASSERT_EQ(output_node->get_friendly_name(), "shc") << "ConvertShuffleChannels3 should keep output names.\n";
+}
\ No newline at end of file
#include <assert.h>
#include <ngraph/function.hpp>
+#include <ngraph/pass/visualize_tree.hpp>
std::pair<bool, std::string> compare_functions(const std::shared_ptr<ngraph::Function> & f1, const std::shared_ptr<ngraph::Function> & f2) {
/*
return std::string(typeInfo.name) + "/" + std::to_string(typeInfo.version);
};
+ std::ostringstream err_log;
+
std::queue<std::pair<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node> > > q;
q.push({f1_results[0], f2_results[0]});
while (!q.empty()) {
return {false, "Number of inputs is different: " + std::to_string(node1->inputs().size()) + " and " + std::to_string(node2->inputs().size())};
}
+ if (node1->outputs().size() != node2->outputs().size()) {
+ return {false, "Number of outputs is different: " + std::to_string(node1->outputs().size()) + " and " + std::to_string(node2->outputs().size())};
+ }
+
for (int i = 0; i < node1->inputs().size(); ++i) {
- if (!node1->input(i).get_partial_shape().compatible(node2->input(i).get_partial_shape())) {
- std::ostringstream out("Different shape detected");
- out << node1->input(i).get_partial_shape() << " and " << node2->input(i).get_partial_shape();
- return {false, out.str()};
+ if (!node1->input(i).get_partial_shape().same_scheme(node2->input(i).get_partial_shape())) {
+ err_log << "Different shape detected" << std::endl
+ << node1->description() << " Input(" << i << ") " << node1->input(i).get_partial_shape() << " and "
+ << node2->description() << " Input(" << i << ") " << node2->input(i).get_partial_shape() << std::endl;
}
q.push({node1->input_value(i).get_node_shared_ptr(), node2->input_value(i).get_node_shared_ptr()});
}
+
+ for (int i = 0; i < node1->outputs().size(); ++i) {
+ if (!node1->output(i).get_partial_shape().same_scheme(node2->output(i).get_partial_shape())) {
+ err_log << "Different shape detected" << std::endl
+ << node1->description() << " Output(" << i << ") " << node1->output(i).get_partial_shape() << " and "
+ << node2->description() << " Output(" << i << ") " << node2->output(i).get_partial_shape() << std::endl;
+ }
+ }
}
- return {true, ""};
+ return {err_log.str().empty(), err_log.str()};
}
void check_rt_info(const std::shared_ptr<ngraph::Function> & f) {
if (!err_msg.empty()) {
throw ngraph::ngraph_error(err_msg);
}
+}
+
+// Debug helper: runs ngraph::pass::VisualizeTree, constructed with file_name, on function f.
+void visualize_function(std::shared_ptr<ngraph::Function> f, const std::string & file_name) {
+ // run_on_module() is called with a vector of functions, so wrap the single function.
+ std::vector<std::shared_ptr<ngraph::Function> > g{f};
+ ngraph::pass::VisualizeTree(file_name).run_on_module(g);
}
\ No newline at end of file
std::pair<bool, std::string> compare_functions(const std::shared_ptr<ngraph::Function> & f1, const std::shared_ptr<ngraph::Function> & f2);
-void check_rt_info(const std::shared_ptr<ngraph::Function> & f);
\ No newline at end of file
+void check_rt_info(const std::shared_ptr<ngraph::Function> & f);
+
+void visualize_function(std::shared_ptr<ngraph::Function> f, const std::string & file_name);
\ No newline at end of file
InferenceEngine::CNNNetwork network(f);
- // Set PrimitivesPriority to all Convolutinos
+ // Set PrimitivesPriority to all Convolutions
auto nGraph = network.getFunction();
ASSERT_TRUE(nGraph);
for (auto & op : nGraph->get_ops()) {
TEST_P(TransposeToReshapeTests, CompareFunctions) {
ngraph::pass::InitNodeInfo().run_on_function(f);
ngraph::pass::AlgebraicSimplification().run_on_function(f);
+ f->validate_nodes_and_infer_types();
ASSERT_NO_THROW(check_rt_info(f));
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
testing::Values(std::make_tuple(InputShape{1, 3, 64, 1}, TransposeOrder{0, 1, 3, 2}, ReferenceParams({1, 3, 1, 64})),
std::make_tuple(InputShape{1, 3, 1, 64}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({3, 1, 64, 1})),
std::make_tuple(InputShape{DYN, DYN, 1}, TransposeOrder{0, 2, 1}, ReferenceParams({0, 1, -1})),
- std::make_tuple(InputShape{1, 1, DYN}, TransposeOrder{2, 1, 0}, ReferenceParams({-1, 1, 1})),
+ std::make_tuple(InputShape{1, 1, DYN}, TransposeOrder{2, 1, 0}, ReferenceParams({-1, 0, 1})),
std::make_tuple(InputShape{DYN, 1, 64, 1}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({1, -1, 1, 64}))));
INSTANTIATE_TEST_CASE_P(ReshapeWithGather, TransposeToReshapeTests,
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_constants.hpp>
+#include "behavior/add_output.hpp"
+#include "functional_test_utils/test_model/test_model.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+
+// Builds the network used by the AddOutputsTest cases below: requests the FP32
+// model from FuncTestUtils::TestModel::getModelWithMemory and reads its XML
+// string and weights blob through the object returned by PluginCache::get().ie().
+InferenceEngine::CNNNetwork getTargetNetwork() {
+ auto model = FuncTestUtils::TestModel::getModelWithMemory(InferenceEngine::Precision::FP32);
+ auto ie = PluginCache::get().ie();
+ return ie->ReadNetwork(model.model_xml_str, model.weights_blob);
+}
+
+// Single test case for the CPU device; "Memory_1" is presumably the name of the
+// layer to expose as an extra output - confirm against behavior/add_output.hpp.
+// NOTE(review): getTargetNetwork() is evaluated during static initialization of
+// this array, before main(); confirm PluginCache is safe to use that early.
+addOutputsParams testCases[] = {addOutputsParams(getTargetNetwork(), {"Memory_1"}, CommonTestUtils::DEVICE_CPU)};
+
+INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputsTest, ::testing::ValuesIn(testCases),
+ AddOutputsTest::getTestCaseName);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "multi-device/multi_device_config.hpp"
+
+#include "behavior/set_preprocess.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+ const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+ };
+
+ // Plugin configurations passed to the CPU PreProcessTests instantiation:
+ // 1) an empty config,
+ // 2) KEY_CPU_THROUGHPUT_STREAMS set to CPU_THROUGHPUT_AUTO,
+ // 3) KEY_CPU_THROUGHPUT_STREAMS set to "0" together with KEY_CPU_THREADS_NUM set to "1".
+ const std::vector<std::map<std::string, std::string>> configs = {
+ {},
+ {{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}},
+ {{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "0"}, {InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, "1"}}
+ };
+
+ const std::vector<std::map<std::string, std::string>> multiConfigs = {
+ {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}}
+ };
+
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ::testing::ValuesIn(configs)),
+ PreProcessTests::getTestCaseName);
+
+ INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_MULTI),
+ ::testing::ValuesIn(multiConfigs)),
+ PreProcessTests::getTestCaseName);
+} // namespace
\ No newline at end of file
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "other/add_output.hpp"
-
-const auto addOutputParams =
- ::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_CPU));
-
-INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
- AddOutputTestsCommonClass::getTestCaseName);
-
-TEST_P(AddOutputTestsCommonClass, basic) {
- run_test();
-}
--- /dev/null
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<size_t> numOutChannels = {1, 5, 16};
+
+/* ============= 2D ConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 3, 30, 30},
+ {1, 16, 10, 10},
+ {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}, {3, 5}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 1}, {1, 3}};
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}, {1, 1}};
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}, {2, 2}};
+
+const auto conv2DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::ValuesIn(padBegins2D),
+ ::testing::ValuesIn(padEnds2D),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+const auto conv2DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv2DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv2DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D ConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 3, 10, 10, 10},
+ {1, 16, 5, 5, 5},
+ {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}, {1, 1, 1}};
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}, {2, 2, 2}};
+
+const auto conv3DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::ValuesIn(padBegins3D),
+ ::testing::ValuesIn(padEnds3D),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+const auto conv3DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv3DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv3DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/cum_sum.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+// Input shapes for the CumSum tests, listed in order of increasing rank (1-D to 5-D).
+// The order matters: the per-axis case lists in this file take
+// shapes.begin() + k for axis k, so entry k must be the first shape that has
+// an axis with index k.
+const std::vector<std::vector<size_t>> shapes = {
+ {16},
+ {9, 15},
+ {16, 10, 12},
+ {5, 14, 5, 7},
+ {7, 8, 6, 7, 13}
+};
+
+const std::vector<InferenceEngine::Precision> inputPrecision = {
+ InferenceEngine::Precision::I8,
+ InferenceEngine::Precision::U8,
+ InferenceEngine::Precision::I16,
+ InferenceEngine::Precision::I32,
+ InferenceEngine::Precision::FP32
+};
+
+const std::vector<int64_t> axes = { 0, 1, 2, 3, 4 };
+const std::vector<int64_t> negativeAxes = { -1, -2, -3, -4, -5 };
+
+const std::vector<bool> exclusive = {true, false};
+const std::vector<bool> reverse = {true, false};
+
+const auto testCasesNegativeAxis = ::testing::Combine(
+ ::testing::Values(std::vector<size_t>{4, 16, 3, 6, 5}),
+ ::testing::Values(InferenceEngine::Precision::FP32),
+ ::testing::ValuesIn(negativeAxes),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+const auto testCasesAxis_0 = ::testing::Combine(
+ ::testing::ValuesIn(shapes),
+ ::testing::ValuesIn(inputPrecision),
+ ::testing::Values(axes[0]),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+const auto testCasesAxis_1 = ::testing::Combine(
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>(shapes.begin() + 1, shapes.end())),
+ ::testing::ValuesIn(inputPrecision),
+ ::testing::Values(axes[1]),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+const auto testCasesAxis_2 = ::testing::Combine(
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>(shapes.begin() + 2, shapes.end())),
+ ::testing::ValuesIn(inputPrecision),
+ ::testing::Values(axes[2]),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+const auto testCasesAxis_3 = ::testing::Combine(
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>(shapes.begin() + 3, shapes.end())),
+ ::testing::ValuesIn(inputPrecision),
+ ::testing::Values(axes[3]),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+const auto testCasesAxis_4 = ::testing::Combine(
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>(shapes.begin() + 4, shapes.end())),
+ ::testing::ValuesIn(inputPrecision),
+ ::testing::Values(axes[4]),
+ ::testing::ValuesIn(exclusive),
+ ::testing::ValuesIn(reverse),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_negative_axis, CumSumLayerTest, testCasesNegativeAxis, CumSumLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_0, CumSumLayerTest, testCasesAxis_0, CumSumLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_1, CumSumLayerTest, testCasesAxis_1, CumSumLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_2, CumSumLayerTest, testCasesAxis_2, CumSumLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_3, CumSumLayerTest, testCasesAxis_3, CumSumLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_4, CumSumLayerTest, testCasesAxis_4, CumSumLayerTest::getTestCaseName);
+
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// NOTE: WILL BE REWORKED (31905)
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "common_test_utils/common_layers_params.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
+#include "common_test_utils/xml_net_builder/xml_filler.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ie_core.hpp"
+#include "single_layer_tests/eltwise.hpp"
+
+using namespace EltwiseTestNamespace;
+
+// Value lists combined into the Eltwise test parameter sets below.
+// Declared const so these generically named namespace-scope objects get
+// internal linkage and cannot collide with same-named globals in other test
+// sources linked into the same binary; they are only read in this file.
+const std::vector<EltwiseOpType> operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY };
+const std::vector<ParameterInputIdx> primary_input_idx = { 0, 1 };
+const std::vector<InputLayerType> secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER };
+const std::vector<InferenceEngine::Precision> net_precisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 };
+// Shapes whose leading dimension is 1.
+const std::vector<InferenceEngine::SizeVector> flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} };
+// Shapes with more than one row and/or more than two dimensions.
+const std::vector<InferenceEngine::SizeVector> non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} };
+// No extra plugin configuration is passed for the CPU device.
+const std::map<std::string, std::string> additional_config = {};
+
+const auto FlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ::testing::Values(additional_config));
+
+const auto NonFlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(non_flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ::testing::Values(additional_config));
+
+INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/fake_quantize.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6}};
+const std::vector<std::vector<size_t>> constShapes = {{1}};
+const std::vector<size_t> levels = {16, 255, 256};
+
+const auto fqParams = ::testing::Combine(
+ ::testing::ValuesIn(levels),
+ ::testing::ValuesIn(constShapes)
+);
+
+INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
+ ::testing::Combine(
+ fqParams,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ FakeQuantizeLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/group_convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32
+};
+
+const std::vector<size_t> numOutChannels = {16, 32};
+const std::vector<size_t> numGroups = {2, 8, 16};
+
+/* ============= 2D GroupConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10},
+ {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
+
+const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::ValuesIn(padBegins2D),
+ ::testing::ValuesIn(padEnds2D),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(GroupConvBackpropData2D_ExplicitPadding, GroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ groupConvBackpropData2DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ GroupConvBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(GroupConvBackpropData2D_AutoPadValid, GroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ groupConvBackpropData2DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ GroupConvBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D GroupConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5},
+ {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
+
+const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::ValuesIn(padBegins3D),
+ ::testing::ValuesIn(padEnds3D),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(GroupConvBackpropData3D_ExplicitPadding, GroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ groupConvBackpropData3DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ GroupConvBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(GroupConvBackpropData3D_AutoPadValid, GroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ groupConvBackpropData3DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ GroupConvBackpropDataLayerTest::getTestCaseName);
+
+} // namespace
+// Returns the regular expressions naming the tests that are skipped for this
+// plugin. Each entry is preceded by the tracking issue it waits on.
std::vector<std::string> disabledTestPatterns() {
return {
// TODO: Issue 26264
- R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)"
+ R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)",
+ // TODO: Issue 31839
+ R"(.*(QuantConvBackpropData3D).*)",
+ // TODO: Issue 31841
+ R"(.*(QuantGroupConvBackpropData3D).*)",
+ // NOTE(review): the two 2D patterns below need ".*" between the suite name and
+ // "QG=". Written as "(...)*QG=" the group itself becomes optional, so the
+ // pattern skips every test whose name contains the QG= value, including the
+ // non-group QuantConvBackpropData2D suite. Confirm that suite passes once it
+ // is no longer skipped by accident.
+ // TODO: Issue 31843
+ R"(.*(QuantGroupConvBackpropData2D).*QG=Perchannel.*)",
+ // TODO: Issue 32023
+ R"(.*(QuantGroupConvBackpropData2D).*QG=Pertensor.*)",
+ // TODO: Issue 31845
+ R"(.*(FakeQuantize).*)"
};
}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/quantized_convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace ngraph::helpers;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32
+};
+
+const std::vector<size_t> numOutChannels = {16, 32};
+
+const std::vector<size_t > levels = {256};
+// FIXME: Perchannel tests fail because of bug in LPT
+const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
+
+/* ============= 2D ConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
+
+
+const auto quantConvBackpropData2DParams = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::ValuesIn(padBegins2D),
+ ::testing::ValuesIn(padEnds2D),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::AUTO),
+ ::testing::ValuesIn(levels),
+ ::testing::ValuesIn(granularity)
+);
+
+INSTANTIATE_TEST_CASE_P(QuantConvBackpropData2D, QuantConvBackpropDataLayerTest,
+ ::testing::Combine(
+ quantConvBackpropData2DParams,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ QuantConvBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D ConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
+
+const auto quantConvBackpropData3DParams = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::ValuesIn(padBegins3D),
+ ::testing::ValuesIn(padEnds3D),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::AUTO),
+ ::testing::ValuesIn(levels),
+ ::testing::ValuesIn(granularity)
+);
+
+INSTANTIATE_TEST_CASE_P(QuantConvBackpropData3D, QuantConvBackpropDataLayerTest,
+ ::testing::Combine(
+ quantConvBackpropData3DParams,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ QuantConvBackpropDataLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace ngraph::helpers;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32
+};
+
+const std::vector<size_t> numOutChannels = {16, 32};
+const std::vector<size_t> numGroups = {2, 8, 16};
+
+const std::vector<size_t > levels = {256};
+const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
+
+/* ============= 2D GroupConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
+
+
+const auto quantGroupConvBackpropData2DParams = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::ValuesIn(padBegins2D),
+ ::testing::ValuesIn(padEnds2D),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::AUTO),
+ ::testing::ValuesIn(levels),
+ ::testing::ValuesIn(granularity)
+);
+
+INSTANTIATE_TEST_CASE_P(QuantGroupConvBackpropData2D, QuantGroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ quantGroupConvBackpropData2DParams,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ QuantGroupConvBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D GroupConvolutionBackpropData ============= */
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
+
+const auto quantGroupConvBackpropData3DParams = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::ValuesIn(padBegins3D),
+ ::testing::ValuesIn(padEnds3D),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::ValuesIn(numGroups),
+ ::testing::Values(ngraph::op::PadType::AUTO),
+ ::testing::ValuesIn(levels),
+ ::testing::ValuesIn(granularity)
+);
+
+INSTANTIATE_TEST_CASE_P(QuantGroupConvBackpropData3D, QuantGroupConvBackpropDataLayerTest,
+ ::testing::Combine(
+ quantGroupConvBackpropData3DParams,
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ QuantGroupConvBackpropDataLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_constants.hpp>
+#include "behavior/add_output.hpp"
+#include "functional_test_utils/test_model/test_model.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+
+// Builds the network used by the AddOutputsTest case below: requests the FP32
+// model from FuncTestUtils::TestModel::getModelWithMemory and reads its XML
+// string and weights blob through the object returned by PluginCache::get().ie().
+InferenceEngine::CNNNetwork getTargetNetwork() {
+ auto model = FuncTestUtils::TestModel::getModelWithMemory(InferenceEngine::Precision::FP32);
+ auto ie = PluginCache::get().ie();
+ return ie->ReadNetwork(model.model_xml_str, model.weights_blob);
+}
+// Single test case for the GNA device; "Memory_1" is presumably the name of the
+// layer to expose as an extra output - confirm against behavior/add_output.hpp.
+// NOTE(review): getTargetNetwork() is evaluated during static initialization of
+// this array, before main(); confirm PluginCache is safe to use that early.
+addOutputsParams testCases[] = {addOutputsParams(getTargetNetwork(), {"Memory_1"}, CommonTestUtils::DEVICE_GNA)};
+
+INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputsTest, ::testing::ValuesIn(testCases), AddOutputsTest::getTestCaseName);
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "other/add_output.hpp"
-
-const auto addOutputParams =
- ::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_GNA));
-
-INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams,
- AddOutputTestsCommonClass::getTestCaseName);
-
-TEST_P(AddOutputTestsCommonClass, basic) {
- run_test();
-}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// NOTE: WILL BE REWORKED (31905)
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "common_test_utils/common_layers_params.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
+#include "common_test_utils/xml_net_builder/xml_filler.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ie_core.hpp"
+#include "single_layer_tests/eltwise.hpp"
+
+using namespace EltwiseTestNamespace;
+
+// Value lists combined into the Eltwise test parameter sets below.
+// Declared const so these generically named namespace-scope objects get
+// internal linkage and cannot collide with same-named globals in other test
+// sources linked into the same binary; they are only read in this file.
+const std::vector<EltwiseOpType> operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY };
+const std::vector<ParameterInputIdx> primary_input_idx = { 0, 1 };
+const std::vector<InputLayerType> secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER };
+const std::vector<InferenceEngine::Precision> net_precisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 };
+// Shapes whose leading dimension is 1.
+const std::vector<InferenceEngine::SizeVector> flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} };
+// Shapes with more than one row and/or more than two dimensions.
+const std::vector<InferenceEngine::SizeVector> non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} };
+// GNA plugin configuration passed to every case: sets GNA_DEVICE_MODE to
+// GNA_SW_EXACT and both GNA_SCALE_FACTOR_0 and GNA_SCALE_FACTOR_1 to 1638.4.
+const std::map<std::string, std::string> additional_config = { {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+ {"GNA_SCALE_FACTOR_0", "1638.4"}, {"GNA_SCALE_FACTOR_1", "1638.4"} };
+
+const auto FlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GNA),
+ ::testing::Values(additional_config));
+
+const auto NonFlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(non_flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GNA),
+ ::testing::Values(additional_config));
+
+INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(DISABLED_Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
// TODO: FIX BUG 31661
".*Behavior.*Callback.*"
};
-}
\ No newline at end of file
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "multi-device/multi_device_config.hpp"
+
+#include "behavior/set_preprocess.hpp"
+
+using namespace LayerTestsDefinitions;
+
+// GPU instantiations of the PreProcessTests behavior suite.
+namespace {
+ // Network precisions every instantiation below is run with.
+ const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+ };
+
+ // Plugin configurations for the direct GPU run: a single empty config.
+ const std::vector<std::map<std::string, std::string>> configs = {
+ {},
+ };
+
+ // Configuration for the MULTI device run: GPU is the only entry in the device priorities.
+ const std::vector<std::map<std::string, std::string>> multiConfigs = {
+ {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}}
+ };
+
+ // Runs the suite directly on the GPU device.
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
+ ::testing::ValuesIn(configs)),
+ PreProcessTests::getTestCaseName);
+
+ // Runs the suite through the MULTI device using multiConfigs.
+ INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_MULTI),
+ ::testing::ValuesIn(multiConfigs)),
+ PreProcessTests::getTestCaseName);
+} // namespace
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+// GPU instantiations of ConvolutionBackpropDataLayerTest (2D and 3D variants).
+namespace {
+
+// Output channel counts shared by the 2D and 3D parameter sets.
+const std::vector<size_t> numOutChannels = {1, 5, 16};
+
+/* ============= 2D ConvolutionBackpropData ============= */
+// The 2D cases run in both FP32 and FP16.
+const std::vector<InferenceEngine::Precision> netPrecisions2D = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 3, 30, 30},
+ {1, 16, 10, 10},
+ {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}, {3, 5}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 3}};
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}, {1, 1}};
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
+
+// Layer parameters with explicit padding: pad values come from padBegins2D / padEnds2D.
+const auto conv2DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::ValuesIn(padBegins2D),
+ ::testing::ValuesIn(padEnds2D),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+// Layer parameters with PadType::VALID: zero pads are passed in both pad slots.
+const auto conv2DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels2D),
+ ::testing::ValuesIn(strides2D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
+ ::testing::ValuesIn(dilations2D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv2DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions2D),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv2DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions2D),
+ ::testing::ValuesIn(inputShapes2D),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D ConvolutionBackpropData ============= */
+// The 3D cases run in FP32 only.
+const std::vector<InferenceEngine::Precision> netPrecisions3D = {
+ InferenceEngine::Precision::FP32,
+};
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 3, 10, 10, 10},
+ {1, 16, 5, 5, 5},
+ {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}, {1, 1, 1}};
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
+
+// Layer parameters with explicit padding: pad values come from padBegins3D / padEnds3D.
+const auto conv3DParams_ExplicitPadding = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::ValuesIn(padBegins3D),
+ ::testing::ValuesIn(padEnds3D),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::EXPLICIT)
+);
+// Layer parameters with PadType::VALID: zero pads are passed in both pad slots.
+const auto conv3DParams_AutoPadValid = ::testing::Combine(
+ ::testing::ValuesIn(kernels3D),
+ ::testing::ValuesIn(strides3D),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
+ ::testing::ValuesIn(dilations3D),
+ ::testing::ValuesIn(numOutChannels),
+ ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv3DParams_ExplicitPadding,
+ ::testing::ValuesIn(netPrecisions3D),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest,
+ ::testing::Combine(
+ conv3DParams_AutoPadValid,
+ ::testing::ValuesIn(netPrecisions3D),
+ ::testing::ValuesIn(inputShapes3D),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+ ConvolutionBackpropDataLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// NOTE: WILL BE REWORKED (31905)
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "common_test_utils/common_layers_params.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
+#include "common_test_utils/xml_net_builder/xml_filler.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ie_core.hpp"
+#include "single_layer_tests/eltwise.hpp"
+
+using namespace EltwiseTestNamespace;
+
+// Parameter axes shared by the GPU eltwise instantiations below.
+// They are declared const so that these namespace-scope objects have internal linkage
+// and cannot clash with same-named globals in other translation units of the test binary.
+const std::vector<EltwiseOpType> operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY };
+const std::vector<ParameterInputIdx> primary_input_idx = { 0, 1 };
+const std::vector<InputLayerType> secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER };
+const std::vector<InferenceEngine::Precision> net_precisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 };
+const std::vector<InferenceEngine::SizeVector> flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} };
+const std::vector<InferenceEngine::SizeVector> non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} };
+// No additional plugin configuration is passed for the GPU cases.
+const std::map<std::string, std::string> additional_config = {};
+
+// Cartesian product of all parameter axes for the flat shapes ({1, N}) on the GPU device.
+const auto FlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
+ ::testing::Values(additional_config));
+
+// Same product, but over the non-flat shapes.
+const auto NonFlatEltwiseParams =
+::testing::Combine(
+ ::testing::ValuesIn(operations),
+ ::testing::ValuesIn(primary_input_idx),
+ ::testing::ValuesIn(secondary_input_types),
+ ::testing::ValuesIn(net_precisions),
+ ::testing::ValuesIn(non_flat_shapes),
+ ::testing::Values(CommonTestUtils::DEVICE_GPU),
+ ::testing::Values(additional_config));
+
+// Both the flat and the non-flat sets are instantiated (and enabled) for GPU.
+INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams,
+ EltwiseLayerTest::getTestCaseName);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "multi-device/multi_device_config.hpp"
+
+#include "behavior/set_preprocess.hpp"
+
+using namespace LayerTestsDefinitions;
+
+// MYRIAD instantiations of the PreProcessTests behavior suite.
+namespace {
+ // Only FP16 is listed as network precision for these instantiations.
+ const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP16
+ };
+
+ // Plugin configurations for the direct MYRIAD run: a single empty config.
+ const std::vector<std::map<std::string, std::string>> configs = {
+ {},
+ };
+
+ // Configuration for the MULTI device run: MYRIAD is the only entry in the device priorities.
+ const std::vector<std::map<std::string, std::string>> multiConfigs = {
+ {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_MYRIAD}}
+ };
+
+ // Runs the suite directly on the MYRIAD device.
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
+ ::testing::ValuesIn(configs)),
+ PreProcessTests::getTestCaseName);
+
+ // Runs the suite through the MULTI device using multiConfigs.
+ INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests,
+ ::testing::Combine(
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(CommonTestUtils::DEVICE_MULTI),
+ ::testing::ValuesIn(multiConfigs)),
+ PreProcessTests::getTestCaseName);
+} // namespace
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/equal.hpp"
+
+#include "common_test_utils/test_constants.hpp"
+
+#include <vector>
+
+using namespace LayerTestsDefinitions;
+
+// MYRIAD instantiation of EqualLayerTest.
+namespace {
+
+// Each entry is a pair of input shapes, one per operand.
+// Pairs with differing shapes (e.g. {1000} vs {1}) presumably exercise broadcasting - TODO confirm in the test SetUp.
+std::vector<std::vector<InferenceEngine::SizeVector>> inShapes = {
+ {{200}, {200}},
+ {{1000}, {1}},
+ {{1, 256, 512}, {1, 256, 512}},
+ {{1}, {1, 256, 512}},
+};
+
+// Input and output precision are both I32 (see EqualTestParam for the element order).
+INSTANTIATE_TEST_CASE_P(equalS32, EqualLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes),
+ ::testing::Values(InferenceEngine::Precision::I32),
+ ::testing::Values(InferenceEngine::Precision::I32),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
+ EqualLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/greater.hpp"
+
+#include "common_test_utils/test_constants.hpp"
+
+#include <vector>
+
+using namespace LayerTestsDefinitions;
+
+// MYRIAD instantiation of GreaterLayerTest.
+namespace {
+
+// Each entry is a pair of input shapes, one per operand.
+// Pairs with differing shapes (e.g. {1000} vs {1}) presumably exercise broadcasting - TODO confirm in the test SetUp.
+std::vector<std::vector<InferenceEngine::SizeVector>> inShapes = {
+ {{200}, {200}},
+ {{1000}, {1}},
+ {{1, 256, 512}, {1, 256, 512}},
+ {{1}, {1, 256, 512}},
+};
+
+// Input precision is FP16 and output precision is I32 (see GreaterTestParam for the element order).
+// NOTE(review): the instantiation is named "greaterS32" although the input precision is FP16 -
+// confirm whether the name or the precision is the intended one.
+INSTANTIATE_TEST_CASE_P(greaterS32, GreaterLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(inShapes),
+ ::testing::Values(InferenceEngine::Precision::FP16),
+ ::testing::Values(InferenceEngine::Precision::I32),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
+ GreaterLayerTest::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+
+#include "common_test_utils/test_common.hpp"
+#include <ie_core.hpp>
+
+// Parameter tuple of AddOutputsTest: the network, the layers to expose as outputs, and the device.
+typedef std::tuple<
+ InferenceEngine::CNNNetwork, // CNNNetwork to work with
+ std::vector<std::string>, // Target layers to add as outputs
+ std::string> // Target device name
+ addOutputsParams;
+
+// Parameterized fixture for the "add output" behavior test.
+// Holds the unpacked elements of addOutputsParams for use by the test bodies.
+class AddOutputsTest : public CommonTestUtils::TestsCommon,
+                       public testing::WithParamInterface<addOutputsParams> {
+protected:
+    InferenceEngine::CNNNetwork net;        // network the extra outputs are added to
+    std::vector<std::string> outputsToAdd;  // layer names to expose as outputs
+    std::string deviceName;                 // device the network is loaded on
+
+    // Unpacks the test parameter tuple into the members above.
+    // Marked override so a signature mismatch with the GoogleTest hook is caught at compile time.
+    void SetUp() override;
+public:
+    // Name generator passed to the test instantiation macro.
+    static std::string getTestCaseName(const testing::TestParamInfo<addOutputsParams> &obj);
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include "ie_extension.h"
+#include <condition_variable>
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+ // Parameter tuple of PreProcessTests.
+ typedef std::tuple<
+ InferenceEngine::Precision, // Network precision
+ std::string, // Device name
+ std::map<std::string, std::string> // Config
+ > PreProcessParams;
+
+// Parameterized behavior fixture for pre-processing info tests.
+class PreProcessTests : public testing::WithParamInterface<PreProcessParams>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<PreProcessParams> obj);
+
+protected:
+ // GoogleTest per-test hooks overridden by this fixture.
+ void SetUp() override;
+ void TearDown() override;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-
-#include <map>
-
-#include "common_test_utils/common_layers_params.hpp"
-#include "common_test_utils/common_utils.hpp"
-#include "common_test_utils/test_common.hpp"
-#include "common_test_utils/test_constants.hpp"
-#include "common_test_utils/xml_net_builder/ir_net.hpp"
-#include "common_test_utils/xml_net_builder/xml_filler.hpp"
-#include "ie_core.hpp"
-
-class AddOutputTestsCommonClass : public CommonTestUtils::TestsCommon,
- public testing::WithParamInterface<std::tuple<std::string, std::string>> {
-private:
- static std::string generate_model();
-
-public:
- static std::string getTestCaseName(testing::TestParamInfo<std::tuple<std::string, std::string>> obj);
- void run_test();
-};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+// Layer-specific parameters of a ConvolutionBackpropData test case.
+typedef std::tuple<
+ InferenceEngine::SizeVector, // Kernel size
+ InferenceEngine::SizeVector, // Strides
+ std::vector<ptrdiff_t>, // Pad begin
+ std::vector<ptrdiff_t>, // Pad end
+ InferenceEngine::SizeVector, // Dilation
+ size_t, // Num out channels
+ ngraph::op::PadType // Padding type
+> convBackpropDataSpecificParams;
+// Full parameter set: the layer-specific tuple plus precision, input shape and device.
+typedef std::tuple<
+ convBackpropDataSpecificParams,
+ InferenceEngine::Precision, // Net precision
+ InferenceEngine::SizeVector, // Input shapes
+ LayerTestsUtils::TargetDevice // Device name
+> convBackpropDataLayerTestParamsSet;
+namespace LayerTestsDefinitions {
+
+
+// Parameterized single-layer fixture driven by convBackpropDataLayerTestParamsSet.
+class ConvolutionBackpropDataLayerTest : public testing::WithParamInterface<convBackpropDataLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<convBackpropDataLayerTestParamsSet> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <string>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Parameter tuple of CumSumLayerTest.
+typedef std::tuple<
+ InferenceEngine::SizeVector, // Input shapes
+ InferenceEngine::Precision, // Input precision
+ int64_t, // Axis
+ bool, // Exclusive
+ bool, // Reverse
+ std::string> cumSumParams; // Device name
+
+// Parameterized single-layer fixture driven by cumSumParams.
+class CumSumLayerTest : public testing::WithParamInterface<cumSumParams>, public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<cumSumParams> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// NOTE: WILL BE REWORKED (31905)
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "common_test_utils/common_layers_params.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "ie_core.hpp"
+
+// Helper types used to parameterize the eltwise layer tests.
+namespace EltwiseTestNamespace {
+
+ // Index of the primary input (the instantiations in this patch pass 0 and 1).
+ using ParameterInputIdx = int;
+ // Kind of layer that feeds the secondary input.
+ enum class InputLayerType {
+ CONSTANT,
+ PARAMETER
+ };
+ // Eltwise operation under test.
+ enum class EltwiseOpType {
+ ADD,
+ SUBSTRACT,
+ MULTIPLY
+ };
+ // Converters from the enums above to C strings; they are only declared here.
+ const char* InputLayerType_to_string(InputLayerType lt);
+ const char* EltwiseOpType_to_string(EltwiseOpType eOp);
+}// namespace EltwiseTestNamespace
+
+// Parameter tuple of EltwiseLayerTest.
+typedef std::tuple<
+ EltwiseTestNamespace::EltwiseOpType, // eltwise op type
+ EltwiseTestNamespace::ParameterInputIdx, // primary input idx
+ EltwiseTestNamespace::InputLayerType, // secondary input type
+ InferenceEngine::Precision, // Net precision
+ InferenceEngine::SizeVector, // Input shapes
+ std::string, // Device name
+ std::map<std::string, std::string> // Additional network configuration
+> eltwiseLayerTestParamsSet;
+
+// Parameterized single-layer fixture driven by eltwiseLayerTestParamsSet.
+// NOTE(review): this header has no include guard (#pragma once) and does not include
+// "functional_test_utils/layer_test_utils.hpp" itself, so LayerTestsUtils::LayerTestsCommon
+// must already be visible at the point of inclusion - consider fixing both.
+class EltwiseLayerTest : public testing::WithParamInterface<eltwiseLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<eltwiseLayerTestParamsSet> obj);
+};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+
+namespace LayerTestsDefinitions {
+
+// Parameter tuple of EqualLayerTest.
+using EqualTestParam = typename std::tuple<
+ std::vector<InferenceEngine::SizeVector>, // Input shapes
+ InferenceEngine::Precision, // Input precision
+ InferenceEngine::Precision, // Output precision
+ LayerTestsUtils::TargetDevice>; // Target device
+
+// Parameterized single-layer fixture driven by EqualTestParam.
+class EqualLayerTest : public testing::WithParamInterface<EqualTestParam>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(const testing::TestParamInfo<EqualTestParam>& obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+// Layer-specific parameters of a FakeQuantize test case.
+typedef std::tuple<
+ size_t, // levels
+ std::vector<size_t> // const inputs shape
+> fqSpecificParams;
+// Full parameter set: the layer-specific tuple plus precision, input shape and device.
+typedef std::tuple<
+ fqSpecificParams,
+ InferenceEngine::Precision, // Net precision
+ InferenceEngine::SizeVector, // Input shapes
+ LayerTestsUtils::TargetDevice // Device name
+> fqLayerTestParamsSet;
+namespace LayerTestsDefinitions {
+
+
+// Parameterized single-layer fixture driven by fqLayerTestParamsSet.
+class FakeQuantizeLayerTest : public testing::WithParamInterface<fqLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+
+namespace LayerTestsDefinitions {
+
+// Parameter tuple of GreaterLayerTest.
+using GreaterTestParam = typename std::tuple<
+ std::vector<InferenceEngine::SizeVector>, // Input shapes
+ InferenceEngine::Precision, // Input precision
+ InferenceEngine::Precision, // Output precision
+ LayerTestsUtils::TargetDevice>; // Target device
+
+// Parameterized single-layer fixture driven by GreaterTestParam.
+class GreaterLayerTest : public testing::WithParamInterface<GreaterTestParam>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(const testing::TestParamInfo<GreaterTestParam>& obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+// Layer-specific parameters of a GroupConvolutionBackpropData test case.
+// NOTE(review): the elements are undocumented; the order presumably follows the non-grouped
+// ConvolutionBackpropData tuple (kernel, strides, pad begin, pad end, dilation, out channels)
+// with an extra size_t for the group count before the pad type - confirm against SetUp.
+typedef std::tuple<
+ InferenceEngine::SizeVector,
+ InferenceEngine::SizeVector,
+ std::vector<ptrdiff_t>,
+ std::vector<ptrdiff_t>,
+ InferenceEngine::SizeVector,
+ size_t,
+ size_t,
+ ngraph::op::PadType> groupConvBackpropDataSpecificParams;
+// Full parameter set: the layer-specific tuple plus a precision, a size vector and the target device.
+typedef std::tuple<
+ groupConvBackpropDataSpecificParams,
+ InferenceEngine::Precision,
+ InferenceEngine::SizeVector,
+ LayerTestsUtils::TargetDevice> groupConvBackpropDataLayerTestParamsSet;
+
+namespace LayerTestsDefinitions {
+
+// Parameterized single-layer fixture driven by groupConvBackpropDataLayerTestParamsSet.
+class GroupConvBackpropDataLayerTest : public testing::WithParamInterface<groupConvBackpropDataLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<groupConvBackpropDataLayerTestParamsSet> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+// Layer-specific parameters of a quantized ConvolutionBackpropData test case.
+// NOTE(review): the elements are undocumented; the leading ones presumably follow the plain
+// ConvolutionBackpropData tuple (kernel, strides, pad begin, pad end, dilation, out channels,
+// pad type), and the trailing size_t is presumably the number of quantization levels - confirm against SetUp.
+typedef std::tuple<
+ InferenceEngine::SizeVector,
+ InferenceEngine::SizeVector,
+ std::vector<ptrdiff_t>,
+ std::vector<ptrdiff_t>,
+ InferenceEngine::SizeVector,
+ size_t,
+ ngraph::op::PadType,
+ size_t,
+ ngraph::helpers::QuantizationGranularity> quantConvBackpropDataSpecificParams;
+// Full parameter set: the layer-specific tuple plus a precision, a size vector and the target device.
+typedef std::tuple<
+ quantConvBackpropDataSpecificParams,
+ InferenceEngine::Precision,
+ InferenceEngine::SizeVector,
+ LayerTestsUtils::TargetDevice> quantConvBackpropDataLayerTestParamsSet;
+
+namespace LayerTestsDefinitions {
+
+// Parameterized single-layer fixture driven by quantConvBackpropDataLayerTestParamsSet.
+class QuantConvBackpropDataLayerTest : public testing::WithParamInterface<quantConvBackpropDataLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<quantConvBackpropDataLayerTestParamsSet> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+// Layer-specific parameters of a quantized GroupConvolutionBackpropData test case.
+// NOTE(review): the elements are undocumented; the leading ones presumably are kernel, strides,
+// pad begin, pad end, dilation, out channels, group count and pad type, and the trailing size_t
+// is presumably the number of quantization levels - confirm against SetUp.
+typedef std::tuple<
+ InferenceEngine::SizeVector,
+ InferenceEngine::SizeVector,
+ std::vector<ptrdiff_t>,
+ std::vector<ptrdiff_t>,
+ InferenceEngine::SizeVector,
+ size_t,
+ size_t,
+ ngraph::op::PadType,
+ size_t,
+ ngraph::helpers::QuantizationGranularity> quantGroupConvBackpropDataSpecificParams;
+// Full parameter set: the layer-specific tuple plus a precision, a size vector and the target device.
+typedef std::tuple<
+ quantGroupConvBackpropDataSpecificParams,
+ InferenceEngine::Precision,
+ InferenceEngine::SizeVector,
+ LayerTestsUtils::TargetDevice> quantGroupConvBackpropDataLayerTestParamsSet;
+
+namespace LayerTestsDefinitions {
+
+// Parameterized single-layer fixture driven by quantGroupConvBackpropDataLayerTestParamsSet.
+class QuantGroupConvBackpropDataLayerTest : public testing::WithParamInterface<quantGroupConvBackpropDataLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ // Name generator passed to the test instantiation macro.
+ static std::string getTestCaseName(testing::TestParamInfo<quantGroupConvBackpropDataLayerTestParamsSet> obj);
+
+protected:
+ // GoogleTest per-test setup hook overridden by this fixture.
+ void SetUp() override;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include <common_test_utils/common_utils.hpp>
+#include "behavior/add_output.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+
+// Builds the test-case name from the list of outputs to add.
+// Only the second tuple element takes part in the name, so it is read in place instead of
+// copying the whole parameter tuple (including the CNNNetwork) into unused locals.
+std::string AddOutputsTest::getTestCaseName(const testing::TestParamInfo<addOutputsParams> &obj) {
+    const std::vector<std::string> &outputsToAdd = std::get<1>(obj.param);
+    std::ostringstream results;
+    results << "Outputs:" << CommonTestUtils::vec2str<std::string>(outputsToAdd);
+    return results.str();
+}
+
+// Unpacks the current test parameter into the fixture members net, outputsToAdd and deviceName.
+void AddOutputsTest::SetUp() {
+ std::tie(net, outputsToAdd, deviceName) = GetParam();
+}
+
+// Adds the requested layers as network outputs, loads the network on the target device and
+// checks that both the added and the original outputs are reported by the executable network.
+TEST_P(AddOutputsTest, smoke_CheckOutputExist) {
+ // Expected outputs = requested extra outputs + outputs the network had before they were added.
+ std::vector<std::string> expectedOutputs = outputsToAdd;
+ for (const auto &out : net.getOutputsInfo()) {
+ expectedOutputs.push_back(out.first);
+ }
+ for (const auto &out : outputsToAdd) {
+ net.addOutput(out);
+ }
+ auto ie = PluginCache::get().ie(deviceName);
+ auto executableNet = ie->LoadNetwork(net, deviceName);
+ auto outputs = executableNet.GetOutputsInfo();
+
+ for (const auto &out : expectedOutputs) {
+ ASSERT_TRUE(outputs.count(out)) << "Layer " << out << " expected to be in network outputs but it's not!";
+ }
+}
\ No newline at end of file
ASSERT_NE(std::string(resp.msg).find("returnGeneralErrorIfCallbackThrowException"), std::string::npos);
}
-} // namespace LayerTestsDefinitions
\ No newline at end of file
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+#include <cpp_interfaces/exception2status.hpp>
+#include "common_test_utils/test_assertions.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ie_preprocess.hpp"
+#include "ngraph_functions/pass/convert_prc.hpp"
+#include "ngraph_functions/subgraph_builders.hpp"
+#include "behavior/set_preprocess.hpp"
+
+namespace LayerTestsDefinitions {
+    // Builds the test-case name from the net precision, the target device and, when a
+    // configuration is supplied, its first key/value pair.
+    std::string PreProcessTests::getTestCaseName(testing::TestParamInfo<PreProcessParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::tie(netPrecision, targetDevice, configuration) = obj.param;
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice;
+        if (!configuration.empty()) {
+            // Separate the config item from the device name with "_" like every other field,
+            // otherwise the two are fused into one token (e.g. "targetDevice=MULTIconfigItem=...").
+            result << "_configItem=" << configuration.begin()->first << "_" << configuration.begin()->second;
+        }
+        return result.str();
+    }
+
+    // Unpacks the test parameters and builds the function under test.
+    // The net precision is unpacked but not used when building the function here.
+    void PreProcessTests::SetUp() {
+        InferenceEngine::Precision netPrecision;
+        std::tie(netPrecision, targetDevice, configuration) = this->GetParam();
+        function = ngraph::builder::subgraph::makeConvPoolRelu();
+    }
+
+    // Resets the plugin cache after each test whose target device name contains the GPU device name.
+    void PreProcessTests::TearDown() {
+        if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
+            PluginCache::get().reset();
+        }
+    }
+
+// Sets a resize algorithm on the network's input info and checks that an infer request
+// created from the loaded network reports the same pre-processing info for that input.
+TEST_P(PreProcessTests, SetPreProcessToInputInfo) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+
+    auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess();
+    preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);
+
+    // Get Core from cache
+    auto ie = PluginCache::get().ie();
+    // Load CNNNetwork to target plugins
+    auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration);
+    // Create InferRequest
+    auto req = execNet.CreateInferRequest();
+    {
+        InferenceEngine::ConstInputsDataMap inputsMap = execNet.GetInputsInfo();
+        const auto& name = inputsMap.begin()->second->name();
+        const InferenceEngine::PreProcessInfo *info = &req.GetPreProcess(name.c_str());
+        ASSERT_EQ(info->getResizeAlgorithm(), InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);
+        ASSERT_PREPROCESS_INFO_EQ(preProcess, *info);
+    }
+    function.reset();
+}
+
+// Same scenario, but an input blob is set on the infer request before its pre-processing
+// info is queried; only the resize algorithm is compared.
+TEST_P(PreProcessTests, SetPreProcessToInferRequest) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+    // Create CNNNetwork from ngraph::Function
+    InferenceEngine::CNNNetwork cnnNet(function);
+
+    auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess();
+    preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);
+
+    // Get Core from cache
+    auto ie = PluginCache::get().ie();
+    // Load CNNNetwork to target plugins
+    auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration);
+    // Create InferRequest
+    auto req = execNet.CreateInferRequest();
+    InferenceEngine::ConstInputsDataMap inputsMap = execNet.GetInputsInfo();
+    const auto& name = inputsMap.begin()->second->name();
+    auto inputBlob = FuncTestUtils::createAndFillBlob(
+            cnnNet.getInputsInfo().begin()->second->getTensorDesc());
+    req.SetBlob(cnnNet.getInputsInfo().begin()->first, inputBlob);
+    {
+        const InferenceEngine::PreProcessInfo *info = &req.GetPreProcess(name.c_str());
+        ASSERT_EQ(cnnNet.getInputsInfo().begin()->second->getPreProcess().getResizeAlgorithm(),
+                  info->getResizeAlgorithm());
+    }
+    function.reset();
+}
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
+++ /dev/null
-// Copyright (C) 2020 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "other/add_output.hpp"
-
-// TODO: Replace IRBuilder with NGraph when it supports Memory Layer
-std::string AddOutputTestsCommonClass::generate_model() {
- CommonTestUtils::IRBuilder_v6 test_model_builder("model");
-
- auto precision = InferenceEngine::Precision::FP32;
-
- auto Memory_1_layer =
- test_model_builder.AddLayer("Memory_1", "Memory", precision, {{"id", "r_1-3"}, {"index", "1"}, {"size", "2"}})
- .AddOutPort({1, 200})
- .getLayer();
- auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", precision).AddOutPort({1, 200}).getLayer();
- auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", precision, {{"operation", "mul"}})
- .AddInPort({1, 200})
- .AddInPort({1, 200})
- .AddOutPort({1, 200})
- .getLayer();
-
- auto Activation_4_layer =
- test_model_builder.AddLayer("Activation_4", "Activation", precision, {{"type", "sigmoid"}})
- .AddInPort({1, 200})
- .AddOutPort({1, 200})
- .getLayer();
- auto Memory_5_layer =
- test_model_builder.AddLayer("Memory_5", "Memory", precision, {{"id", "r_1-3"}, {"index", "0"}, {"size", "2"}})
- .AddInPort({1, 200})
- .getLayer();
-
- test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0));
- test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1));
- test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0));
- test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0));
-
- auto serial = test_model_builder.serialize();
-
- return serial;
-}
-
-std::string AddOutputTestsCommonClass::getTestCaseName(
- testing::TestParamInfo<std::tuple<std::string, std::string>> obj) {
- std::string layer;
- std::string engine;
-
- std::tie(layer, engine) = obj.param;
- return layer + "_" + engine;
-}
-
-void AddOutputTestsCommonClass::run_test() {
- std::string layer_name;
- std::string engine_type;
-
- std::tie(layer_name, engine_type) = this->GetParam();
-
- auto model = this->generate_model();
-
- InferenceEngine::Core ie;
- InferenceEngine::CNNNetwork network;
- InferenceEngine::ExecutableNetwork executableNet;
-
- auto null_blob = CommonTestUtils::getWeightsBlob(0);
- network = ie.ReadNetwork(model, null_blob);
- network.addOutput(layer_name);
- executableNet = ie.LoadNetwork(network, engine_type);
-
- auto outputs = executableNet.GetOutputsInfo();
-
- auto layer_output = outputs[layer_name];
-
- ASSERT_EQ(true, layer_output && "layer not found in outputs");
-}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/convolution_backprop_data.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Builds the human-readable test-case name from one parameter set.
+// Every tuple element is rendered as "<tag><value>" and the pieces are joined with '_'.
+std::string ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo<convBackpropDataLayerTestParamsSet> obj) {
+    // Outer tuple: op-specific parameters, network precision, input shape, device name.
+    convBackpropDataSpecificParams opParams;
+    InferenceEngine::Precision prc;
+    InferenceEngine::SizeVector shape;
+    std::string device;
+    std::tie(opParams, prc, shape, device) = obj.param;
+
+    // Inner tuple: the deconvolution attributes.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels;
+    ngraph::op::PadType autoPad;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, autoPad) = opParams;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "K" << CommonTestUtils::vec2str(kernelSize) << "_"
+         << "S" << CommonTestUtils::vec2str(strides) << "_"
+         << "PB" << CommonTestUtils::vec2str(padsBegin) << "_"
+         << "PE" << CommonTestUtils::vec2str(padsEnd) << "_"
+         << "D=" << CommonTestUtils::vec2str(dilations) << "_"
+         << "O=" << outChannels << "_"
+         << "AP=" << autoPad << "_"
+         << "netPRC=" << prc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the single-op ngraph function under test: Parameter -> ConvolutionBackpropData -> Result.
+void ConvolutionBackpropDataLayerTest::SetUp() {
+    // Unpack the outer tuple; targetDevice is not a local and is assigned straight from it.
+    convBackpropDataSpecificParams opParams;
+    std::vector<size_t> shape;
+    auto prc = InferenceEngine::Precision::UNSPECIFIED;
+    std::tie(opParams, prc, shape, targetDevice) = this->GetParam();
+
+    // Unpack the deconvolution attributes.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels;
+    ngraph::op::PadType autoPad;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, autoPad) = opParams;
+
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto params = ngraph::builder::makeParams(ngPrc, {shape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // The builder returns a generic Node; narrow it to the concrete op type.
+    auto builtNode = ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernelSize, strides,
+                                                                  padsBegin, padsEnd, dilations, autoPad,
+                                                                  outChannels);
+    auto deconv = std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(builtNode);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(deconv)};
+    function = std::make_shared<ngraph::Function>(results, params, "convolutionBackpropData");
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+// NOTE(review): judging by the test name, Run() compares plugin output with reference values - confirm.
+TEST_P(ConvolutionBackpropDataLayerTest, CompareWithRefs) {
+    Run();
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/cum_sum.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one CumSum parameter set; fields are joined with '_'.
+std::string CumSumLayerTest::getTestCaseName(testing::TestParamInfo<cumSumParams> obj) {
+    InferenceEngine::SizeVector shape;
+    InferenceEngine::Precision prc;
+    int64_t axisValue;
+    bool isExclusive, isReverse;
+    std::string device;
+    std::tie(shape, prc, axisValue, isExclusive, isReverse, device) = obj.param;
+
+    // Boolean flags are spelled out as upper-case words in the name.
+    const char* exclusiveText = isExclusive ? "TRUE" : "FALSE";
+    const char* reverseText = isReverse ? "TRUE" : "FALSE";
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "Precision=" << prc.name() << "_"
+         << "Axis=" << axisValue << "_"
+         << "Exclusive=" << exclusiveText << "_"
+         << "Reverse=" << reverseText << "_"
+         << "TargetDevice=" << device;
+    return name.str();
+}
+
+// Builds the ngraph function under test: Parameter + scalar i64 axis constant -> CumSum -> Result.
+void CumSumLayerTest::SetUp() {
+    InferenceEngine::SizeVector shape;
+    InferenceEngine::Precision prc;
+    int64_t axisValue;
+    bool isExclusive, isReverse;
+    // targetDevice is not a local; it is assigned directly from the parameter tuple.
+    std::tie(shape, prc, axisValue, isExclusive, isReverse, targetDevice) = this->GetParam();
+
+    auto elemType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto dataParam = std::make_shared<ngraph::opset1::Parameter>(elemType, ngraph::Shape(shape));
+    ngraph::ParameterVector inputs;
+    inputs.push_back(dataParam);
+
+    // The axis is passed to the op as the first output of a scalar (empty-shape) i64 constant.
+    auto axisConst = std::make_shared<ngraph::op::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{},
+                                                            std::vector<int64_t>{axisValue});
+    auto axisOut = axisConst->output(0);
+
+    auto dataOuts = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputs));
+    auto builtNode = ngraph::builder::makeCumSum(dataOuts[0], axisOut, isExclusive, isReverse);
+    auto cumSumOp = std::dynamic_pointer_cast<ngraph::op::CumSum>(builtNode);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(cumSumOp)};
+    function = std::make_shared<ngraph::Function>(results, inputs, "cumsum");
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+// The stray ';' that followed the closing brace was an empty declaration at namespace scope and
+// triggers extra-semicolon warnings on pedantic builds; it has been removed.
+TEST_P(CumSumLayerTest, CompareWithRefs) {
+    Run();
+}
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "common_test_utils/common_layers_params.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
+#include "common_test_utils/xml_net_builder/xml_filler.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ie_core.hpp"
+#include "single_layer_tests/eltwise.hpp"
+
+using namespace EltwiseTestNamespace;
+
+// Builds the test-case name for one Eltwise parameter set.
+// The additional plugin configuration is unpacked with the rest of the tuple but is not part of the name.
+std::string EltwiseLayerTest::getTestCaseName(testing::TestParamInfo<eltwiseLayerTestParamsSet> obj) {
+    EltwiseOpType opType;
+    ParameterInputIdx primaryIdx;
+    InputLayerType secondaryType;
+    InferenceEngine::Precision netPrc;
+    InferenceEngine::SizeVector shape;
+    LayerTestsUtils::TargetDevice device;
+    std::map<std::string, std::string> pluginConfig;
+    std::tie(opType, primaryIdx, secondaryType, netPrc, shape, device, pluginConfig) = obj.param;
+
+    std::ostringstream name;
+    name << "operation=" << EltwiseOpType_to_string(opType) << "_"
+         << "netPRC=" << netPrc.name() << "_"
+         << "primaryInputIdx=" << primaryIdx << "_"
+         << "secondaryInputType=" << InputLayerType_to_string(secondaryType) << "_"
+         << "inputShapes=" << CommonTestUtils::vec2str(shape) << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the two-input eltwise function under test.
+//
+// The primary input is always a Parameter. The secondary input is either a Constant filled with
+// evenly spaced values starting at -10 (step 20 / element count) or a second Parameter.
+// primary_input_idx == 0 puts the primary input first; any other value puts it second.
+// An unknown secondary input type or operation records a fatal gtest failure and returns
+// before `function` is assigned.
+void EltwiseLayerTest::SetUp() {
+    EltwiseOpType op;
+    ParameterInputIdx primary_input_idx;
+    InputLayerType secondary_input_type;
+    InferenceEngine::SizeVector inputShape;
+    InferenceEngine::Precision netPrecision;
+    ngraph::ParameterVector parameter_inputs;
+    std::map<std::string, std::string> additional_config;
+    std::tie(op, primary_input_idx, secondary_input_type, netPrecision, inputShape, targetDevice, additional_config) = this->GetParam();
+    // Merge the per-test plugin settings into `configuration`, which is not a local of this function.
+    configuration.insert(additional_config.begin(), additional_config.end());
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    std::shared_ptr<ngraph::Node> input0_node;
+    std::shared_ptr<ngraph::Node> input1_node;
+    auto primary_input = ngraph::builder::makeParams(ngPrc, { inputShape })[0];
+
+    switch (secondary_input_type) {
+    case InputLayerType::CONSTANT:
+    {
+        // Count elements in size_t: accumulating shape dimensions into an int mixed signed and
+        // unsigned arithmetic and could overflow for large shapes.
+        size_t shape_total = 1;
+        for (auto dim : inputShape) {
+            shape_total *= dim;
+        }
+
+        const float min = -10;
+        const float max = 10;
+        const float range = max - min;
+        const float step = range / shape_total;
+
+        // Fill the constant with a linear ramp: min, min + step, min + 2 * step, ...
+        std::vector<float> const_vec(shape_total);
+        for (size_t i = 0; i < shape_total; i++) {
+            const_vec[i] = min + step * i;
+        }
+
+        auto const_vals = ngraph::builder::makeConstant(ngPrc, inputShape, const_vec);
+        parameter_inputs.push_back(primary_input);
+
+        if (primary_input_idx == 0) {
+            input0_node = primary_input;
+            input1_node = const_vals;
+        } else {
+            input0_node = const_vals;
+            input1_node = primary_input;
+        }
+        break;
+    }
+    case InputLayerType::PARAMETER:
+    {
+        auto secondary_input = ngraph::builder::makeParams(ngPrc, { inputShape })[0];
+        // The order of parameter_inputs follows the order of the op inputs.
+        if (primary_input_idx == 0) {
+            parameter_inputs.push_back(primary_input);
+            parameter_inputs.push_back(secondary_input);
+            input0_node = primary_input;
+            input1_node = secondary_input;
+        } else {
+            parameter_inputs.push_back(secondary_input);
+            parameter_inputs.push_back(primary_input);
+            input0_node = secondary_input;
+            input1_node = primary_input;
+        }
+        break;
+    }
+    default:
+        // ASSERT_EQ on two string literals compares pointers, not text; FAIL() states the intent
+        // and prints the message itself.
+        FAIL() << "unknown input type";
+    }
+
+    std::shared_ptr<ngraph::op::util::BinaryElementwiseArithmetic> ngraph_op = nullptr;
+    switch (op) {
+    case EltwiseOpType::ADD:
+        ngraph_op = std::make_shared<ngraph::op::Add>(input0_node, input1_node);
+        break;
+    case EltwiseOpType::MULTIPLY:
+        ngraph_op = std::make_shared<ngraph::op::Multiply>(input0_node, input1_node);
+        break;
+    case EltwiseOpType::SUBSTRACT:
+        ngraph_op = std::make_shared<ngraph::op::Subtract>(input0_node, input1_node);
+        break;
+    default:
+        FAIL() << "Unknown Eltwise operation type: " << EltwiseOpType_to_string(op);
+    }
+    function = std::make_shared<ngraph::Function>(ngraph_op, parameter_inputs, "Eltwise_op");
+}
+
+// Maps an InputLayerType value to its name; unrecognised values yield a placeholder string.
+const char* EltwiseTestNamespace::InputLayerType_to_string(InputLayerType lt) {
+    if (lt == InputLayerType::CONSTANT) {
+        return "CONSTANT";
+    }
+    if (lt == InputLayerType::PARAMETER) {
+        return "PARAMETER";
+    }
+    return "NOT_SUPPORTED_INPUT_LAYER_TYPE";
+}
+
+// Maps an EltwiseOpType value to the short operation name used in test-case names and failure
+// messages; unrecognised values yield a placeholder string.
+const char* EltwiseTestNamespace::EltwiseOpType_to_string(EltwiseOpType eOp) {
+    if (eOp == EltwiseOpType::ADD) {
+        return "Sum";
+    }
+    if (eOp == EltwiseOpType::MULTIPLY) {
+        return "Prod";
+    }
+    if (eOp == EltwiseOpType::SUBSTRACT) {
+        return "Sub";
+    }
+    return "NOT_SUPPORTED_ELTWISE_OPERATION";
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+TEST_P(EltwiseLayerTest, basic) {
+    Run();
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/equal.hpp"
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <ie_core.hpp>
+
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one Equal parameter set; fields are joined with '_'.
+std::string EqualLayerTest::getTestCaseName(const testing::TestParamInfo<EqualTestParam>& obj) {
+    std::vector<InferenceEngine::SizeVector> shapes;
+    InferenceEngine::Precision inPrc;
+    InferenceEngine::Precision outPrc;
+    std::string device;
+    std::tie(shapes, inPrc, outPrc, device) = obj.param;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shapes) << "_"
+         << "inPrc=" << inPrc.name() << "_"
+         << "outPrc=" << outPrc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the ngraph function under test: two Parameters -> Equal -> Result.
+void EqualLayerTest::SetUp() {
+    InferenceEngine::Precision inPrc = InferenceEngine::Precision::UNSPECIFIED;
+    std::vector<InferenceEngine::SizeVector> shapes;
+    // outPrc and targetDevice are not locals; they are assigned directly from the tuple.
+    std::tie(shapes, inPrc, outPrc, targetDevice) = this->GetParam();
+
+    auto elemType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+    auto inputs = ngraph::builder::makeParams(elemType, {shapes});
+    // The comparison is binary, so exactly two input shapes must have been supplied.
+    IE_ASSERT(inputs.size() == 2);
+
+    auto compareOp = std::make_shared<ngraph::opset3::Equal>(inputs[0], inputs[1]);
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(compareOp)};
+    function = std::make_shared<ngraph::Function>(results, inputs, "Equal");
+}
+
+// Parameterized test body: delegates to Run(), then resets the plugin cache when the test ran on GPU.
+TEST_P(EqualLayerTest, CompareWithRefs) {
+    Run();
+
+    const std::string gpuDevice{CommonTestUtils::DEVICE_GPU};
+    if (targetDevice == gpuDevice) {
+        PluginCache::get().reset();
+    }
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/fake_quantize.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one FakeQuantize parameter set; fields are joined with '_'.
+std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
+    // Outer tuple: op-specific parameters, network precision, input shape, device name.
+    fqSpecificParams opParams;
+    InferenceEngine::Precision prc;
+    InferenceEngine::SizeVector shape;
+    std::string device;
+    std::tie(opParams, prc, shape, device) = obj.param;
+
+    // Inner tuple: number of quantization levels and the shape passed for the FakeQuantize constants.
+    size_t levelCount;
+    std::vector<size_t> constDims;
+    std::tie(levelCount, constDims) = opParams;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "CS=" << CommonTestUtils::vec2str(constDims) << "_"
+         << "LEVELS=" << levelCount << "_"
+         << "netPRC=" << prc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the ngraph function under test: Parameter -> FakeQuantize -> Result.
+void FakeQuantizeLayerTest::SetUp() {
+    fqSpecificParams fqParams;
+    std::vector<size_t> inputShape;
+    auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
+    // targetDevice is not a local; it is assigned directly from the parameter tuple.
+    std::tie(fqParams, netPrecision, inputShape, targetDevice) = this->GetParam();
+    // The unused kernel/stride/dilation locals left over from the convolution tests were removed.
+    size_t levels;
+    std::vector<size_t> constShape;
+    std::tie(levels, constShape) = fqParams;
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // The builder returns a generic Node; narrow it to the concrete op type.
+    auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape));
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(fq)};
+    function = std::make_shared<ngraph::Function>(results, params, "fakeQuantize");
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+TEST_P(FakeQuantizeLayerTest, CompareWithRefs) {
+    Run();
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/greater.hpp"
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <ie_core.hpp>
+
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one Greater parameter set; fields are joined with '_'.
+std::string GreaterLayerTest::getTestCaseName(const testing::TestParamInfo<GreaterTestParam>& obj) {
+    std::vector<InferenceEngine::SizeVector> shapes;
+    InferenceEngine::Precision inPrc;
+    InferenceEngine::Precision outPrc;
+    std::string device;
+    std::tie(shapes, inPrc, outPrc, device) = obj.param;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shapes) << "_"
+         << "inPrc=" << inPrc.name() << "_"
+         << "outPrc=" << outPrc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the ngraph function under test: two Parameters -> Greater -> Result.
+void GreaterLayerTest::SetUp() {
+    InferenceEngine::Precision inPrc = InferenceEngine::Precision::UNSPECIFIED;
+    std::vector<InferenceEngine::SizeVector> shapes;
+    // outPrc and targetDevice are not locals; they are assigned directly from the tuple.
+    std::tie(shapes, inPrc, outPrc, targetDevice) = this->GetParam();
+
+    auto elemType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+    auto inputs = ngraph::builder::makeParams(elemType, {shapes});
+    // The comparison is binary, so exactly two input shapes must have been supplied.
+    IE_ASSERT(inputs.size() == 2);
+
+    auto greaterOp = std::make_shared<ngraph::opset3::Greater>(inputs[0], inputs[1]);
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(greaterOp)};
+    function = std::make_shared<ngraph::Function>(results, inputs, "Greater");
+}
+
+// Parameterized test body: delegates to Run(), then resets the plugin cache when the test ran on GPU.
+TEST_P(GreaterLayerTest, CompareWithRefs) {
+    Run();
+
+    const std::string gpuDevice{CommonTestUtils::DEVICE_GPU};
+    if (targetDevice == gpuDevice) {
+        PluginCache::get().reset();
+    }
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/group_convolution_backprop_data.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one GroupConvolutionBackpropData parameter set.
+// Every tuple element is rendered as "<tag><value>" and the pieces are joined with '_'.
+std::string GroupConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo<groupConvBackpropDataLayerTestParamsSet> obj) {
+    // Outer tuple: op-specific parameters, network precision, input shape, device name.
+    groupConvBackpropDataSpecificParams opParams;
+    InferenceEngine::Precision prc;
+    InferenceEngine::SizeVector shape;
+    std::string device;
+    std::tie(opParams, prc, shape, device) = obj.param;
+
+    // Inner tuple: the grouped deconvolution attributes.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels, groupCount;
+    ngraph::op::PadType autoPad;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, groupCount, autoPad) = opParams;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "K" << CommonTestUtils::vec2str(kernelSize) << "_"
+         << "S" << CommonTestUtils::vec2str(strides) << "_"
+         << "PB" << CommonTestUtils::vec2str(padsBegin) << "_"
+         << "PE" << CommonTestUtils::vec2str(padsEnd) << "_"
+         << "D=" << CommonTestUtils::vec2str(dilations) << "_"
+         << "O=" << outChannels << "_"
+         << "G=" << groupCount << "_"
+         << "AP=" << autoPad << "_"
+         << "netPRC=" << prc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the ngraph function under test: Parameter -> GroupConvolutionBackpropData -> Result.
+void GroupConvBackpropDataLayerTest::SetUp() {
+    // Unpack the outer tuple; targetDevice is not a local and is assigned straight from it.
+    groupConvBackpropDataSpecificParams opParams;
+    std::vector<size_t> shape;
+    auto prc = InferenceEngine::Precision::UNSPECIFIED;
+    std::tie(opParams, prc, shape, targetDevice) = this->GetParam();
+
+    // Unpack the grouped deconvolution attributes.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels, groupCount;
+    ngraph::op::PadType autoPad;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, groupCount, autoPad) = opParams;
+
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto params = ngraph::builder::makeParams(ngPrc, {shape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // The builder returns a generic Node; narrow it to the concrete op type.
+    auto builtNode = ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernelSize, strides,
+                                                                       padsBegin, padsEnd, dilations, autoPad,
+                                                                       outChannels, groupCount);
+    auto groupDeconv = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(builtNode);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(groupDeconv)};
+    function = std::make_shared<ngraph::Function>(results, params, "GroupConvolutionBackpropData");
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+TEST_P(GroupConvBackpropDataLayerTest, CompareWithRefs) {
+    Run();
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "subgraph_tests/quantized_convolution_backprop_data.hpp"
+
+using ngraph::helpers::QuantizationGranularity;
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one quantized ConvolutionBackpropData parameter set.
+// Every tuple element is rendered as "<tag><value>" and the pieces are joined with '_'.
+std::string QuantConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo<quantConvBackpropDataLayerTestParamsSet> obj) {
+    // Outer tuple: op-specific parameters, network precision, input shape, device name.
+    quantConvBackpropDataSpecificParams opParams;
+    InferenceEngine::Precision prc;
+    InferenceEngine::SizeVector shape;
+    std::string device;
+    std::tie(opParams, prc, shape, device) = obj.param;
+
+    // Inner tuple: deconvolution attributes followed by the quantization settings.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels;
+    ngraph::op::PadType autoPad;
+    size_t levelCount;
+    QuantizationGranularity granularity;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, autoPad, levelCount, granularity) = opParams;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "K" << CommonTestUtils::vec2str(kernelSize) << "_"
+         << "S" << CommonTestUtils::vec2str(strides) << "_"
+         << "PB" << CommonTestUtils::vec2str(padsBegin) << "_"
+         << "PE" << CommonTestUtils::vec2str(padsEnd) << "_"
+         << "D=" << CommonTestUtils::vec2str(dilations) << "_"
+         << "O=" << outChannels << "_"
+         << "AP=" << autoPad << "_"
+         << "Levels=" << levelCount << "_"
+         << "QG=" << granularity << "_"
+         << "netPRC=" << prc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the subgraph under test:
+//   Parameter -> FakeQuantize --+
+//                               +--> ConvolutionBackpropData -> Result
+//   Constant  -> FakeQuantize --+
+void QuantConvBackpropDataLayerTest::SetUp() {
+    // Unpack the outer tuple; targetDevice is not a local and is assigned straight from it.
+    quantConvBackpropDataSpecificParams opParams;
+    std::vector<size_t> shape;
+    auto prc = InferenceEngine::Precision::UNSPECIFIED;
+    std::tie(opParams, prc, shape, targetDevice) = this->GetParam();
+
+    // Unpack the deconvolution attributes and the quantization settings.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels;
+    ngraph::op::PadType autoPad;
+    size_t levelCount;
+    QuantizationGranularity granularity;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, autoPad, levelCount, granularity) = opParams;
+
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto params = ngraph::builder::makeParams(ngPrc, {shape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // Data branch: the FakeQuantize constant shape is all ones, except that per-channel
+    // granularity copies dimension 1 of the input shape.
+    std::vector<size_t> dataFqDims(shape.size(), 1);
+    if (granularity == ngraph::helpers::Perchannel) {
+        dataFqDims[1] = shape[1];
+    }
+    auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levelCount, dataFqDims);
+
+    // Weights shape: {shape[1], outChannels, kernel dims...}.
+    std::vector<size_t> weightsDims = {shape[1], outChannels};
+    weightsDims.insert(weightsDims.end(), kernelSize.begin(), kernelSize.end());
+
+    // No explicit weight values are supplied, so the last makeConstant argument is always true here.
+    std::vector<float> weightValues;
+    auto weightsConst = ngraph::builder::makeConstant(ngPrc, weightsDims, weightValues, weightValues.empty());
+
+    // Weights branch: per-channel granularity copies dimension 0 of the weights shape.
+    std::vector<size_t> weightsFqDims(weightsDims.size(), 1);
+    if (granularity == ngraph::helpers::Perchannel) {
+        weightsFqDims[0] = weightsDims[0];
+    }
+    auto weightsFq = ngraph::builder::makeFakeQuantize(weightsConst, ngPrc, levelCount, weightsFqDims);
+
+    // The builder returns a generic Node; narrow it to the concrete op type.
+    auto builtNode = ngraph::builder::makeConvolutionBackpropData(dataFq, weightsFq, ngPrc, strides, padsBegin,
+                                                                  padsEnd, dilations, autoPad);
+    auto deconv = std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(builtNode);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(deconv)};
+    function = std::make_shared<ngraph::Function>(results, params, "QuantConvolutionBackpropData");
+}
+
+// Parameterized test body: all work is delegated to Run(), which is defined outside this file.
+TEST_P(QuantConvBackpropDataLayerTest, CompareWithRefs) {
+    Run();
+}
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp"
+
+using ngraph::helpers::QuantizationGranularity;
+
+namespace LayerTestsDefinitions {
+
+// Builds the test-case name for one quantized GroupConvolutionBackpropData parameter set.
+// Every tuple element is rendered as "<tag><value>" and the pieces are joined with '_'.
+std::string QuantGroupConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo<quantGroupConvBackpropDataLayerTestParamsSet> obj) {
+    // Outer tuple: op-specific parameters, network precision, input shape, device name.
+    quantGroupConvBackpropDataSpecificParams opParams;
+    InferenceEngine::Precision prc;
+    InferenceEngine::SizeVector shape;
+    std::string device;
+    std::tie(opParams, prc, shape, device) = obj.param;
+
+    // Inner tuple: grouped deconvolution attributes followed by the quantization settings.
+    InferenceEngine::SizeVector kernelSize, strides, dilations;
+    std::vector<ptrdiff_t> padsBegin, padsEnd;
+    size_t outChannels, groupCount;
+    ngraph::op::PadType autoPad;
+    size_t levelCount;
+    QuantizationGranularity granularity;
+    std::tie(kernelSize, strides, padsBegin, padsEnd, dilations, outChannels, groupCount, autoPad, levelCount, granularity) = opParams;
+
+    std::ostringstream name;
+    name << "IS=" << CommonTestUtils::vec2str(shape) << "_"
+         << "K" << CommonTestUtils::vec2str(kernelSize) << "_"
+         << "S" << CommonTestUtils::vec2str(strides) << "_"
+         << "PB" << CommonTestUtils::vec2str(padsBegin) << "_"
+         << "PE" << CommonTestUtils::vec2str(padsEnd) << "_"
+         << "D=" << CommonTestUtils::vec2str(dilations) << "_"
+         << "O=" << outChannels << "_"
+         << "G=" << groupCount << "_"
+         << "AP=" << autoPad << "_"
+         << "Levels=" << levelCount << "_"
+         << "QG=" << granularity << "_"
+         << "netPRC=" << prc.name() << "_"
+         << "targetDevice=" << device;
+    return name.str();
+}
+
+// Builds the subgraph under test:
+//   Parameter -> FakeQuantize --+
+//                               +--> GroupConvolutionBackpropData -> Result
+//   Constant  -> FakeQuantize --+
+//
+// Throws std::runtime_error when numGroups is zero or does not evenly divide both
+// inputShape[1] and convOutChannels.
+void QuantGroupConvBackpropDataLayerTest::SetUp() {
+    quantGroupConvBackpropDataSpecificParams groupConvBackpropDataParams;
+    std::vector<size_t> inputShape;
+    auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
+    // targetDevice is not a local; it is assigned directly from the parameter tuple.
+    std::tie(groupConvBackpropDataParams, netPrecision, inputShape, targetDevice) = this->GetParam();
+    ngraph::op::PadType padType;
+    InferenceEngine::SizeVector kernel, stride, dilation;
+    std::vector<ptrdiff_t> padBegin, padEnd;
+    size_t convOutChannels, numGroups;
+    size_t quantLevels;
+    QuantizationGranularity quantGranularity;
+    std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams;
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    // Data branch: the FakeQuantize constant shape is all ones, except that per-channel
+    // granularity copies dimension 1 of the input shape.
+    std::vector<size_t> dataFqConstShapes(inputShape.size(), 1);
+    if (quantGranularity == ngraph::helpers::Perchannel)
+        dataFqConstShapes[1] = inputShape[1];
+    auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes);
+
+    // Weights shape: {numGroups, inputShape[1] / numGroups, convOutChannels / numGroups, kernel dims...}.
+    std::vector<size_t> weightsShapes = {inputShape[1], convOutChannels};
+    // numGroups == 0 is rejected first: the modulo and division below would otherwise be undefined.
+    if (numGroups == 0 || weightsShapes[0] % numGroups || weightsShapes[1] % numGroups)
+        throw std::runtime_error("incorrect shape for QuantGroupConvolutionBackpropData");
+    weightsShapes[0] /= numGroups;
+    weightsShapes[1] /= numGroups;
+    weightsShapes.insert(weightsShapes.begin(), numGroups);
+    weightsShapes.insert(weightsShapes.end(), kernel.begin(), kernel.end());
+
+    // No explicit weight values are supplied, so the last makeConstant argument is always true here.
+    std::vector<float> weightsData;
+    auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty());
+
+    // Weights branch: per-channel granularity copies dimension 0 of the weights shape (the group count).
+    std::vector<size_t> weightsFqConstShapes(weightsShapes.size(), 1);
+    if (quantGranularity == ngraph::helpers::Perchannel)
+        weightsFqConstShapes[0] = weightsShapes[0];
+
+    auto weightsFq = ngraph::builder::makeFakeQuantize(weightsNode, ngPrc, quantLevels, weightsFqConstShapes);
+
+    // The builder returns a generic Node; narrow it to the concrete op type.
+    auto groupConvBackpropData = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(
+            ngraph::builder::makeGroupConvolutionBackpropData(dataFq, weightsFq, ngPrc, stride, padBegin, padEnd, dilation, padType));
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(groupConvBackpropData)};
+    function = std::make_shared<ngraph::Function>(results, params, "QuantGroupConvolutionBackpropData");
+}
+
+TEST_P(QuantGroupConvBackpropDataLayerTest, CompareWithRefs) {
+ Run();
+}
+} // namespace LayerTestsDefinitions
inline std::string vec2str(const std::vector<vecElementType> &vec) {
std::ostringstream result;
result << "(";
- std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator<size_t>(result, "."));
+ std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator<vecElementType>(result, "."));
result << vec.back() << ")";
return result.str();
}
convWeigthsSize + convBiasesSize + fcWeigthsSize + fcBiasesSize));
}
+
+// Builds a small v6 IR model that uses a pair of Memory layers sharing the id "r_1-3":
+//   Memory_1 --+
+//              +--> Eltwise_3 (mul) -> Activation_4 (sigmoid) -> Memory_5
+//   Input_2  --+
+// Every port has shape {1, 200}. The serialized model is returned together with the blob
+// obtained from getWeightsBlob(0).
+TestModel getModelWithMemory(InferenceEngine::Precision netPrc) {
+    CommonTestUtils::IRBuilder_v6 builder("model");
+
+    // Layers are added in the same order as their numeric name suffixes.
+    auto memoryRead =
+        builder.AddLayer("Memory_1", "Memory", netPrc, {{"id", "r_1-3"},
+                                                        {"index", "1"},
+                                                        {"size", "2"}})
+            .AddOutPort({1, 200})
+            .getLayer();
+    auto input = builder.AddLayer("Input_2", "input", netPrc).AddOutPort({1, 200}).getLayer();
+    auto eltwise = builder.AddLayer("Eltwise_3", "Eltwise", netPrc, {{"operation", "mul"}})
+                       .AddInPort({1, 200})
+                       .AddInPort({1, 200})
+                       .AddOutPort({1, 200})
+                       .getLayer();
+
+    auto activation =
+        builder.AddLayer("Activation_4", "Activation", netPrc, {{"type", "sigmoid"}})
+            .AddInPort({1, 200})
+            .AddOutPort({1, 200})
+            .getLayer();
+    auto memoryWrite =
+        builder.AddLayer("Memory_5", "Memory", netPrc, {{"id", "r_1-3"},
+                                                        {"index", "0"},
+                                                        {"size", "2"}})
+            .AddInPort({1, 200})
+            .getLayer();
+
+    // Wire the graph.
+    builder.AddEdge(memoryRead.out(0), eltwise.in(0));
+    builder.AddEdge(input.out(0), eltwise.in(1));
+    builder.AddEdge(eltwise.out(0), activation.in(0));
+    builder.AddEdge(activation.out(0), memoryWrite.in(0));
+
+    auto xml = builder.serialize();
+    return TestModel(xml, CommonTestUtils::getWeightsBlob(0));
+}
} // namespace TestModel
} // namespace FuncTestUtils
\ No newline at end of file
const TestModel convReluNormPoolFcModelFP16 = getConvReluNormPoolFcModel(InferenceEngine::Precision::FP16);
const TestModel convReluNormPoolFcModelQ78 = getConvReluNormPoolFcModel(InferenceEngine::Precision::Q78);
+// Returns a test model containing Memory layers, built for the given network precision.
+TestModel getModelWithMemory(InferenceEngine::Precision netPrc);
+
const char incorrect_input_name[] = "incorrect_input_name";
} // namespace TestModel
const std::vector<float> &filterWeights = {},
const std::vector<float> &biasesWeights = {});
+std::shared_ptr<ngraph::Node> makeConvolutionBackpropData(const ngraph::Output<Node> &in,
+ const element::Type &type,
+ const std::vector<size_t> &filterSize,
+ const std::vector<size_t> &strides,
+ const std::vector<ptrdiff_t> &padsBegin,
+ const std::vector<ptrdiff_t> &padsEnd,
+ const std::vector<size_t> &dilations,
+ const op::PadType &autoPad,
+ size_t numOutChannels,
+ bool addBiases = false,
+ const std::vector<float> &filterWeights = {},
+ const std::vector<float> &biasesWeights = {});
+
+std::shared_ptr<ngraph::Node> makeConvolutionBackpropData(const ngraph::Output<Node> &in,
+ const ngraph::Output<Node> &weights,
+ const element::Type &type,
+ const std::vector<size_t> &strides,
+ const std::vector<ptrdiff_t> &padsBegin,
+ const std::vector<ptrdiff_t> &padsEnd,
+ const std::vector<size_t> &dilations,
+ const op::PadType &autoPad,
+ bool addBiases = false,
+ const std::vector<float> &biasesWeights = {});
+
+std::shared_ptr<ngraph::Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in,
+ const element::Type &type,
+ const std::vector<size_t> &filterSize,
+ const std::vector<size_t> &strides,
+ const std::vector<ptrdiff_t> &padsBegin,
+ const std::vector<ptrdiff_t> &padsEnd,
+ const std::vector<size_t> &dilations,
+ const op::PadType &autoPad,
+ size_t numOutChannels,
+ size_t numGroups,
+ bool addBiases = false,
+ const std::vector<float> &filterWeights = {},
+ const std::vector<float> &biasesWeights = {});
+
+std::shared_ptr<ngraph::Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in,
+ const ngraph::Output<Node> &weights,
+ const element::Type &type,
+ const std::vector<size_t> &strides,
+ const std::vector<ptrdiff_t> &padsBegin,
+ const std::vector<ptrdiff_t> &padsEnd,
+ const std::vector<size_t> &dilations,
+ const op::PadType &autoPad,
+ bool addBiases = false,
+ const std::vector<float> &biasesWeights = {});
+
std::shared_ptr<ngraph::Node> makeSplit(const ngraph::Output<Node> &in,
const element::Type &type,
size_t numSplits,
std::shared_ptr<ngraph::Node> makeSelect(std::vector<ngraph::Output<Node>> &in,
const ngraph::op::AutoBroadcastSpec& auto_broadcast);
+std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
+ const element::Type &type,
+ std::size_t levels,
+ std::vector<size_t> constShapes,
+ const std::vector<float> &inputLowData,
+ const std::vector<float> &inputHighData,
+ const std::vector<float> &outputLowData,
+ const std::vector<float> &outputHighData);
+
+std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
+ const element::Type &type,
+ std::size_t levels,
+ std::vector<size_t> constShapes);
+
+std::shared_ptr<ngraph::Node> makeCumSum(const ngraph::Output<Node> &in,
+ const ngraph::Output<Node> &axis,
+ bool exclusive,
+ bool reverse);
+
} // namespace builder
} // namespace ngraph
Gelu
};
+enum QuantizationGranularity {
+ Pertensor,
+ Perchannel
+};
+
+// Returns the textual name of a QuantizationGranularity value.
+// Throws std::runtime_error for any value that is not a known enumerator.
+inline std::string quantizationGranularityToString(const QuantizationGranularity& granularity) {
+    switch (granularity) {
+        case Pertensor:
+            return "Pertensor";
+        case Perchannel:
+            return "Perchannel";
+        default:
+            throw std::runtime_error("Unsupported QuantizationGranularity type");
+    }
+}
+
+// Streams the name produced by quantizationGranularityToString and returns the stream.
+inline std::ostream& operator<<(std::ostream& out, const QuantizationGranularity& granularity) {
+    out << quantizationGranularityToString(granularity);
+    return out;
+}
+
ngraph::OutputVector convert2OutputVector(const std::vector<std::shared_ptr<ngraph::Node>> &nodes);
template<class opType>
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+//
+
+#include <vector>
+#include <memory>
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+// Creates the weights constant for a ConvolutionBackpropData node and delegates to the
+// overload that takes explicit weights.
+// The weights shape is {in.get_shape()[1], numOutChannels, filterSize...}.
+// When filterWeights is empty, `true` is passed as makeConstant's last argument
+// (presumably requesting generated values -- confirm against makeConstant).
+std::shared_ptr<Node> makeConvolutionBackpropData(const ngraph::Output<Node> &in,
+                                                  const element::Type &type,
+                                                  const std::vector<size_t> &filterSize,
+                                                  const std::vector<size_t> &strides,
+                                                  const std::vector<ptrdiff_t> &padsBegin,
+                                                  const std::vector<ptrdiff_t> &padsEnd,
+                                                  const std::vector<size_t> &dilations,
+                                                  const op::PadType &autoPad,
+                                                  size_t numOutChannels,
+                                                  bool addBiases,
+                                                  const std::vector<float> &filterWeights,
+                                                  const std::vector<float> &biasesWeights) {
+    const auto inputShape = in.get_shape();
+
+    // Leading two dimensions first, then the spatial kernel dimensions.
+    std::vector<size_t> weightsShape{inputShape[1], numOutChannels};
+    weightsShape.insert(weightsShape.end(), filterSize.begin(), filterSize.end());
+
+    const bool generateWeights = filterWeights.empty();
+    auto weightsNode = makeConstant(type, weightsShape, filterWeights, generateWeights);
+
+    return makeConvolutionBackpropData(in, weightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, biasesWeights);
+}
+
+// Builds an opset1::ConvolutionBackpropData node from `in` and `weights`.
+// When addBiases is true, the result is wrapped in an opset1::Add with a constant created
+// by makeConstant(type, {}, biasesWeights, biasesWeights.empty()).
+std::shared_ptr<Node> makeConvolutionBackpropData(const ngraph::Output<Node> &in,
+                                                  const ngraph::Output<Node> &weights,
+                                                  const element::Type &type,
+                                                  const std::vector<size_t> &strides,
+                                                  const std::vector<ptrdiff_t> &padsBegin,
+                                                  const std::vector<ptrdiff_t> &padsEnd,
+                                                  const std::vector<size_t> &dilations,
+                                                  const op::PadType &autoPad,
+                                                  bool addBiases,
+                                                  const std::vector<float> &biasesWeights) {
+    auto deconv = std::make_shared<opset1::ConvolutionBackpropData>(in, weights, strides, padsBegin, padsEnd, dilations, autoPad);
+
+    // No bias requested: the bare deconvolution is the result.
+    if (!addBiases) {
+        return deconv;
+    }
+
+    const bool generateBiases = biasesWeights.empty();
+    auto biasNode = makeConstant(type, {}, biasesWeights, generateBiases);
+    return std::make_shared<ngraph::opset1::Add>(deconv, biasNode);
+}
+
+} // namespace builder
+} // namespace ngraph
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+// Thin wrapper: constructs an ngraph::op::CumSum node, forwarding all four arguments unchanged.
+std::shared_ptr<ngraph::Node> makeCumSum(const ngraph::Output<Node> &in,
+                                         const ngraph::Output<Node> &axis,
+                                         bool exclusive,
+                                         bool reverse) {
+    auto cumSum = std::make_shared<ngraph::op::CumSum>(in, axis, exclusive, reverse);
+    return cumSum;
+}
+
+} // namespace builder
+} // namespace ngraph
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+//
+
+#include <vector>
+#include <memory>
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+// Builds an opset1::FakeQuantize node over `in` with caller-supplied range data.
+// Four constants of shape constShapes and element type `type` are created, in the order
+// input-low, input-high, output-low, output-high. For each one, makeConstant's last
+// argument is true when the corresponding data vector is empty.
+std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
+                                       const element::Type &type,
+                                       std::size_t levels,
+                                       std::vector<size_t> constShapes,
+                                       const std::vector<float> &inputLowData,
+                                       const std::vector<float> &inputHighData,
+                                       const std::vector<float> &outputLowData,
+                                       const std::vector<float> &outputHighData) {
+    // Creation order is kept fixed in case makeConstant generates values.
+    auto inLow = makeConstant(type, constShapes, inputLowData, inputLowData.empty());
+    auto inHigh = makeConstant(type, constShapes, inputHighData, inputHighData.empty());
+    auto outLow = makeConstant(type, constShapes, outputLowData, outputLowData.empty());
+    auto outHigh = makeConstant(type, constShapes, outputHighData, outputHighData.empty());
+
+    return std::make_shared<opset1::FakeQuantize>(in, inLow, inHigh, outLow, outHigh, levels);
+}
+
+// Builds an opset1::FakeQuantize node over `in` whose four range constants are generated
+// here instead of being supplied by the caller.
+//
+// Each range vector holds shape_size(constShapes) values obtained from
+// NGraphFunctions::Utils::generateVector<f32> (presumably pseudo-random -- confirm).
+// For levels == 2 the input-high data starts as a copy of input-low, and the output
+// bounds are reduced to 0/1 values.
+// Afterwards every (low, high) pair is normalised so that low < high: the pair is put
+// in ascending order, and high is bumped by 1 if both values are equal.
+std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node> &in,
+                                               const ngraph::element::Type &type,
+                                               std::size_t levels,
+                                               std::vector<size_t> constShapes) {
+    const size_t constDataSize = ngraph::shape_size(constShapes);
+    std::vector<float> inputLowData, inputHighData, outputLowData, outputHighData;
+    inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+    if (levels != 2) {
+        inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+    } else {
+        inputHighData = inputLowData;
+        outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+        outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
+
+        // Two-level case: collapse the generated output bounds to the values 0 and 1.
+        // The normalisation below then orders each pair as (0, 1).
+        for (size_t i = 0; i < constDataSize; i++) {
+            if (outputLowData[i] > outputHighData[i]) {
+                outputLowData[i] = 1;
+                outputHighData[i] = 0;
+            } else {
+                outputLowData[i] = 0;
+                outputHighData[i] = 1;
+            }
+        }
+    }
+
+    // Orders one (low, high) pair and guarantees low < high.
+    // Both results are computed before either element is written back: assigning the
+    // minimum to `low` first would make the following max() see the already-overwritten
+    // value and lose the larger of the two numbers.
+    const auto normalizeRange = [](float &low, float &high) {
+        const float lower = std::min(low, high);
+        const float upper = std::max(low, high);
+        low = lower;
+        high = (lower == upper) ? upper + 1 : upper;
+    };
+
+    for (size_t i = 0; i < constDataSize; i++) {
+        normalizeRange(inputLowData[i], inputHighData[i]);
+        normalizeRange(outputLowData[i], outputHighData[i]);
+    }
+
+    auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty());
+    auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty());
+    auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty());
+    auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty());
+
+    auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels);
+
+    return fq;
+}
+
+} // namespace builder
+} // namespace ngraph
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+//
+
+#include <vector>
+#include <memory>
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+// Creates the weights constant for a GroupConvolutionBackpropData node and delegates to
+// the overload that takes explicit weights.
+// The weights shape is {numGroups, in.get_shape()[1] / numGroups, numOutChannels / numGroups,
+// filterSize...}.
+// Throws std::runtime_error when numGroups is zero, or when either in.get_shape()[1] or
+// numOutChannels is not divisible by numGroups.
+// When filterWeights is empty, `true` is passed as makeConstant's last argument.
+std::shared_ptr<Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in,
+                                                       const element::Type &type,
+                                                       const std::vector<size_t> &filterSize,
+                                                       const std::vector<size_t> &strides,
+                                                       const std::vector<ptrdiff_t> &padsBegin,
+                                                       const std::vector<ptrdiff_t> &padsEnd,
+                                                       const std::vector<size_t> &dilations,
+                                                       const op::PadType &autoPad,
+                                                       size_t numOutChannels,
+                                                       size_t numGroups,
+                                                       bool addBiases,
+                                                       const std::vector<float> &filterWeights,
+                                                       const std::vector<float> &biasesWeights) {
+    // Reject a zero group count up front: the modulo and division below would otherwise
+    // be undefined behaviour.
+    if (numGroups == 0)
+        throw std::runtime_error("numGroups must be non-zero for GroupConvolutionBackpropData");
+
+    bool randomFilterWeights = filterWeights.empty();
+    auto shape = in.get_shape();
+    std::vector<size_t> filterWeightsShape = {shape[1], numOutChannels};
+    if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups)
+        throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData");
+    // Split both leading dimensions across the groups, then prepend the group dimension.
+    filterWeightsShape[0] /= numGroups;
+    filterWeightsShape[1] /= numGroups;
+    filterWeightsShape.insert(filterWeightsShape.begin(), numGroups);
+    filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end());
+    auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights);
+
+    return makeGroupConvolutionBackpropData(in, filterWeightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, biasesWeights);
+}
+
+// Builds an opset1::GroupConvolutionBackpropData node from `in` and `weights`.
+// When addBiases is true, the result is wrapped in an opset1::Add with a constant created
+// by makeConstant(type, {}, biasesWeights, biasesWeights.empty()).
+std::shared_ptr<Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in,
+                                                       const ngraph::Output<Node> &weights,
+                                                       const element::Type &type,
+                                                       const std::vector<size_t> &strides,
+                                                       const std::vector<ptrdiff_t> &padsBegin,
+                                                       const std::vector<ptrdiff_t> &padsEnd,
+                                                       const std::vector<size_t> &dilations,
+                                                       const op::PadType &autoPad,
+                                                       bool addBiases,
+                                                       const std::vector<float> &biasesWeights) {
+    auto groupDeconv = std::make_shared<opset1::GroupConvolutionBackpropData>(in, weights, strides, padsBegin, padsEnd, dilations, autoPad);
+
+    // No bias requested: the bare group deconvolution is the result.
+    if (!addBiases) {
+        return groupDeconv;
+    }
+
+    const bool generateBiases = biasesWeights.empty();
+    auto biasNode = makeConstant(type, {}, biasesWeights, generateBiases);
+    return std::make_shared<ngraph::opset1::Add>(groupDeconv, biasNode);
+}
+
+} // namespace builder
+} // namespace ngraph
\ No newline at end of file
element::Type(toPrecision).get_type_name());
}
}
+ case element::Type_t::boolean: {
+ switch (toPrecision) {
+ case element::Type_t::u8: {
+ return convertPrecision<bool, uint8_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::u16: {
+ return convertPrecision<bool, uint16_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::i8: {
+ return convertPrecision<bool, int8_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::i16: {
+ return convertPrecision<bool, int16_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::i32: {
+ return convertPrecision<bool, int32_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::i64: {
+ return convertPrecision<bool, int64_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::f32: {
+ return convertPrecision<bool, float>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ case element::Type_t::u64: {
+ return convertPrecision<bool, uint64_t>(output, elementsCount, element::Type(toPrecision).size());
+ }
+ default:
+ throw std::runtime_error("convertOutputPrecision can't convert from: " + element::Type(fromPrecision).get_type_name() + " to: " +
+ element::Type(toPrecision).get_type_name());
+ }
+ }
default:
throw std::runtime_error("convertOutputPrecision can't convert from: " + element::Type(fromPrecision).get_type_name() + " precision");
}
#include <ie_blob.h>
#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
-using namespace InferenceEngine;
+#include "unit_test_utils/mocks/mock_allocator.hpp"
-using BlobTests = ::testing::Test;
+#ifdef WIN32
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+// Fixture for the TBlob tests; it only provides a factory for mock allocators.
+class BlobTests: public ::testing::Test {
+protected:
+    // No per-test set-up or tear-down is required.
+    void SetUp() override {}
+
+    void TearDown() override {}
+
+    // Returns a freshly constructed MockAllocator owned by a shared_ptr.
+    std::shared_ptr<MockAllocator> createMockAllocator() {
+        return std::make_shared<MockAllocator>();
+    }
+};
// Testing TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0)
-TEST(BlobTests, TBlobThrowsIfPtrForPreAllocatorIsNullPtr) {
- ASSERT_THROW(TBlob<float>({ Precision::FP32, {1}, C }, nullptr),
- InferenceEngine::details::InferenceEngineException);
+TEST_F(BlobTests, TBlobThrowsIfPtrForPreAllocatorIsNullPtr) {
+ ASSERT_THROW(InferenceEngine::TBlob<float>({InferenceEngine::Precision::FP32, {1}, InferenceEngine::C}, nullptr),
+ InferenceEngine::details::InferenceEngineException);
+}
+
+// Testing TBlob(const TensorDesc& tensorDesc, const std::shared_ptr<IAllocator>& alloc)
+TEST_F(BlobTests, TBlobThrowsIfAllocatorIsNullPtr) {
+ ASSERT_THROW(InferenceEngine::TBlob<float>(
+ {InferenceEngine::Precision::FP32, {1}, InferenceEngine::C}, std::shared_ptr<InferenceEngine::IAllocator>()),
+ InferenceEngine::details::InferenceEngineException);
+}
+
+
+// Allocating a 1x2x3 float blob through an injected (mock) allocator must request
+// 1 * 2 * 3 * sizeof(float) bytes, and free() must be called exactly once by the time
+// the blob goes out of scope.
+TEST_F(BlobTests, canCreateBlobUsingDefaultAllocator) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ // alloc returns the dummy handle 1; it is never dereferenced in this test
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+
+ // inner scope: the blob is destroyed at the closing brace, before the mock is verified
+ {
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+ }
+}
+
+// Calling allocate() twice on the same blob must pair every alloc() with a free():
+// exactly two alloc calls and exactly two free calls are expected.
+TEST_F(BlobTests, secondAllocateWontMemLeak) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ // both allocations return the same dummy handle 1; free reports success
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).Times(2).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(2).WillRepeatedly(testing::Return(true));
+
+ // inner scope: the blob is destroyed at the closing brace, before the mock is verified
+ {
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+ blob.allocate();
+ }
+}
+
+
+// Requests the blob data once while the mock's lock() has no configured return action,
+// and sets no expectation on unlock().
+// NOTE(review): gmock presumably returns a null pointer from lock() here, which is what
+// models the "lock failed" case -- confirm against MockAllocator's lock signature.
+TEST_F(BlobTests, doesNotUnlockIfLockFailed) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ // lock must be called exactly once, on handle 1, for writing; no return action is given
+ EXPECT_CALL(*allocator.get(), lock(reinterpret_cast<void*>(1), InferenceEngine::LOCK_FOR_WRITE)).Times(1);
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+ {
+ // the pointer is intentionally unused; UNUSED suppresses the compiler warning on non-Windows builds
+ float UNUSED *ptr = blob.data();
+ }
+}
+
+// data() must expose the memory returned by the allocator's lock(): the mock maps
+// handle 1 to the local `data` array, and the test reads element 2 through the blob.
+// Exactly one unlock(handle 1) and one free() are expected.
+TEST_F(BlobTests, canAccessDataUsingAllocator) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ // backing storage handed out by lock(); only three values are provided and only index 2 is read
+ float data[] = {5.f, 6.f, 7.f};
+
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), lock(reinterpret_cast<void*>(1), InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data));
+ EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast<void*>(1))).Times(1);
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+ {
+ float *ptr = blob.data();
+ ASSERT_EQ(ptr[2] , 7);
+ }
+}
+
+
+TEST_F(BlobTests, canLockReadOnlyDataForRead) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ float data[] = {5, 6, 7};
+
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_READ)).WillRepeatedly(testing::Return(data));
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+ EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast<void*>(1))).Times(1);
+
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+
+ const float *ptr = blob.readOnly();
+ ASSERT_EQ(ptr[2] , 7);
}
-// Testing TBlob(const TensorDesc& tensorDesc, const std::shared_ptr<IAllocator>& alloc)
-TEST(BlobTests, TBlobThrowsIfAllocatorIsNullPtr) {
- ASSERT_THROW(TBlob<float>({ Precision::FP32, {1}, C }, std::shared_ptr<IAllocator> ()),
- InferenceEngine::details::InferenceEngineException);
+TEST_F(BlobTests, canAccessDataUsingBufferBaseMethod) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ float data[] = {5, 6, 7};
+
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data));
+ EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast<void*>(1))).Times(1);
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+ auto buffer = blob.rwmap();
+ const float *ptr = buffer.as<const float *>();
+ ASSERT_EQ(ptr[2] , 7);
+}
+
+// A TBlob move-constructed from an allocated blob must still reach the memory that the
+// allocator hands out: rwmap() on the new blob is expected to yield the `data` array.
+// Exactly one unlock(handle 1) and one free() are expected in total.
+TEST_F(BlobTests, canMoveFromTBlobWithSameType) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ // backing storage returned by lock(); only two values are provided and only index 0 is read
+ uint8_t data[] = {5, 6};
+
+ EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(uint8_t))).WillRepeatedly(testing::Return(reinterpret_cast<void*>(1)));
+ EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data));
+ EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast<void*>(1))).Times(1);
+ EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1);
+
+ InferenceEngine::TBlob<uint8_t > blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+ blob.allocate();
+
+ // from here on only newBlob is used; blob is in a moved-from state
+ InferenceEngine::TBlob<uint8_t > newBlob(std::move(blob));
+
+ auto buffer = newBlob.rwmap();
+ uint8_t *ptr = buffer.as <uint8_t *>();
+ ASSERT_EQ(ptr[0] , data[0]);
+}
+
+TEST_F(BlobTests, saveDimsAndSizeAfterMove) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ InferenceEngine::TBlob<uint8_t > blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+
+ InferenceEngine::TBlob<uint8_t > newBlob(std::move(blob));
+
+ ASSERT_EQ(newBlob.size(), 1 * 2 * 3);
+ ASSERT_EQ(newBlob.getTensorDesc().getDims()[0], 1);
+ ASSERT_EQ(newBlob.getTensorDesc().getDims()[1], 2);
+ ASSERT_EQ(newBlob.getTensorDesc().getDims()[2], 3);
+}
+
+TEST_F(BlobTests, canCopyBlob) {
+ InferenceEngine::SizeVector v = {1, 3};
+ InferenceEngine::TBlob<uint8_t> blob({ InferenceEngine::Precision::U8, v, InferenceEngine::HW });
+ blob.allocate();
+ blob.data()[0] = 1;
+ blob.data()[1] = 2;
+ blob.data()[2] = 3;
+
+ InferenceEngine::TBlob<uint8_t> blob2(blob);
+
+ ASSERT_EQ(blob2.getTensorDesc().getDims().size(), blob.getTensorDesc().getDims().size());
+ ASSERT_EQ(blob2.getTensorDesc().getDims()[0], blob.getTensorDesc().getDims()[0]);
+ ASSERT_EQ(blob2.getTensorDesc().getDims()[1], blob.getTensorDesc().getDims()[1]);
+ ASSERT_EQ(blob2.size(), blob.size());
+ ASSERT_EQ(blob2.data()[0], blob.data()[0]);
+ ASSERT_EQ(blob2.data()[1], blob.data()[1]);
+ ASSERT_EQ(blob2.data()[2], blob.data()[2]);
+}
+
+TEST_F(BlobTests, canCompareToNullPtrWithoutDereferencing) {
+ InferenceEngine::SizeVector v = {1, 2, 3};
+ auto allocator = createMockAllocator();
+
+ InferenceEngine::TBlob<uint8_t> blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW },
+ std::dynamic_pointer_cast<InferenceEngine::IAllocator>(allocator));
+
+ ASSERT_TRUE(blob.readOnly() == nullptr);
+ ASSERT_TRUE(blob.data() == nullptr);
+ ASSERT_TRUE(blob.rwmap() == nullptr);
+
+ ASSERT_TRUE(nullptr == blob.readOnly());
+ ASSERT_TRUE(nullptr == blob.data());
+ ASSERT_TRUE(nullptr == blob.rwmap());
+}
+
+TEST_F(BlobTests, canCreateBlob) {
+ InferenceEngine::SizeVector size = { 1, 1, 1 };
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW });
+ ASSERT_NE(blob.size(), 0);
+ ASSERT_EQ(blob.rwmap(), nullptr);
+}
+
+TEST_F(BlobTests, canAllocateBlob) {
+ InferenceEngine::SizeVector size = { 1, 1, 1 };
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW });
+ blob.allocate();
+ float* buffer = static_cast<float*>(blob.data());
+ ASSERT_NE(buffer, nullptr);
+}
+
+TEST_F(BlobTests, canDeallocateBlob) {
+ InferenceEngine::SizeVector size = { 1, 1, 1 };
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW });
+ blob.allocate();
+ blob.deallocate();
+ ASSERT_EQ(nullptr, blob.data().as<float*>());
+}
+
+TEST_F(BlobTests, canCreateBlobWithoutDims) {
+ InferenceEngine::TBlob<float> blob(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::NCHW));
+ ASSERT_EQ(blob.getTensorDesc().getDims().size(), 0);
+}
+
+TEST_F(BlobTests, canReadDataFromConstBlob) {
+ InferenceEngine::TBlob<float> blob({ InferenceEngine::Precision::FP32, { 1, 1, 1 }, InferenceEngine::CHW });
+ blob.allocate();
+ blob.data()[0] = 1.0f;
+ InferenceEngine::TBlob<float> const blob2 = blob;
+ const float* buf = blob2.readOnly();
+ ASSERT_NE(buf, nullptr);
+}
+
+TEST_F(BlobTests, canMakeSharedBlob) {
+ InferenceEngine::SizeVector size = { 1, 1, 1 };
+ InferenceEngine::TBlob<float>::Ptr blob1 = InferenceEngine::make_shared_blob<float>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::NCHW));
+ InferenceEngine::TBlob<float>::Ptr blob2 = InferenceEngine::make_shared_blob<float>(
+ { InferenceEngine::Precision::FP32, size, InferenceEngine::CHW });
+ InferenceEngine::TBlob<float>::Ptr blob3
+ = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, { 0 }, InferenceEngine::C });
+ ASSERT_EQ(blob1->size(), 0);
+ ASSERT_EQ(blob2->size(), 1);
+ ASSERT_EQ(blob3->size(), 0);
+}
+
+TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) {
+ InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP16, {1, 3, 227, 227}, InferenceEngine::Layout::NCHW);
+ ASSERT_THROW(InferenceEngine::make_shared_blob<float>(desc), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(BlobTests, canUseBlobInMoveSemantics) {
+ InferenceEngine::TBlob<float> b(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::C));
+
+ b.getTensorDesc().setDims({3});
+ b.allocate();
+ b.data()[0] = 1.0f;
+ b.data()[1] = 2.0f;
+ b.data()[2] = 3.0f;
+
+ std::vector<float> dump;
+
+ for (const auto& e : b) {
+ dump.push_back(e);
+ }
+
+ ASSERT_EQ(dump.size(), 3);
+
+ ASSERT_EQ(dump[0], 1.0f);
+ ASSERT_EQ(dump[1], 2.0f);
+ ASSERT_EQ(dump[2], 3.0f);
+}
+
+TEST_F(BlobTests, DISABLED_canUseLockedMemoryAsRvalueReference) {
+ std::vector<float> dump;
+ std::vector<float> v({1.0f, 2.0f, 3.0f});
+ auto blob = InferenceEngine::make_shared_blob<float>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::C), &v[0], v.size());
+ for (auto e : *blob) {
+ dump.push_back(e);
+ }
+
+ ASSERT_EQ(dump.size(), 3);
+
+ ASSERT_EQ(dump[0], 1.0f);
+ ASSERT_EQ(dump[1], 2.0f);
+ ASSERT_EQ(dump[2], 3.0f);
+}
+
+TEST_F(BlobTests, canCreateBlobOnExistedMemory) {
+ float input[] = {0.1f, 0.2f, 0.3f};
+ {
+ auto b = InferenceEngine::make_shared_blob<float>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {1, 2}, InferenceEngine::HW), input);
+ auto i = b->begin();
+ ASSERT_NEAR(*i, 0.1, 0.00001);
+ i++;
+ ASSERT_NEAR(*i, 0.2, 0.00001);
+ i++;
+ ASSERT_EQ(i, b->end());
+
+ ASSERT_EQ(&*b->begin(), input);
+ }
+}
+
+
+// Writing through a range-based for loop over a TBlob must modify the stored elements.
+// Every element of the blob is set to 5 and then read back through data().
+TEST_F(BlobTests, canModifyDataInRangedFor) {
+    InferenceEngine::SizeVector v = {1, 2, 3};
+    InferenceEngine::TBlob<int> blob({ InferenceEngine::Precision::I32, v, InferenceEngine::CHW });
+    blob.allocate();
+
+    for (auto & data : blob) {
+        data = 5;
+    }
+
+    // Check all blob.size() elements. v.size() is only the number of dimensions (3),
+    // so bounding the loop by it would leave half of the 1 * 2 * 3 elements unverified.
+    for (size_t i = 0; i < blob.size(); i++) {
+        ASSERT_EQ(5, blob.data()[i]) << "Mismatch at " << i;
+    }
+}
+
+// Checks the BlockingDesc of a ROI blob cropped from an NCHW parent blob:
+// block dims, dimension order, offset padding and strides are compared to reference values.
+TEST_F(BlobTests, makeRoiBlobNchw) {
+ // we create main blob with NCHW layout. We will crop ROI from this blob.
+ InferenceEngine::SizeVector dims = {1, 3, 6, 5}; // RGB picture of size (WxH) = 5x6
+ InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob<uint8_t>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NCHW));
+ blob->allocate();
+
+ // create ROI blob based on the already created blob
+ InferenceEngine::ROI roi = {0, 2, 1, 2, 4}; // cropped picture with: id = 0, (x,y) = (2,1), sizeX (W) = 2, sizeY (H) = 4
+ InferenceEngine::Blob::Ptr roiBlob = make_shared_blob(blob, roi);
+
+ // check that BlockingDesc is constructed properly for the ROI blob
+ // reference dims are {N, C, roi H, roi W}
+ InferenceEngine::SizeVector refDims = {1, 3, 4, 2};
+ InferenceEngine::SizeVector refOrder = {0, 1, 2, 3};
+ // offset of the ROI's first element in the parent: y * W + x = 1 * 5 + 2
+ size_t refOffset = 7;
+ // strides of the parent blob: {3 * 6 * 5, 6 * 5, 5, 1}
+ InferenceEngine::SizeVector refStrides = {90, 30, 5, 1};
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides);
+}
+
+// Checks the BlockingDesc of a ROI blob cropped from an NHWC parent blob:
+// block dims, dimension order, offset padding and strides are compared to reference values.
+TEST_F(BlobTests, makeRoiBlobNhwc) {
+ // we create main blob with NHWC layout. We will crop ROI from this blob.
+ InferenceEngine::SizeVector dims = {1, 3, 4, 8}; // RGB picture of size (WxH) = 8x4
+ InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob<uint8_t>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NHWC));
+ blob->allocate();
+
+ // create ROI blob based on the already created blob
+ InferenceEngine::ROI roi = {0, 3, 2, 5, 2}; // cropped picture with: id = 0, (x,y) = (3,2), sizeX (W) = 5, sizeY (H) = 2
+ InferenceEngine::Blob::Ptr roiBlob = make_shared_blob(blob, roi);
+
+ // check that BlockingDesc is constructed properly for the ROI blob
+ // reference block dims are in memory order {N, roi H, roi W, C}
+ InferenceEngine::SizeVector refDims = {1, 2, 5, 3};
+ InferenceEngine::SizeVector refOrder = {0, 2, 3, 1};
+ // offset of the ROI's first element in the parent: y * W * C + x * C = 2 * 8 * 3 + 3 * 3
+ size_t refOffset = 57;
+ // strides of the parent blob in memory order: {4 * 8 * 3, 8 * 3, 3, 1}
+ InferenceEngine::SizeVector refStrides = {96, 24, 3, 1};
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset);
+ ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides);
+}
+
+TEST_F(BlobTests, makeRoiBlobWrongSize) {
+ // we create main blob with NCHW layout. We will crop ROI from this blob.
+ InferenceEngine::SizeVector dims = {1, 3, 4, 4}; // RGB picture of size (WxH) = 4x4
+ InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob<uint8_t>(
+ InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NCHW));
+ blob->allocate();
+
+ // try to create ROI blob with wrong size
+ InferenceEngine::ROI roi = {0, 1, 1, 4, 4}; // cropped picture with: id = 0, (x,y) = (1,1), sizeX (W) = 4, sizeY (H) = 4
+ ASSERT_THROW(make_shared_blob(blob, roi), InferenceEngine::details::InferenceEngineException);
}
// SPDX-License-Identifier: Apache-2.0
//
-#include <ie_blob.h>
#include <ie_compound_blob.h>
#include <gtest/gtest.h>
#include <random>
#include <chrono>
-#include <gmock/gmock-spec-builders.h>
-
-#include "unit_test_utils/mocks/mock_allocator.hpp"
-
-#ifdef WIN32
-#define UNUSED
-#else
-#define UNUSED __attribute__((unused))
-#endif
-
using namespace ::testing;
using namespace std;
using namespace InferenceEngine;
-class BlobTests: public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
- shared_ptr<MockAllocator> createMockAllocator() {
- return shared_ptr<MockAllocator>(new MockAllocator());
- }
-
-public:
-
-};
-
class CompoundBlobTests : public ::testing::Test {
protected:
Blob::Ptr _test_blob;
class NV12BlobTests : public CompoundBlobTests {};
class I420BlobTests : public CompoundBlobTests {};
-struct ScopedTimer
-{
- chrono::high_resolution_clock::time_point t0;
- function<void(int)> cb;
-
- ScopedTimer(function<void(int)> callback)
- : t0(chrono::high_resolution_clock::now())
- , cb(callback)
- {
- }
- ~ScopedTimer(void)
- {
- auto t1 = chrono::high_resolution_clock::now();
- auto milli = chrono::duration_cast<chrono::microseconds>(t1-t0).count();
-
- cb((int)milli);
- }
-};
-
TEST(BlobConversionTests, canWorkWithMemoryBlob) {
Blob::Ptr blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, {1, 3, 4, 4}, NCHW));
ASSERT_TRUE(blob->is<MemoryBlob>());
ASSERT_EQ(stored_value, tblob->data()[0]);
}
-TEST_F(BlobTests, canCreateBlobUsingDefaultAllocator)
-{
- SizeVector v = {1,2,3};
- auto allocator = createMockAllocator();
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
-
- {
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
- }
-}
-
-TEST_F(BlobTests, secondAllocateWontMemLeak) {
- SizeVector v = {1,2,3};
- auto allocator = createMockAllocator();
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).Times(2).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), free(_)).Times(2).WillRepeatedly(Return(true));
-
- {
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
- blob.allocate();
- }
-}
-
-
-TEST_F(BlobTests, doesNotUnlockIfLockFailed)
-{
- SizeVector v = {1,2,3};
- auto allocator = createMockAllocator();
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), lock((void*)1,LOCK_FOR_WRITE)).Times(1);
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
-
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
- {
- float UNUSED *ptr = blob.data();
- }
-}
-
-TEST_F(BlobTests, canAccessDataUsingAllocator)
-{
- SizeVector v = {1,2,3};
- auto allocator = createMockAllocator();
-
- float data[] = {5.f,6.f,7.f};
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), lock((void*)1, LOCK_FOR_WRITE)).WillRepeatedly(Return(data));
- EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1);
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
-
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
- {
- float *ptr = blob.data();
- ASSERT_EQ(ptr[2] , 7);
- }
-
-}
-
-
-TEST_F(BlobTests, canLockReadOnlyDataForRead)
-{
- SizeVector v = {1, 2, 3};
- auto allocator = createMockAllocator();
-
- float data[] = {5,6,7};
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_READ)).WillRepeatedly(Return(data));
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
- EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1);
-
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
-
- const float *ptr = blob.readOnly();
- ASSERT_EQ(ptr[2] , 7);
-}
-
-TEST_F(BlobTests, canAccessDataUsingBufferBaseMethod)
-{
- SizeVector v = {1, 2, 3};
- auto allocator = createMockAllocator();
-
- float data[] = {5,6,7};
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_WRITE)).WillRepeatedly(Return(data));
- EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1);
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
-
- TBlob<float> blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
- auto buffer = blob.rwmap();
- const float *ptr = buffer.as<const float *>();
- ASSERT_EQ(ptr[2] , 7);
-}
-
-TEST_F(BlobTests, canMoveFromTBlobWithSameType)
-{
- SizeVector v = {1, 2, 3};
- auto allocator = createMockAllocator();
-
- uint8_t data[] = {5,6};
-
- EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(uint8_t))).WillRepeatedly(Return((void*)1));
- EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_WRITE)).WillRepeatedly(Return(data));
- EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1);
- EXPECT_CALL(*allocator.get(), free(_)).Times(1);
-
- TBlob<uint8_t > blob({ Precision::U8, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
- blob.allocate();
-
- TBlob<uint8_t > newBlob(std::move(blob));
-
- auto buffer = newBlob.rwmap();
- uint8_t *ptr = buffer.as <uint8_t *>();
- ASSERT_EQ(ptr[0] , data[0]);
-}
-
-TEST_F(BlobTests, saveDimsAndSizeAfterMove)
-{
- SizeVector v = {1, 2, 3};
- auto allocator = createMockAllocator();
-
- TBlob<uint8_t > blob({ Precision::U8, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
-
- TBlob<uint8_t > newBlob(std::move(blob));
-
- ASSERT_EQ(newBlob.size(), 1 * 2 * 3);
- ASSERT_EQ(newBlob.getTensorDesc().getDims()[0], 1);
- ASSERT_EQ(newBlob.getTensorDesc().getDims()[1], 2);
- ASSERT_EQ(newBlob.getTensorDesc().getDims()[2], 3);
-}
-
-TEST_F(BlobTests, canCopyBlob)
-{
- SizeVector v = {1, 3};
- TBlob<uint8_t> blob({ Precision::U8, v, HW });
- blob.allocate();
- blob.data()[0] = 1;
- blob.data()[1] = 2;
- blob.data()[2] = 3;
-
- TBlob<uint8_t> blob2(blob);
-
- ASSERT_EQ(blob2.getTensorDesc().getDims().size(), blob.getTensorDesc().getDims().size());
- ASSERT_EQ(blob2.getTensorDesc().getDims()[0], blob.getTensorDesc().getDims()[0]);
- ASSERT_EQ(blob2.getTensorDesc().getDims()[1], blob.getTensorDesc().getDims()[1]);
- ASSERT_EQ(blob2.size(), blob.size());
- ASSERT_EQ(blob2.data()[0], blob.data()[0]);
- ASSERT_EQ(blob2.data()[1], blob.data()[1]);
- ASSERT_EQ(blob2.data()[2], blob.data()[2]);
-}
-
-TEST_F(BlobTests, canCompareToNullPtrWithoutDereferencing) {
- SizeVector v = {1, 2, 3};
- auto allocator = createMockAllocator();
-
- TBlob<uint8_t> blob({ Precision::U8, v, CHW }, dynamic_pointer_cast<IAllocator>(allocator));
-
- ASSERT_TRUE(blob.readOnly() == nullptr);
- ASSERT_TRUE(blob.data() == nullptr);
- ASSERT_TRUE(blob.rwmap() == nullptr);
-
- ASSERT_TRUE(nullptr == blob.readOnly());
- ASSERT_TRUE(nullptr == blob.data());
- ASSERT_TRUE(nullptr == blob.rwmap());
-}
-
-TEST_F(BlobTests, canCreateBlob) {
- InferenceEngine::SizeVector size = { 1, 1, 1 };
- InferenceEngine::TBlob<float> blob({ Precision::FP32, size, CHW });
- ASSERT_NE(blob.size(), 0);
- ASSERT_EQ(blob.rwmap(), nullptr);
-}
-
-TEST_F(BlobTests, canAllocateBlob) {
- InferenceEngine::SizeVector size = { 1, 1, 1 };
- InferenceEngine::TBlob<float> blob({ Precision::FP32, size, CHW });
- blob.allocate();
- float* buffer = static_cast<float*>(blob.data());
- ASSERT_NE(buffer, nullptr);
-}
-
-TEST_F(BlobTests, canDeallocateBlob) {
- InferenceEngine::SizeVector size = { 1, 1, 1 };
- InferenceEngine::TBlob<float> blob({ Precision::FP32, size, CHW });
- blob.allocate();
- blob.deallocate();
- ASSERT_EQ(nullptr, blob.data().as<float*>());
-}
-
-TEST_F(BlobTests, canCreateBlobWithoutDims) {
- InferenceEngine::TBlob<float> blob(TensorDesc(Precision::FP32, NCHW));
- ASSERT_EQ(blob.getTensorDesc().getDims().size(), 0);
-}
-
-TEST_F(BlobTests, canReadDataFromConstBlob) {
- InferenceEngine::TBlob<float> blob({ Precision::FP32, { 1, 1, 1 }, CHW });
- blob.allocate();
- blob.data()[0] = 1.0f;
- InferenceEngine::TBlob<float> const blob2 = blob;
- const float* buf = blob2.readOnly();
- ASSERT_NE(buf, nullptr);
-}
-
-TEST_F(BlobTests, canMakeSharedBlob) {
- InferenceEngine::SizeVector size = { 1, 1, 1 };
- InferenceEngine::TBlob<float>::Ptr blob1 = InferenceEngine::make_shared_blob<float>(TensorDesc(Precision::FP32, NCHW));
- InferenceEngine::TBlob<float>::Ptr blob2 = InferenceEngine::make_shared_blob<float>({ Precision::FP32, size, CHW });
- InferenceEngine::TBlob<float>::Ptr blob3
- = InferenceEngine::make_shared_blob<float>({ Precision::FP32, { 0 }, C });
- ASSERT_EQ(blob1->size(), 0);
- ASSERT_EQ(blob2->size(), 1);
- ASSERT_EQ(blob3->size(), 0);
-}
-
-TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) {
- InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP16, {1, 3, 227, 227}, Layout::NCHW);
- ASSERT_THROW(InferenceEngine::make_shared_blob<float>(desc), InferenceEngine::details::InferenceEngineException);
-}
-
-TEST_F(BlobTests, canUseBlobInMoveSemantics) {
-
- TBlob<float> b(TensorDesc(Precision::FP32, C));
-
- b.getTensorDesc().setDims({3});
- b.allocate();
- b.data()[0] = 1.0f;
- b.data()[1] = 2.0f;
- b.data()[2] = 3.0f;
-
- std::vector<float> dump;
-
- for (const auto & e: b) {
- dump.push_back(e);
- }
-
- ASSERT_EQ(dump.size(), 3);
-
- ASSERT_EQ(dump[0], 1.0f);
- ASSERT_EQ(dump[1], 2.0f);
- ASSERT_EQ(dump[2], 3.0f);
-
-}
-
-TEST_F(BlobTests, DISABLED_canUseLockedMemoryAsRvalueReference) {
-
- std::vector<float> dump;
- std::vector<float> v({1.0f, 2.0f, 3.0f});
- for (auto e: *make_shared_blob<float>(TensorDesc(Precision::FP32, C), &v[0], v.size())) {
- dump.push_back(e);
- }
-
- ASSERT_EQ(dump.size(), 3);
-
- ASSERT_EQ(dump[0], 1.0f);
- ASSERT_EQ(dump[1], 2.0f);
- ASSERT_EQ(dump[2], 3.0f);
-}
-
-TEST_F(BlobTests, canCreateBlobOnExistedMemory) {
-
- float input[] = {0.1f, 0.2f, 0.3f};
- {
- auto b = make_shared_blob<float>(TensorDesc(Precision::FP32, {1, 2}, HW), input);
- auto i = b->begin();
- ASSERT_NEAR(*i, 0.1, 0.00001);
- i++;
- ASSERT_NEAR(*i, 0.2, 0.00001);
- i++;
- ASSERT_EQ(i, b->end());
-
- ASSERT_EQ(&*b->begin(), input);
- }
-}
-
-
-TEST_F(BlobTests, canModifyDataInRangedFor) {
-
- SizeVector v = {1,2,3};
- TBlob<int> blob({ Precision::I32, v, CHW });
- blob.allocate();
-
- for (auto & data : blob) {
- data = 5;
- }
-
- for(int i=0;i<v.size();i++) {
- ASSERT_EQ(5, blob.data()[i]) << "Mismatch at" << i;
- }
-}
-
-TEST_F(BlobTests, makeRoiBlobNchw) {
- // we create main blob with NCHW layout. We will crop ROI from this blob.
- SizeVector dims = {1, 3, 6, 5}; // RGB picture of size (WxH) = 5x6
- Blob::Ptr blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, dims, NCHW));
- blob->allocate();
-
- // create ROI blob based on the already created blob
- ROI roi = {0, 2, 1, 2, 4}; // cropped picture with: id = 0, (x,y) = (2,1), sizeX (W) = 2, sizeY (H) = 4
- Blob::Ptr roiBlob = make_shared_blob(blob, roi);
-
- // check that BlockingDesc is constructed properly for the ROI blob
- SizeVector refDims = {1, 3, 4, 2};
- SizeVector refOrder = {0, 1, 2, 3};
- size_t refOffset = 7;
- SizeVector refStrides = {90, 30, 5, 1};
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides);
-}
-
-TEST_F(BlobTests, makeRoiBlobNhwc) {
- // we create main blob with NHWC layout. We will crop ROI from this blob.
- SizeVector dims = {1, 3, 4, 8}; // RGB picture of size (WxH) = 8x4
- Blob::Ptr blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, dims, NHWC));
- blob->allocate();
-
- // create ROI blob based on the already created blob
- ROI roi = {0, 3, 2, 5, 2}; // cropped picture with: id = 0, (x,y) = (3,2), sizeX (W) = 5, sizeY (H) = 2
- Blob::Ptr roiBlob = make_shared_blob(blob, roi);
-
- // check that BlockingDesc is constructed properly for the ROI blob
- SizeVector refDims = {1, 2, 5, 3};
- SizeVector refOrder = {0, 2, 3, 1};
- size_t refOffset = 57;
- SizeVector refStrides = {96, 24, 3, 1};
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset);
- ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides);
-}
-
-TEST_F(BlobTests, makeRoiBlobWrongSize) {
- // we create main blob with NCHW layout. We will crop ROI from this blob.
- SizeVector dims = {1, 3, 4, 4}; // RGB picture of size (WxH) = 4x4
- Blob::Ptr blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, dims, NCHW));
- blob->allocate();
-
- // try to create ROI blob with wrong size
- ROI roi = {0, 1, 1, 4, 4}; // cropped picture with: id = 0, (x,y) = (1,1), sizeX (W) = 4, sizeY (H) = 4
- ASSERT_THROW(make_shared_blob(blob, roi), InferenceEngine::details::InferenceEngineException);
-}
-
TEST_F(CompoundBlobTests, cannotCreateCompoundBlobFromNullptr) {
Blob::Ptr valid = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, {1, 3, 4, 4}, NCHW));
EXPECT_THROW(make_shared_blob<CompoundBlob>(std::vector<Blob::Ptr>({valid, nullptr})),
auto c_v_blob = make_cblob(v_blob);
using ie_exception_t = InferenceEngine::details::InferenceEngineException;
- EXPECT_THROW(make_shared_blob<I420Blob>(c_y_blob, u_blob, v_blob ), ie_exception_t);
- EXPECT_THROW(make_shared_blob<I420Blob>(y_blob, c_u_blob, v_blob ), ie_exception_t);
+ EXPECT_THROW(make_shared_blob<I420Blob>(c_y_blob, u_blob, v_blob), ie_exception_t);
+ EXPECT_THROW(make_shared_blob<I420Blob>(y_blob, c_u_blob, v_blob), ie_exception_t);
EXPECT_THROW(make_shared_blob<I420Blob>(y_blob, u_blob, c_v_blob), ie_exception_t);
}
Blob::Ptr v_blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, {1, 1, 3, 4}, NHWC));
EXPECT_THROW(make_shared_blob<I420Blob>(y_blob, u_blob, v_blob), InferenceEngine::details::InferenceEngineException);
EXPECT_THROW(make_shared_blob<I420Blob>(y_blob, v_blob, u_blob), InferenceEngine::details::InferenceEngineException);
-
}
TEST_F(I420BlobTests, cannotCreateI420BlobFromPlanesWithWrongWidthRatio) {
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "behavior_test_plugin_set_preprocess.hpp"
-#include "cldnn_test_data.hpp"
-
-INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
- BehaviorPluginTestPreProcess,
- ValuesIn(supportedValues),
- getTestCaseName);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "behavior_test_plugin_set_preprocess.hpp"
-#include "mkldnn_test_data.hpp"
-
-INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
- BehaviorPluginTestPreProcess,
- ValuesIn(requestsSupportedValues),
- getTestCaseName);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "behavior_test_plugin.h"
-
-using namespace std;
-using namespace ::testing;
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-namespace {
- std::string getTestCaseName(testing::TestParamInfo<BehTestParams> obj) {
- return obj.param.device + "_" + obj.param.input_blob_precision.name()
- + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : "");
- }
-}
-
-TEST_P(BehaviorPluginTestPreProcess, SetPreProcessToInputInfo) {
- InferenceEngine::Core core;
-
- CNNNetwork cnnNetwork = core.ReadNetwork(GetParam().model_xml_str, GetParam().weights_blob);
-
- auto &preProcess = cnnNetwork.getInputsInfo().begin()->second->getPreProcess();
- preProcess.setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR);
-
- InferenceEngine::IExecutableNetwork::Ptr exeNetwork;
- ASSERT_NO_THROW(exeNetwork = core.LoadNetwork(cnnNetwork, GetParam().device, GetParam().config));
-
- IInferRequest::Ptr inferRequest;
- ASSERT_EQ(StatusCode::OK, exeNetwork->CreateInferRequest(inferRequest, &response));
-
- {
- ConstInputsDataMap inputsMap;
- ASSERT_EQ(StatusCode::OK, exeNetwork->GetInputsInfo(inputsMap, &response));
- const auto& name = inputsMap.begin()->second->name();
- const PreProcessInfo *info;
- inferRequest->GetPreProcess(name.c_str(), &info, &response);
-
- ASSERT_EQ(info->getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR);
- ASSERT_PREPROCESS_INFO_EQ(preProcess, *info);
- }
-}
-
-TEST_P(BehaviorPluginTestPreProcess, SetPreProcessToInferRequest) {
- TestEnv::Ptr testEnv;
- ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
- ResponseDesc response;
-
- auto& request = testEnv->inferRequest;
- PreProcessInfo preProcessInfo;
- preProcessInfo.setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR);
-
- IInferRequest::Ptr untouched_request = testEnv->exeNetwork.CreateInferRequest();
-
- ConstInputsDataMap inputs = testEnv->exeNetwork.GetInputsInfo();
- auto input_name = inputs.begin()->second->name();
- auto inputBlob = prepareInputBlob(GetParam().input_blob_precision, testEnv->inputDims);
-
- ASSERT_EQ(StatusCode::OK, request->SetBlob(input_name.c_str(), inputBlob, preProcessInfo, &response));
-
- {
- const PreProcessInfo *info = nullptr;
- ASSERT_EQ(StatusCode::OK, request->GetPreProcess(input_name.c_str(), &info, &response));
- ASSERT_EQ(info->getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR);
- ASSERT_PREPROCESS_INFO_EQ(preProcessInfo, *info);
- }
-
- {
- const PreProcessInfo *info = nullptr;
- ASSERT_EQ(StatusCode::OK, untouched_request->GetPreProcess(input_name.c_str(), &info, &response));
- ASSERT_EQ(testEnv->network.getInputsInfo()[input_name]->getPreProcess().getResizeAlgorithm(),info->getResizeAlgorithm());
- }
-}
class AOTBehaviorTests : public BehaviorPluginTest {
public:
+ WatchdogHndl_t* m_watchdogHndl = nullptr;
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::milliseconds ms;
void SetUp() override {
initialize_usb_boot();
+
+ ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); // fills m_watchdogHndl for this fixture; NOTE(review): WD_ERRNO is presumably the success code - confirm
+ }
+
+ void TearDown() override {
+ watchdog_destroy(m_watchdogHndl); // destroy the handle obtained via watchdog_create() in SetUp()
}
void dumpBlob() {
deviceDesc.protocol = NC_ANY_PROTOCOL;
deviceDesc.platform = NC_ANY_PLATFORM;
- statusOpen = ncDeviceOpen(&device, deviceDesc, 1000, pathToFw);
+ ncDeviceOpenParams_t deviceOpenParams = {};
+ deviceOpenParams.watchdogHndl = m_watchdogHndl;
+ deviceOpenParams.watchdogInterval = 1000;
+ deviceOpenParams.customFirmwareDirectory = pathToFw;
+
+ statusOpen = ncDeviceOpen(&device, deviceDesc, deviceOpenParams);
if (statusOpen != NC_OK) {
- ncDeviceClose(&device);
+ ncDeviceClose(&device, m_watchdogHndl);
return false;
}
sizeof(ElfN_Ehdr) + sizeof(mv_blob_header));
ncGraphDestroy(&graphHandle);
- ncDeviceClose(&device);
+ ncDeviceClose(&device, m_watchdogHndl);
ASSERT_EQ(NC_OK, res);
}
sizeof(ElfN_Ehdr) + sizeof(mv_blob_header));
ncGraphDestroy(&graphHandle);
- ncDeviceClose(&device);
+ ncDeviceClose(&device, m_watchdogHndl);
ASSERT_NE(NC_OK, res);
}
class MYRIADWatchdog : public BehaviorPluginTest,
public MyriadDevicesInfo {
public:
+ WatchdogHndl_t* m_watchdogHndl = nullptr;
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::milliseconds ms;
void SetUp() override {
initialize_usb_boot();
+
+ ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); // fills m_watchdogHndl for this fixture; NOTE(review): WD_ERRNO is presumably the success code - confirm
+ }
+
+ void TearDown() override {
+ watchdog_destroy(m_watchdogHndl); // destroy the handle obtained via watchdog_create() in SetUp()
}
struct DevicesState {
ncDeviceHandle_t *device = nullptr;
void resetOneDevice() {
- ncDeviceClose(&device);
+ ncDeviceClose(&device, m_watchdogHndl);
device = nullptr;
}
deviceDesc.protocol = NC_ANY_PROTOCOL;
deviceDesc.platform = NC_ANY_PLATFORM;
- statusOpen = ncDeviceOpen(&device, deviceDesc, watchdogInterval, pathToFw);
+ ncDeviceOpenParams_t deviceOpenParams = {};
+ deviceOpenParams.watchdogHndl = m_watchdogHndl;
+ deviceOpenParams.watchdogInterval = watchdogInterval;
+ deviceOpenParams.customFirmwareDirectory = pathToFw;
+
+ statusOpen = ncDeviceOpen(&device, deviceDesc, deviceOpenParams);
if (statusOpen != NC_OK) {
- ncDeviceClose(&device);
+ ncDeviceClose(&device, m_watchdogHndl);
}
}
};
ExecutableNetwork ret;
ctime = Time::now();
ASSERT_THROW(ret = core.LoadNetwork(network, GetParam().device, {
- {KEY_LOG_LEVEL, LOG_DEBUG}}),
+ {KEY_LOG_LEVEL, LOG_INFO}}),
InferenceEngine::details::InferenceEngineException);
ASSERT_BOOTED_DEVICES_ONE_MORE();
ExecutableNetwork ret;
ctime = Time::now();
ASSERT_THROW(ret = core.LoadNetwork(network, GetParam().device, {
- {KEY_LOG_LEVEL, LOG_DEBUG},
+ {KEY_LOG_LEVEL, LOG_INFO},
{KEY_VPU_MYRIAD_WATCHDOG, NO}}),
InferenceEngine::details::InferenceEngineException);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "behavior_test_plugin_set_preprocess.hpp"
-#include "vpu_test_data.hpp"
-
-INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
- BehaviorPluginTestPreProcess,
- ValuesIn(supportedValues),
- getTestCaseName);
#include "network_i8.hpp"
-#define XBYAK_NO_OP_NAMES
-#define XBYAK_UNDEF_JNL
-#include "../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h"
-
/*************************************************
* !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
* All ref values were obtained from Caffe scoring
* !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
*************************************************/
-#ifndef ENABLE_MKL_DNN
-#include "disable_tests.hpp"
-#endif
TEST_P(ModelTransformationsTest, LPT) {}
#include "ie_util_internal.hpp"
#include "cnn_network_ngraph_impl.hpp"
-
-#define XBYAK_NO_OP_NAMES
-#define XBYAK_UNDEF_JNL
-#include "../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h"
+#include <ie_system_conf.h>
using namespace ::testing;
using namespace InferenceEngine;
if (transformationsParam.modelParams.referenceOutputDataWithTransformations.size() == 1) {
referenceValues = transformationsParam.modelParams.referenceOutputDataWithTransformations[0];
} else {
- referenceValues = Xbyak::util::Cpu().has(Xbyak::util::Cpu::tAVX512F) ?
+ referenceValues = InferenceEngine::with_cpu_x86_avx512f() ?
transformationsParam.modelParams.referenceOutputDataWithTransformations[1] :
transformationsParam.modelParams.referenceOutputDataWithTransformations[0];
}
if (transformationsParam.modelParams.referenceOutputDataWithoutTransformations.size() == 1) {
referenceValues = transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0];
} else {
- referenceValues = Xbyak::util::Cpu().has(Xbyak::util::Cpu::tAVX512F) ?
+ referenceValues = InferenceEngine::with_cpu_x86_avx512f() ?
transformationsParam.modelParams.referenceOutputDataWithoutTransformations[1] :
transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0];
}
};
class myriadLayersTestsStridedSlice_smoke: public myriadLayersTests_nightly,
- public testing::WithParamInterface<strided_slice_test_param> {
+ public testing::WithParamInterface<strided_slice_test_param> {
public:
std::string model_t = R"V0G0N(
<net Name="StridedSlice_net" version="2" precision="FP16" batch="1">
// Load network.
StatusCode st = GENERAL_ERROR;
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(
- _exeNetwork, network, { {VPU_CONFIG_KEY(PERF_REPORT_MODE), VPU_CONFIG_VALUE(PER_STAGE)} },
+ _exeNetwork, network, { {VPU_CONFIG_KEY(DETECT_NETWORK_BATCH), CONFIG_VALUE(NO)} },
&_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
strided_slice_test_param{ { 2, 8, 32, 32}, 4, { 0, 0, 0, 2 }, { 2, 8, 32, 32 }, { 1, 1, 1, 3 }, {}, {}, {}, {}, {}, { 2, 8, 32, 10 } },
strided_slice_test_param{ { 1, 32, 128, 128 }, 4, {0, 0, 0, 0 }, { 1, 32, 128, 128 }, { 1, 2, 4, 8 }, {}, {}, {}, {}, {}, { 1, 16, 32, 16 } },
strided_slice_test_param{ { 1, 32, 128, 128 }, 4, {0, 16, 0, 0 }, { 1, 32, 128, 128 }, {}, {}, {}, {}, {}, {}, { 1, 16, 128, 128 } },
+
+ strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, 9999 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 1000 } },
+ strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, -1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 1000 } },
+ strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, -3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 998 } },
};
_begin_mask.insert(_begin_mask.end(), num_dims - _begin_mask.size(), 1);
_end_mask.insert(_end_mask.end(), num_dims - _end_mask.size(), 1);
- auto clip = [](int value, int min, int max) {
- return std::min(std::max(min, value), max);
+ const auto numpyIdxVectorToIdxVector = [&num_dims, &src_dims](const std::vector<int32_t>& values) { // converts per-axis indices (possibly negative) into values within [0, src_dims[i]]
+ std::vector<int32_t> convertedDims(num_dims);
+ for (size_t i = 0; i < num_dims; i++) { // reads values[0 .. num_dims-1]; assumes values has at least num_dims entries - TODO confirm
+ auto value = values[i];
+ if (value < 0) {
+ value = std::max<int32_t>(src_dims[i] + value + 1, 0); // negative index counts back from the end: -1 -> src_dims[i], -k -> src_dims[i] - k + 1, floored at 0
+ }
+ value = std::min<int32_t>(src_dims[i], value); // indices past the end are clamped to src_dims[i]
+ convertedDims[i] = value;
+ }
+
+ return convertedDims;
};
- auto begin_dms = begin;
- auto end_dms = end;
+ auto begin_dms = numpyIdxVectorToIdxVector(begin);
+ auto end_dms = numpyIdxVectorToIdxVector(end);
for (size_t i = 0; i < num_dims; i++) {
IE_ASSERT(_begin_mask[i] == 1 || _begin_mask[i] == 0);
IE_ASSERT(_end_mask[i] == 1 || _end_mask[i] == 0);
- begin_dms[i] = _begin_mask[i] ? begin[i] : 0;
- begin_dms[i] = clip(begin_dms[i], 0, src_dims[i]);
-
- end_dms[i] = _end_mask[i] ? end[i] : src_dims[i];
- end_dms[i] = clip(end_dms[i], 0, src_dims[i]);
+ begin_dms[i] = _begin_mask[i] ? begin_dms[i] : 0;
+ end_dms[i] = _end_mask[i] ? end_dms[i] : src_dims[i];
IE_ASSERT(begin_dms[i] >= 0 && begin_dms[i] < end_dms[i]);
IE_ASSERT(end_dms[i] <= src_dims[i]);
MKLDNN_TESTS_INCLUDE engines/mkldnn/graph/*.hpp)
include_directories(
${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include
- ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/common
- ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu
engines/mkldnn/graph
${CMAKE_BINARY_DIR}/include/)
.called_with_input_and_expected_output(input_data, expected_result);
}
+TEST_F(FP32NonQuantizedTest, CropWithOffsetAndSecondDimPropagateForwardWithSuccessOnCPU) { // feeds input_data through cropWithOffsetAndSecondDimModel() and compares with expected_result
+ std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // 20 input values
+ std::vector<float> expected_result = {7.0, 7.0, 7.0, 7.0, 7.0,
+ 7.0, 7.0, 7.0, 7.0, 7.0}; // 10 expected output values
+
+ assert_that().onInferModel(cropWithOffsetAndSecondDimModel())
+ .inNotCompactMode().gna().propagate_forward().onCPU()
+ .called_with_input_and_expected_output(input_data, expected_result);
+}
+
TEST_F(FP32NonQuantizedTest, CropWithMaxOffsetPropagateForwardWithSuccessOnCPU) {
std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
.called_with_input(input_data).equals_to(expected_result1).equals_to(expected_result2);
}
-TEST_F(FP32NonQuantizedTest, TI1AlignedPropagateForward) {
+// DISABLED DUE TO (31901)
+TEST_F(FP32NonQuantizedTest, DISABLED_TI1AlignedPropagateForward) {
std::vector<float> input_data(32, 0.1f);
std::vector<float> expected_result1(32, 0.25883245);
std::vector<float> expected_result2(12, 0.59515548f);
.called_with_input(input_data).equals_to(expected_result1).And().equals_to(expected_result2);
}
-TEST_F(FP32NonQuantizedTest, TI3AlignedPropagateForward) {
+// DISABLED DUE TO (31901)
+TEST_F(FP32NonQuantizedTest, DISABLED_TI3AlignedPropagateForward) {
std::vector<float> input_data(96, 0.1f);
std::vector<float> expected_result1(32, 0.42592844f);
std::vector<float> expected_result2(12, 0.97069889f);
)V0G0N";
}
+std::string cropWithOffsetAndSecondDimModel() { // returns IR v2 XML: input [1,20] -> Crop (axis 0,1; dim 1,10; offset 0,5) -> InnerProduct with out-size 10
+ return R"V0G0N(
+<Net Name="cropWithOffsetModel" version="2" precision="FP32" batch="1">
+ <layers>
+ <layer name="input_1" type="input" id="0" precision="FP32">
+ <output>
+ <port id="0">
+ <dim>1</dim>
+ <dim>20</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="Crop1" type="Crop" id="1" precision="FP32">
+ <data axis="0,1" dim="1,10" offset="0,5"/>
+ <input>
+ <port id="0">
+ <dim>1</dim>
+ <dim>20</dim>
+ </port>
+ </input>
+ <output>
+ <port id="1">
+ <dim>1</dim>
+ <dim>10</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="FullyConnected1" id="2" type="InnerProduct" precision="FP32">
+ <fc out-size="10" />
+ <biases offset="0" size="40" />
+ <weights offset="40" size="400" />
+ <input>
+ <port id="0">
+ <dim>1</dim>
+ <dim>10</dim>
+ </port>
+ </input>
+ <output>
+ <port id="1">
+ <dim>1</dim>
+ <dim>10</dim>
+ </port>
+ </output>
+ </layer>
+ </layers>
+ <edges>
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
+ <edge from-layer="1" from-port="1" to-layer="2" to-port="0" />
+ </edges>
+</Net>
+)V0G0N";
+}
+
+
std::string cropWithMaxOffsetModel() {
return R"V0G0N(
<Net Name="cropWithOffsetModel" version="2" precision="FP32" batch="1">
<dim>32</dim>
</port>
</output>
- <blobs>
- <weights offset="1724" size="32768"/>
- <biases offset="34492" size="4096"/>
- </blobs>
+ <blobs>
+ <weights offset="1724" size="32768"/>
+ <biases offset="34492" size="512"/>
+ </blobs>
</layer>
</layers>
<edges>
</port>
</output>
<port_map>
- <input axis="0" external_port_id="0" internal_layer_id="0" internal_port_id="0" start="0"/>
+ <input axis="1" external_port_id="0" internal_layer_id="0" internal_port_id="0" start="0"/>
<input external_port_id="1" internal_layer_id="1" internal_port_id="1"/>
<input external_port_id="2" internal_layer_id="1" internal_port_id="2"/>
<output external_port_id="3" internal_layer_id="1" internal_port_id="5"/>
std::string cropWithoutOffsetModel();
std::string cropWithAlignedOffsetModel();
std::string cropWithOffsetModel();
+std::string cropWithOffsetAndSecondDimModel();
std::string cropWithMaxOffsetModel();
std::string cropWithOffsetExtendedModel();
std::string twoCropsModel();
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
#include <ie_iextension.h>
#include <ie_core.hpp>
#include <ie_common.h>
#include <ie_layers.h>
-#include <tests_common.hpp>
-#include <mkldnn_extension_mngr.h>
#include "graph/test_graph.hpp"
+#include <tests_common.hpp>
using namespace ::testing;
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include <ie_core.hpp>
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <algorithm>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <map>
#include <memory>
#include <algorithm>
-#include "nodes/list.hpp"
#include "nodes/base.hpp"
using namespace InferenceEngine;
}
};
- class FakeLayerPLNImpl: public Cpu::ExtLayerBase {
+class FakeLayerPLNImpl: public Cpu::ExtLayerBase {
public:
explicit FakeLayerPLNImpl(const CNNLayer* layer) {
try {
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include <ie_iextension.h>
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
-#include <mkldnn_extension_mngr.h>
#include "tests_common.hpp"
#include "unit_test_utils/mocks/mock_error_listener.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-
-#include "common_test_utils/data_utils.hpp"
-#include "mkldnn_graph.h"
#include "test_graph.hpp"
+#include "common_test_utils/data_utils.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include "ir_gen_helper.hpp"
#include <ie_core.hpp>
#include <nodes/base.hpp>
-#include <cpu_isa_traits.hpp>
+#include <ie_system_conf.h>
using namespace InferenceEngine;
using namespace ::testing;
using namespace std;
-using namespace mkldnn;
using namespace single_layer_tests;
using namespace Extensions;
using namespace ::Cpu;
-using namespace mkldnn::impl;
struct mvn_test_params {
vector<size_t> dims;
InferenceEngine::Precision precision = data_desc.getPrecision();
Layout layout;
if (is_blocked) {
- int blk_size = cpu::mayiuse(cpu::avx512_common) ? 16 : 8;
+ int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 16 : 8;
std::vector<size_t> blocks = data_dims;
std::vector<size_t> order(blocks.size());
}
};
-REG_FACTORY_FOR(Cpu::ImplFactory<FakeLayerImpl_MVN>, FakeLayer_MVN);
-
class MKLDNNCPUExtMVNTests_Blocked: public TestsCommon, public WithParamInterface<mvn_test_params> {
std::string layers_t = R"V0G0N(
<layer name="fakeLayer1" id="1" type="FakeLayer_MVN">
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNGraphTestClass graph;
- graph.CreateGraph(network);
+ auto manager = std::make_shared<MKLDNNPlugin::MKLDNNExtensionManager>();
+ {
+ auto defaultExt = std::make_shared<Cpu::MKLDNNExtensions>();
+ defaultExt->AddExt("FakeLayer_MVN",
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new Cpu::ImplFactory<FakeLayerImpl_MVN>(layer);
+ });
+ manager->AddExtension(defaultExt);
+ }
+ graph.CreateGraph(network, manager);
auto& nodes = graph.getNodes();
nodes = graph.getNodes();
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include "common_test_utils/data_utils.hpp"
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
+
+#include "common_test_utils/data_utils.hpp"
#include "ir_gen_helper.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <nodes/base.hpp>
-#include <cpu_isa_traits.hpp>
+#include <ie_system_conf.h>
using namespace InferenceEngine;
using namespace ::testing;
using namespace std;
-using namespace mkldnn;
using namespace single_layer_tests;
using namespace Extensions;
using namespace ::Cpu;
-using namespace mkldnn::impl;
struct normalize_test_params {
struct {
InferenceEngine::Precision precision = data_desc.getPrecision();
Layout layout;
if (is_blocked) {
- int blk_size = cpu::mayiuse(cpu::avx512_common) ? 16 : 8;
+ int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 16 : 8;
std::vector<size_t> blocks = data_dims;
std::vector<size_t> order(blocks.size());
}
};
-REG_FACTORY_FOR(Cpu::ImplFactory<FakeLayerImpl_Normalize>, FakeLayer_Normalize);
-
class MKLDNNCPUExtNormalizeTests_Blocked: public TestsCommon, public WithParamInterface<normalize_test_params> {
std::string model_t = R"V0G0N(
<layer name="fakeLayer1" id="1" type="FakeLayer_Normalize">
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
MKLDNNGraphTestClass graph;
- graph.CreateGraph(network);
+ auto manager = std::make_shared<MKLDNNPlugin::MKLDNNExtensionManager>();
+ {
+ auto defaultExt = std::make_shared<Cpu::MKLDNNExtensions>();
+ defaultExt->AddExt("FakeLayer_Normalize",
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new Cpu::ImplFactory<FakeLayerImpl_Normalize>(layer);
+ });
+ manager->AddExtension(defaultExt);
+ }
+ graph.CreateGraph(network, manager);
auto& nodes = graph.getNodes();
nodes = graph.getNodes();
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include "single_layer_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include "ir_gen_helper.hpp"
#include <ie_core.hpp>
#include <nodes/base.hpp>
-#include <cpu_isa_traits.hpp>
using namespace InferenceEngine;
using namespace ::testing;
using namespace std;
-using namespace mkldnn;
using namespace single_layer_tests;
using namespace Extensions;
using namespace ::Cpu;
-using namespace mkldnn::impl;
struct resample_test_params {
std::vector<size_t> in_dims;
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <stdio.h>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include <cnn_network_impl.hpp>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include "tests_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include <unordered_set>
#include <cnn_network_impl.hpp>
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
-
-#define XBYAK_NO_OP_NAMES
-#define XBYAK_UNDEF_JNL
-#include "../../../../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h"
+#include <ie_system_conf.h>
using namespace InferenceEngine;
using namespace ::testing;
p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j));
}
ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor());
- Xbyak::util::Cpu cpu;
- if (cpu.has(Xbyak::util::Cpu::tAVX512F)
- && cpu.has(Xbyak::util::Cpu::tAVX512BW)
- && cpu.has(Xbyak::util::Cpu::tAVX512VL)
- && cpu.has(Xbyak::util::Cpu::tAVX512DQ)
+ if (InferenceEngine::with_cpu_x86_avx512f() &&
+ InferenceEngine::with_cpu_x86_avx512_core()
&& !p.preferTypes.empty()
&& p.preferTypes[0] == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd) {
isWino = true;
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "ir_gen_helper.hpp"
#include "tests_common.hpp"
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#define NOMINMAX
#endif
-#include <ie_plugin_config.hpp>
-#include "common_test_utils/data_utils.hpp"
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
+#include <ie_plugin_config.hpp>
+#include "common_test_utils/data_utils.hpp"
#include "single_layer_common.hpp"
#include <mkldnn_extension_utils.h>
#include <cnn_network_impl.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
+#include "test_graph.hpp"
+
#include <mkldnn_plugin.h>
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
#include "mkldnn_exec_network.h"
#include <ie_core.hpp>
-
-#include "test_graph.hpp"
-
#include <mkldnn_extension_utils.h>
#include <config.h>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <nodes/base.hpp>
}
};
-REG_FACTORY_FOR(Cpu::ImplFactory<FakeLayerImpl_permute>, FakeLayer_permute);
-
static std::string precToStr (Precision prec) {
return prec == Precision::I8 ? "I8" : "FP32";
}
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNGraphTestClass graph;
- graph.CreateGraph(network);
+ auto manager = std::make_shared<MKLDNNPlugin::MKLDNNExtensionManager>();
+ {
+ auto defaultExt = std::make_shared<Cpu::MKLDNNExtensions>();
+ defaultExt->AddExt("FakeLayer_permute",
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new Cpu::ImplFactory<FakeLayerImpl_permute>(layer);
+ });
+ manager->AddExtension(defaultExt);
+ }
+ graph.CreateGraph(network, manager);
auto& nodes = graph.getNodes();
for (int i = 0; i < nodes.size(); i++) {
if (nodes[i]->getType() == MKLDNNPlugin::Permute) {
InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp);
ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg;
+ auto manager = std::make_shared<MKLDNNPlugin::MKLDNNExtensionManager>();
+ {
+ auto defaultExt = std::make_shared<Cpu::MKLDNNExtensions>();
+ defaultExt->AddExt("FakeLayer_permute",
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new Cpu::ImplFactory<FakeLayerImpl_permute>(layer);
+ });
+ manager->AddExtension(defaultExt);
+ }
MKLDNNGraphTestClass graph;
graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}});
- graph.CreateGraph(network);
+ graph.CreateGraph(network, manager);
InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, p.dims, InferenceEngine::TensorDesc::getLayoutByDims(p.dims)});
src->allocate();
#define NOMINMAX
#endif
-#include <ie_plugin_config.hpp>
-
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
+#include <ie_plugin_config.hpp>
#include "single_layer_common.hpp"
#include <ie_layers.h>
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include "ir_gen_helper.hpp"
#include <math.h>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
-#include <mkldnn_extension_mngr.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-#include "single_layer_common.hpp"
#include "test_graph.hpp"
-#include <mkldnn_extension_utils.h>
+#include "single_layer_common.hpp"
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_system_conf.h>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include <ie_core.hpp>
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
using namespace ::testing;
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
-#include <cnn_network_impl.hpp>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include "ir_gen_helper.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include "common_test_utils/data_utils.hpp"
-#include <gtest/gtest.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
+#include "common_test_utils/data_utils.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include "ir_gen_helper.hpp"
#include <ie_core.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
-
#include "test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
-#include <gmock/gmock-spec-builders.h>
-#include "mkldnn_graph.h"
+#include "../test_graph.hpp"
#include "single_layer_common.hpp"
-#include <mkldnn_extension_utils.h>
#include <mkldnn_extension_mngr.h>
#include "tests_common.hpp"
#include <ie_core.hpp>
-#include "../test_graph.hpp"
using namespace ::testing;
// SPDX-License-Identifier: Apache-2.0
//
-#include <gtest/gtest.h>
+#include "../test_graph.hpp"
#include "mkldnn_exec_network.h"
-#include <mkldnn_extension_utils.h>
#include "tests_common.hpp"
-#include "../test_graph.hpp"
-#include <ie_ir_reader.hpp>
#include <ie_core.hpp>
-#include <ie_system_conf.h>
#include <ngraph/ngraph.hpp>
#pragma once
-#include <nodes/list.hpp>
+// WA for windows.h
+#ifdef _WIN32
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# ifndef _WINSOCKAPI_
+# define _WINSOCKAPI_
+# endif
+# ifndef _WINSOCK2API_
+# define _WINSOCK2API_
+# endif
+#endif
+
#include <gtest/gtest.h>
+#include <nodes/list.hpp>
#include <mkldnn_graph.h>
#include <mkldnn_memory.h>
#include <mkldnn_extension_utils.h>
#include <tests_common.hpp>
#include <watchdog/watchdog.h>
#include <watchdog/watchdogPrivate.hpp>
-#include <mvnc/include/ncPrivateTypes.h>
#include <thread>
using namespace ::testing;
using namespace InferenceEngine;
+using ms = std::chrono::milliseconds;
+
class MockWatchdogDevice : public Watchdog::IDevice {
public:
using time_point = Watchdog::IDevice::time_point;
- MOCK_QUALIFIED_METHOD1(setInterval, noexcept, void(const std::chrono::milliseconds));
MOCK_QUALIFIED_METHOD1(keepAlive, noexcept, void(const time_point &));
MOCK_QUALIFIED_METHOD1(dueIn, const noexcept, std::chrono::milliseconds (const time_point ¤t_time));
MOCK_QUALIFIED_METHOD0(isTimeout, const noexcept, bool ());
MOCK_QUALIFIED_METHOD0(getHandle, const noexcept, void* ());
};
-struct wd_context_opaque_private {
- void * magic = reinterpret_cast<void *> (0xdeadbeaf);
- Watchdog::IDevice * actual = nullptr;
- bool destroyed = false;
-};
-
-
class MVNCWatchdogTests: public TestsCommon {
protected:
- devicePrivate_t d;
- wd_context ctx, ctx1;
+ WatchdogHndl_t* m_watchdogHndl = nullptr;
+ WdDeviceHndl_t deviceHndl, deviceHndl1;
StrictMock<MockWatchdogDevice> mockWatchee, mockWatchee1;
- wd_context_opaque_private opaque, opaque1;
void SetUp() override {
- opaque.actual = &mockWatchee;
- ctx.opaque = &opaque;
-
- opaque1.actual = &mockWatchee1;
- ctx1.opaque = &opaque1;
+ deviceHndl.m_device = &mockWatchee;
+ deviceHndl1.m_device = &mockWatchee1;
- pthread_mutex_init(&d.dev_stream_m, nullptr);
+ ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl));
}
+
void TearDown() override {
- pthread_mutex_destroy(&d.dev_stream_m);
+ watchdog_destroy(m_watchdogHndl);
+ }
+
+ void setExpectations(StrictMock<MockWatchdogDevice>& mock){
+ EXPECT_CALL(mock, keepAlive(_)).Times(AtLeast(0));
+ EXPECT_CALL(mock, dueIn(_)).WillRepeatedly(Return(ms(20000)));
+ EXPECT_CALL(mock, isTimeout()).WillRepeatedly(Return(false));
+ EXPECT_CALL(mock, getHandle()).WillRepeatedly(Return(&mock));
}
};
-using ms = std::chrono::milliseconds;
TEST_F(MVNCWatchdogTests, canRegisterExternalWatchee) {
+ setExpectations(mockWatchee);
- int handle = 1;
- EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle));
- // do not expect that any ping happened before we remove the thread
- // this can be changed for example registering succeed only if first ping succeed
- EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(0));
- EXPECT_CALL(mockWatchee, setInterval(ms(1))).Times(1);
- EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false));
- EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000)));
-
- d.wd_interval = 1;
-
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
// allowing thread spin
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
-}
-// TODO: implement logic
-TEST_F(MVNCWatchdogTests, DISABLED_removeDeviceIfXLINKSessionNotIninitialized) {
-
- d.wd_interval = 10;
- ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx));
- ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d));
-
- std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
}
-#if defined(__APPLE__) && !defined(NDEBUG)
-TEST_F(MVNCWatchdogTests, DISABLED_canNotBeRegisteredTwice) {
-#else
TEST_F(MVNCWatchdogTests, canNotBeRegisteredTwice) {
-#endif
+ setExpectations(mockWatchee);
- d.wd_interval = 10;
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
+ ASSERT_NE(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
- ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx));
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
- ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d));
// allowing thread spin
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
-}
-
-TEST_F(MVNCWatchdogTests, canUnRegisterNotInitialized) {
-
- ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
}
-TEST_F(MVNCWatchdogTests, canUnRegisterIfInterval0) {
+TEST_F(MVNCWatchdogTests, canNotUnRegisterNotInitialized) {
+ EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee));
- d.wd_interval = 0;
-
- ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx));
- ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
+ ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
}
-#if defined(__APPLE__) && !defined(NDEBUG)
-TEST_F(MVNCWatchdogTests, DISABLED_failUnRegisterTwice) {
-#else
TEST_F(MVNCWatchdogTests, failUnRegisterTwice) {
-#endif
+ setExpectations(mockWatchee);
- d.wd_interval = 10;
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
- ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx));
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
// allowing thread spin
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
- ASSERT_NE(WD_ERRNO, watchdog_unregister_device(&ctx));
+
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
+ ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
}
TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) {
- int handle = 1;
int x = 0;
int y = 0;
int z = 0;
- EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle));
+ EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee));
EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(1));
- EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Invoke([&z, &y]() {
// will sleep at least 100 ms and avoid second keep alive call
y = 100;
return std::chrono::milliseconds(y);
}));
- EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle));
+ EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1));
EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(AtLeast(2));
- EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Invoke([&x]() {
// allow every second time to wait
x = x == 0 ? 100 : 0;
return std::chrono::milliseconds(x);
}));
-
- d.wd_interval = 10;
-
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1));
std::this_thread::sleep_for(ms(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1));
-}
-
-TEST_F(MVNCWatchdogTests, canNotStartWatchdogIfIntervalInvalid) {
-
- opaque.actual = &mockWatchee;
-
- int handle = 1;
-
- EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle));
-
- d.wd_interval = 0;
- ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d));
-
- d.wd_interval = -1;
- ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d));
-
- // if fo some reason thread started we will get unxpected updatePongInterval calls
- std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl1));
}
TEST_F(MVNCWatchdogTests, canGetPingsOnRegularBasis) {
-
- int handle = 1;
int x = 0;
- EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle));
+ EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee));
// since interval is small keepAlive can happen several times once
EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(2));
- EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false));
EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Invoke([&x](const MockWatchdogDevice::time_point ¤t_time){
x = x == 0 ? 100 : 0;
return std::chrono::milliseconds(x);
}));
-
- d.wd_interval = 10;
-
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
std::this_thread::sleep_for(ms(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
}
TEST_F(MVNCWatchdogTests, canWakeUpWatchdogWhenAddAndRemoveDevice) {
-
- int handle = 1, handle1 = 2;
-
- EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle));
+ EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee));
EXPECT_CALL(mockWatchee, keepAlive(_)).Times(1);
- EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false));
// without wake this will sleep for ever
EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000)));
- EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle1));
+ EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1));
EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(1);
- EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Return(false));
EXPECT_CALL(mockWatchee1, dueIn(_)).WillRepeatedly(Return(ms(20000)));
-
- d.wd_interval = 10;
-
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d));
-
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl));
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d));
-
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1));
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl1));
}
TEST_F(MVNCWatchdogTests, stressWatchDog) {
-
const int num_watchdog_device = 10;
-
- watchdog_init_context(nullptr);
-
StrictMock<MockWatchdogDevice> mockWatchee[num_watchdog_device];
- int handle[num_watchdog_device];
- wd_context ctx[num_watchdog_device];
- wd_context_opaque_private opaque[num_watchdog_device];
+ WdDeviceHndl_t deviceHndl[num_watchdog_device];
for (int i = 0; i != num_watchdog_device; i++) {
- handle[i] = i;
-
- EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i));
+ EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i]));
// since interval is big keepAlive happens only once
EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1);
- EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false));
EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000)));
- }
- d.wd_interval = 10;
+ deviceHndl[i].m_device = &mockWatchee[i];
+ }
for (int k = 0; k != num_watchdog_device; k++) {
- opaque[k].actual = &mockWatchee[k];
- ctx[k].opaque = &opaque[k];
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
for (int k = 0; k != num_watchdog_device; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
TEST_F(MVNCWatchdogTests, stressWatchDog1) {
-
const int num_watchdog_device = 10;
const int num_watchdog_device_half = num_watchdog_device / 2;
- watchdog_init_context(nullptr);
-
StrictMock<MockWatchdogDevice> mockWatchee[num_watchdog_device];
- int handle[num_watchdog_device];
- wd_context ctx[num_watchdog_device];
- wd_context_opaque_private opaque[num_watchdog_device];
+ WdDeviceHndl_t deviceHndl[num_watchdog_device];
for (int i = 0; i != num_watchdog_device; i++) {
- handle[i] = i;
-
- EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i));
+ EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i]));
// since interval is big keepAlive happens only once
EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1);
- EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false));
EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000)));
- }
- d.wd_interval = 10;
- for (int k = 0; k != num_watchdog_device; k++) {
- opaque[k].actual = &mockWatchee[k];
- ctx[k].opaque = &opaque[k];
+ deviceHndl[i].m_device = &mockWatchee[i];
}
for (int k = 0; k != num_watchdog_device_half; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
for (int k = 0; k != num_watchdog_device_half; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k + num_watchdog_device_half], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half]));
std::this_thread::sleep_for(std::chrono::milliseconds(20));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k]));
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
for (int k = 0; k != num_watchdog_device_half; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k + num_watchdog_device_half]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
TEST_F(MVNCWatchdogTests, stressWatchDog2) {
-
const int num_watchdog_device = 30;
const int num_watchdog_device_half1 = num_watchdog_device / 3;
const int num_watchdog_device_half2 = 2 * num_watchdog_device / 3;
- watchdog_init_context(nullptr);
-
StrictMock<MockWatchdogDevice> mockWatchee[num_watchdog_device];
- int handle[num_watchdog_device];
- wd_context ctx[num_watchdog_device];
- wd_context_opaque_private opaque[num_watchdog_device];
+ WdDeviceHndl_t deviceHndl[num_watchdog_device];
for (int i = 0; i != num_watchdog_device; i++) {
- handle[i] = i;
-
- EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i));
+ EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i]));
// since interval is big keepAlive happens only once
if (i >= num_watchdog_device_half2) {
EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1);
}
- EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1);
EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false));
EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000)));
- }
- d.wd_interval = 10;
- for (int k = 0; k != num_watchdog_device; k++) {
- opaque[k].actual = &mockWatchee[k];
- ctx[k].opaque = &opaque[k];
+ deviceHndl[i].m_device = &mockWatchee[i];
}
for (int k = 0; k != num_watchdog_device_half1; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
for (int k = 0; k != num_watchdog_device_half1; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(2000));
for (int k = num_watchdog_device_half1; k != num_watchdog_device_half2; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k]));
//this might lead to UB, for example thread might restart but after that device get removed, so giving more time
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k]));
}
for (int k = num_watchdog_device_half2; k != num_watchdog_device; k++) {
- ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d));
+ ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k]));
//this might lead to UB, for example thread might restart but after that device get removed, so giving more time
//so our expectations for number of calls are not set for last third
- ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k]));
+ ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k]));
}
std::this_thread::sleep_for(std::chrono::milliseconds(3000));
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <tests_common.hpp>
+#include <watchdog/xlink_device.h>
+#include <mvnc/include/ncPrivateTypes.h>
+
+using namespace ::testing;
+using namespace InferenceEngine;
+
+class XLinkDeviceTests: public TestsCommon {};  // fixture for the plain TEST_F cases in this file
+class XLinkDeviceTestsWithParam: public TestsCommon, public testing::WithParamInterface<int> {};  // fixture with an int parameter; the TEST_P case uses it as wd_interval
+
+TEST_F(XLinkDeviceTests, shouldCreateXlinkDevice) {  // creating a device with a positive interval must report WD_ERRNO
+ devicePrivate_t devicePrivate = {0};
+ devicePrivate.wd_interval = 1;  // positive watchdog interval, expected to be accepted
+
+ WdDeviceHndl_t* deviceHndl = nullptr;
+ ASSERT_EQ(WD_ERRNO, xlink_device_create(&deviceHndl, &devicePrivate));  // WD_ERRNO is the result this test treats as success
+
+ xlink_device_destroy(deviceHndl);  // release the handle produced by xlink_device_create
+}
+
+TEST_P(XLinkDeviceTestsWithParam, shouldNotCreateXlinkDeviceWithInvalidInterval) {  // creation must fail for every instantiated interval value
+ devicePrivate_t devicePrivate = {0};
+ devicePrivate.wd_interval = GetParam();  // interval under test, supplied by the test instantiation
+
+ WdDeviceHndl_t* deviceHndl = nullptr;
+ ASSERT_NE(WD_ERRNO, xlink_device_create(&deviceHndl, &devicePrivate));  // any result other than WD_ERRNO counts as the expected rejection
+
+ xlink_device_destroy(deviceHndl);  // NOTE(review): deviceHndl is presumably still nullptr after a failed create - confirm destroy tolerates that
+}
+
+INSTANTIATE_TEST_CASE_P(WatchdogDevice,
+ XLinkDeviceTestsWithParam,
+ testing::Values(0, -1, -WATCHDOG_MAX_PING_INTERVAL_MS));
MOCK_QUALIFIED_METHOD0(AvailableDevicesNames, const, std::vector<std::string>());
MOCK_QUALIFIED_METHOD0(AvailableDevicesDesc, const, std::vector<ncDeviceDescr_t>());
+ MOCK_METHOD0(watchdogHndl, WatchdogHndl_t*());
+
~MvncStub() = default;
};
ie_developer_export_targets(pugixml_mt)
set_target_properties(pugixml_mt PROPERTIES FOLDER thirdparty)
endif()
+
+ if(ENABLE_MKL_DNN)
+ set(SDL_cmake_included ON)
+ include(mkldnn.cmake)
+ endif()
endfunction()
build_with_lto()
-
-if(ENABLE_MKL_DNN)
- set(SDL_cmake_included ON)
- include(mkldnn.cmake)
-endif()
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "convolution_kernel_b_fs_yx_fsv16_imad.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+#include <vector>
+#include <string>
+#include <iostream>
+#include <algorithm>
+
+//
+// Kernel specific constants
+//
+static constexpr size_t fsv = 16;
+static constexpr size_t simd = 16;
+
+static size_t getOutBlock_X(const size_t output_size_x, const size_t stride_x, const size_t filter_size_x, const size_t dilation_x) {
+ // Selects the output block width along X for the given output width, stride, filter width and dilation.
+ // Input block width for a given output block: (output_block - 1) * stride + (filter_size - 1) * dilation + 1
+ // Evaluating it for output_block = 1 gives the minimum input width that has to be held.
+ size_t min_in_block_size = (filter_size_x - 1) * dilation_x + 1;
+ // Input block is spread across sub-group, so ceil-divide by simd size.
+ size_t min_in_block_simds = kernel_selector::CeilDiv(min_in_block_size, simd);
+
+ size_t output_block_width = 0;  // 0 means "no candidate chosen yet"
+ size_t max_block_size = std::min((min_in_block_simds * simd - 1 - (filter_size_x - 1) * dilation_x) / stride_x + 1, output_size_x);  // widest output block whose input fits in min_in_block_simds * simd values, capped at the output width
+
+ if (output_size_x <= max_block_size)  // the whole output row fits in a single block
+ return output_size_x;
+
+ for (size_t block = 4; block <= max_block_size; ++block) {  // ascending scan, so the largest divisor in [4, max_block_size] wins
+ if (output_size_x % block == 0)
+ output_block_width = block;
+ }
+ if (output_block_width == 0 && output_size_x < max_block_size * 3) {  // no exact divisor and a short row: minimise the overhang instead
+ size_t min_overhang = max_block_size;
+ for (size_t block = 4; block <= max_block_size; ++block) {
+ size_t overhang = block - output_size_x % block;  // output positions past the row end covered by the last block
+ if (overhang <= min_overhang) {  // '<=' lets a later (larger) block win ties
+ min_overhang = overhang;
+ output_block_width = block;
+ }
+ }
+ }
+
+ if (output_block_width == 0) {  // nothing selected above: fall back to the widest allowed block
+ output_block_width = max_block_size;
+ }
+ return output_block_width;
+}
+
+namespace kernel_selector {
+
+Convolution_kernel_b_fs_yx_fsv16_imad::BlockParams
+Convolution_kernel_b_fs_yx_fsv16_imad::GetBlockParams(const convolution_params& params) const {
+ constexpr float max_reg_pressure = 0.75f;  // upper bound accepted for EstimateRegPressure results
+
+ // TODO Investigate whether the algorithm below for selecting block params could be reduced to:
+ // 1. Enumerate possible block params as optimization space
+ // 2. Prune invalid params (too high register pressure, too big local memory usage)
+ // 3. Rank params according to some combination of:
+ // - compute/memory ratio
+ // - occupancy
+ // - register pressure
+ // - local memory usage
+ // 4. Select params with highest rank
+
+ // Select the initial output block width and the input block width it requires
+ size_t block_width = getOutBlock_X(params.output.X().v, params.stride.x, params.filterSize.x, params.dilation.x);
+ size_t in_block_width = (block_width - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1;
+
+ // If possible, increase the features block size from simd to 2 * simd
+ size_t block_features = simd;
+ {
+ size_t tmp_block_features = simd * 2;
+ auto block2_params = BlockParams{ block_width, 1, tmp_block_features, in_block_width, 1, 1 };
+
+ bool c_mul_f = params.output.Feature().v % tmp_block_features == 0;  // output features divide evenly into the bigger block
+ bool c_reg_pressure = EstimateRegPressure(params, block2_params) <= max_reg_pressure;
+
+ if (c_mul_f && c_reg_pressure) {
+ block_features = tmp_block_features;
+ }
+ }
+
+ // If occupancy is too low, try a feature split and/or a block reduction
+ size_t feature_slm_split = 1;
+ auto no_split_params = BlockParams{ block_width, 1, block_features, in_block_width, 1, 1 };
+ if (EstimateOccupancy(params, no_split_params) < 1.f) {
+ // Temporary variables for possible reductions in block sizes
+ bool update_block_params = false;
+ size_t split_block_width = block_width;
+ size_t split_in_block_width = in_block_width;
+ size_t split_block_features = block_features;
+
+ // Feature split requires extra registers, so check if it can be done with current block sizes
+ bool can_split =
+ EstimateRegPressure(params, BlockParams{ block_width, 1, block_features, in_block_width, 1, 2 }) <= max_reg_pressure;
+ // Whether the occupancy has reached a sufficient level
+ bool enough_occupancy = false;
+ // Reductions to reduce register pressure
+ // Try to reduce block width to free some registers. Good compute/memory ratio will be pointless if barely any threads will run.
+ if (!can_split && block_width != 1) {
+ // The output block width may shrink to at most half (rounded up) of its original value
+ for (size_t w = block_width; w >= CeilDiv(block_width, 2); w -= 1) {
+ size_t tmp_in_width = (w - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1;
+ auto dummy_split_params = BlockParams{ w, 1, block_features, tmp_in_width, 1, 2 };
+
+ bool c_reg_pressure = EstimateRegPressure(params, dummy_split_params) <= max_reg_pressure;
+ bool c_mul_x = params.output.X().v % w == 0;
+
+ if (c_reg_pressure && c_mul_x) {
+ split_block_width = w;
+ split_in_block_width = tmp_in_width;
+ can_split = true;
+ break;
+ }
+ }
+ }
+ // Try to reduce block features.
+ // Done after attempting block width reduction, because bigger feature block allows more threads to write results in parallel.
+ if (!can_split) {
+ if (block_features / simd % 2 == 0) {  // an even number of simd-sized feature groups can be halved
+ split_block_features = block_features / 2;
+ can_split = true;
+ }
+ }
+ // Check whether the reductions above already give enough occupancy without a feature split
+ {
+ auto reduced_params = BlockParams{ split_block_width, 1, split_block_features, split_in_block_width, 1, 1 };
+ enough_occupancy = EstimateOccupancy(params, reduced_params) >= 1.f;
+ update_block_params = enough_occupancy;
+ }
+
+ if (can_split && !enough_occupancy) {
+ // TODO Try other split sizes (the loop below currently evaluates only split = 4)
+ for (size_t split = 4; split < 5; ++split) {
+ auto tmp_params = BlockParams{ block_width, 1, block_features, in_block_width, 1, split };  // NOTE(review): built from the unreduced sizes, not the split_* candidates - confirm this is intended
+
+ bool c_ifm_mul = CeilDiv(params.weights.IFM().v, fsv) % split == 0;
+ bool c_slm = EstimateSLMUsage(params, tmp_params) <= 1.f;
+ bool c_lws = split * simd <= params.engineInfo.maxWorkGroupSize;
+ bool c_reg_pressure = EstimateRegPressure(params, tmp_params) <= max_reg_pressure;
+ bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f;
+
+ if (c_ifm_mul && c_slm && c_lws && c_reg_pressure) {
+ feature_slm_split = split;
+ update_block_params = true;
+ enough_occupancy = c_occupancy;
+ }
+
+ // slm usage and work group sizes will only grow with split, so there is no point in checking larger splits
+ if (!c_slm || !c_lws || split * fsv >= params.weights.IFM().v)
+ break;
+ }
+ }
+ // Splitting was not sufficient or couldn't be done
+ // Try to reduce block width if it hasn't been reduced before
+ if (!enough_occupancy && split_block_width == block_width && block_width != 1) {
+ // The output block width may shrink to at most half (rounded up) of its original value
+ for (size_t w = block_width; w >= CeilDiv(block_width, 2); w -= 1) {
+ size_t tmp_in_width = (w - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1;
+ auto tmp_params = BlockParams{ w, 1, split_block_features, tmp_in_width, 1, feature_slm_split };
+
+ bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f;
+ bool c_mul_x = params.output.X().v % w == 0;
+
+ if (c_mul_x) {
+ split_block_width = w;
+ split_in_block_width = tmp_in_width;
+ update_block_params = true;
+ }
+ // Reached enough occupancy, don't reduce further to not hurt compute/mem ratio
+ if (c_mul_x && c_occupancy)
+ break;
+ }
+ }
+ if (update_block_params) {  // commit the reduced candidates only if one of the steps above accepted them
+ block_width = split_block_width;
+ in_block_width = split_in_block_width;
+ block_features = split_block_features;
+ }
+ }
+
+ // Select biggest block height that fits into registers
+ size_t block_height = 1;
+ size_t in_block_height = 1;
+ for (size_t h = 2; h < 16; ++h) {  // candidate heights 2..15
+ if (params.output.Y().v % h != 0)  // only heights that divide the output height are considered
+ continue;
+
+ size_t tmp_in_block_height = (h - 1) * params.stride.y + (params.filterSize.y - 1) * params.dilation.y + 1;
+ auto tmp_params = BlockParams{ block_width, h, block_features, in_block_width, tmp_in_block_height, feature_slm_split };
+
+ bool c_reg_pressure = EstimateRegPressure(params, tmp_params) <= max_reg_pressure;
+ bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f;
+ bool c_slm = EstimateSLMUsage(params, tmp_params) <= 1.f;
+
+ if (c_reg_pressure && c_occupancy && c_slm) {
+ block_height = h;
+ in_block_height = tmp_in_block_height;
+ } else {
+ break;  // stop at the first dividing height that fails a check; larger heights are not tried
+ }
+ }
+
+ return BlockParams{ block_width, block_height, block_features, in_block_width, in_block_height, feature_slm_split };
+}
+
+float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateRegPressure(const convolution_params& params, const BlockParams& block) const {
+ size_t bytes_used = 0;  // running total of the estimated register bytes for this block configuration
+ // Accumulator: one element per output value in the block (width * height * features)
+ size_t accumulator_elements = block.output_block_width * block.output_block_height * block.output_block_features;
+ bytes_used += accumulator_elements * BytesPerElement(GetAccumulatorType(params));
+ // Input block: input height * Align(input width, simd) * fsv elements
+ size_t input_block_elements = block.input_block_height * Align(block.input_block_width, simd) * fsv;
+ bytes_used += input_block_elements * BytesPerElement(params.inputs[0].GetDType());
+ // Weights block: output_block_features * fsv elements
+ size_t weights_block_elements = block.output_block_features * fsv;
+ bytes_used += weights_block_elements * BytesPerElement(params.weights.GetDType());
+
+ // Experimentally selected allowance for extra variables (eg. out_x, out_y, filter_idx, etc.).
+ // The value is added to bytes_used, so 8 * 32 is presumably 8 registers of 32 bytes each - TODO confirm.
+ constexpr size_t experimental_extra_regs = 8 * 32;
+ bytes_used += experimental_extra_regs;
+
+ // Experimentally selected allowance for slm handling, charged only when a feature split is used
+ constexpr size_t experimental_slm_regs = 4 * 32;
+ if (block.feature_slm_split != 1) {
+ bytes_used += experimental_slm_regs;
+ }
+
+ constexpr size_t reg_num = 128;
+ constexpr size_t bytes_per_reg = 32;
+ constexpr size_t max_reg_bytes = reg_num * bytes_per_reg;  // budget the estimate is normalised against
+
+ return static_cast<float>(bytes_used) / static_cast<float>(max_reg_bytes);  // fraction of max_reg_bytes; GetBlockParams compares it against max_reg_pressure
+}
+
+float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateOccupancy(const convolution_params& params, const BlockParams& block) const {
+ size_t blocks_w = CeilDiv(params.output.X().v, block.output_block_width);  // blocks needed along output X
+ size_t blocks_h = CeilDiv(params.output.Y().v, block.output_block_height);  // blocks needed along output Y
+ size_t blocks_f = CeilDiv(params.output.Feature().v, block.output_block_features) * block.feature_slm_split;  // feature blocks, multiplied by the slm split factor
+ size_t block_b = params.output.Batch().v;  // batch entries are counted individually
+
+ auto threads = blocks_w * blocks_h * blocks_f * block_b;  // total count used as the number of threads
+ constexpr size_t max_threads_per_cu = 7;  // NOTE(review): hard-coded threads per compute unit - confirm for all target devices
+ size_t compute_units = params.engineInfo.computeUnitsCount;
+ size_t max_threads = compute_units * max_threads_per_cu;
+
+ return static_cast<float>(threads) / static_cast<float>(max_threads);  // ratio of threads to max_threads; GetBlockParams treats >= 1.f as enough
+}
+
+float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const {
+ size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_features * (block.feature_slm_split - 1);  // (split - 1) copies of the output block; zero when feature_slm_split == 1
+ size_t slm_bytes = slm_elements * BytesPerElement(GetAccumulatorType(params));  // elements are sized as accumulator values
+
+ // TODO Actual maximum slm should also depend on number of work-groups, but this is device specific
+ size_t max_slm_bytes = params.engineInfo.maxLocalMemSize;
+
+ return static_cast<float>(slm_bytes) / static_cast<float>(max_slm_bytes);  // fraction of maxLocalMemSize; GetBlockParams accepts values <= 1.f
+}
+
+ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad::GetSupportedKey() const {
+ ParamsKey k;  // key listing the parameter combinations this kernel declares support for
+ k.EnableInputDataType(Datatype::INT8);  // inputs: INT8 and UINT8
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::INT8);  // outputs: INT8, UINT8, F32 and F16
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+
+ k.EnableInputWeightsType(WeightsType::INT8);  // weights: INT8 only
+
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);  // input and output both use the b_fs_yx_fsv16 layout
+ k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableBatching();
+ k.EnableQuantization(QuantizationType::SYMMETRIC);
+ k.EnableDilation();
+ k.DisableTuning();
+ return k;
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad::GetKernelsData(const Params& params,
+ const optional_params& options) const {
+ return GetCommonKernelsData(params, options);  // no kernel-specific handling here; forwards both arguments unchanged
+}
+
+JitConstants Convolution_kernel_b_fs_yx_fsv16_imad::GetJitConstants(const convolution_params& params,
+ const DispatchData& kd) const {
+ auto mem_consts = Parent::GetJitConstants(params, kd);  // start from the parent's constants and extend them
+
+ auto block_params = GetBlockParams(params);  // block sizes are recomputed here from params
+
+ bool unroll_filter_y = block_params.output_block_height != 1;  // true only when a block covers more than one output row
+
+ mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block_params.output_block_width));
+ mem_consts.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", block_params.input_block_width));
+ mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block_params.output_block_height));
+ mem_consts.AddConstant(MakeJitConstant("IN_BLOCK_HEIGHT", block_params.input_block_height));
+ mem_consts.AddConstant(MakeJitConstant("FILTER_SIZE_Y_UNROLL", unroll_filter_y ? params.filterSize.y : 1));  // filterSize.y when unrolling, otherwise 1
+ mem_consts.AddConstant(MakeJitConstant("OFM_BLOCKS_PER_SIMD", block_params.output_block_features / simd));  // number of simd-sized feature groups in the block
+ mem_consts.AddConstant(MakeJitConstant("OFM_SIZE_PER_SIMD", block_params.output_block_features));
+ mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", block_params.feature_slm_split));
+ mem_consts.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR"));
+ mem_consts.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION"));
+
+ if (!params.fused_ops.empty()) {  // fused-op constants are generated only when fused ops are present
+ auto input_dt = GetActivationType(params);
+ std::vector<std::string> idx_order = { "out_b", "(out_f + ofb * 16)", "(out_y + oh)", "(out_x + ow)" };  // index expressions in b, f, y, x order
+ std::vector<Tensor::DataChannelName> loop_axes = { Tensor::DataChannelName::X };
+ if (block_params.output_block_height != 1) {
+ loop_axes.push_back(Tensor::DataChannelName::Y);  // multi-row blocks also loop over Y
+ } else {
+ idx_order[idx_order.size() - 2] = "out_y";  // one-row blocks index Y with plain out_y instead of (out_y + oh)
+ }
+
+ FusedOpsConfiguration conf_scalar = { "_SCALAR",
+ idx_order,
+ "dequantized_val",
+ input_dt,
+ 1,
+ LoadType::LT_UNALIGNED,
+ BoundaryCheck::DISABLED };
+ conf_scalar.SetLoopAxes(loop_axes, true);
+
+ mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
+ }
+
+ return mem_consts;
+} // GetJitConstants
+
+ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad::SetDefault(const convolution_params& params,
+ int) const {
+ DispatchData kd;
+ const auto& output = params.output;
+ auto block_params = GetBlockParams(params);  // work sizes below follow the selected block sizes
+
+ kd.gws0 = CeilDiv(output.X().v, block_params.output_block_width);  // one work-item per output block along X
+ kd.gws1 = CeilDiv(output.Y().v, block_params.output_block_height);  // one work-item per output block along Y
+ kd.gws2 = output.Batch().v * CeilDiv(output.Feature().v, block_params.output_block_features) * simd * block_params.feature_slm_split;  // batch * feature blocks * simd * slm split
+
+ kd.lws0 = 1;
+ kd.lws1 = 1;
+ kd.lws2 = simd * block_params.feature_slm_split;  // local size along dim 2 scales with the slm split
+
+ kd.cldnnStyle = {0, 0, 0, 0, 0};
+ kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+
+ kd.efficiency = FORCE_PRIORITY_2;  // default priority for this kernel
+ // TODO Optimize 1x1, because this kernel is better in most cases
+ //if (params.filterSize.x == 1 && params.filterSize.y == 1)
+ // kd.efficiency = FORCE_PRIORITY_1;
+ if (static_cast<float>(params.weights.IFM().v) / static_cast<float>(Align(params.weights.IFM().v, fsv)) < 0.5f)  // IFM is less than half of Align(IFM, fsv)
+ kd.efficiency = FORCE_PRIORITY_4;
+
+ return kd;
+} // SetDefault
+
+bool Convolution_kernel_b_fs_yx_fsv16_imad::Validate(const Params& params, const optional_params& options) const {
+ if (!Parent::Validate(params, options)) {  // the parent's checks must pass first
+ return false;
+ }
+
+ KernelData kd = KernelData::Default<convolution_params>(params);  // NOTE(review): presumably wraps a copy of params - confirm against KernelData::Default
+ convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+
+ if (newParams.groups != 1 || newParams.split != 1)  // only groups == 1 and split == 1 are accepted
+ return false;
+
+ return true;
+}
+} // namespace kernel_selector
namespace kernel_selector {
-class Convolution_kernel_b_fs_yx_fsv16_imad_3x3 : public ConvolutionKernelBase {
+class Convolution_kernel_b_fs_yx_fsv16_imad : public ConvolutionKernelBase {
public:
using Parent = ConvolutionKernelBase;
- Convolution_kernel_b_fs_yx_fsv16_imad_3x3() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3") {}
- virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3() {}
+ Convolution_kernel_b_fs_yx_fsv16_imad() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad") {}
+ virtual ~Convolution_kernel_b_fs_yx_fsv16_imad() {}
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
FusedOpType::SCALE,
FusedOpType::ACTIVATION };
}
+
+ struct BlockParams {
+ size_t output_block_width;
+ size_t output_block_height;
+ size_t output_block_features;
+
+ size_t input_block_width;
+ size_t input_block_height;
+
+ size_t feature_slm_split;
+ };
+
+ BlockParams GetBlockParams(const convolution_params& params) const;
+ float EstimateRegPressure(const convolution_params& params, const BlockParams& block) const;
+ float EstimateOccupancy(const convolution_params& params, const BlockParams& block) const;
+ float EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const;
};
} // namespace kernel_selector
+++ /dev/null
-// Copyright (c) 2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
-#include <vector>
-#include <iostream>
-
-//
-// Kernel specific constants
-//
-#define SIMD_SIZE 16
-
-static size_t getOutBlock_X(const size_t output_size_x, const size_t stride_x, const size_t filter_size_x) {
- size_t output_block_width = 0;
- size_t max_block_size = std::min((SIMD_SIZE - filter_size_x) / stride_x + 1, output_size_x);
-
- if (output_size_x <= max_block_size)
- return output_size_x;
-
- for (size_t block = 4; block <= max_block_size; ++block) {
- if (output_size_x % block == 0)
- output_block_width = block;
- }
- if (output_block_width == 0 && output_size_x < max_block_size * 3) {
- size_t min_overhang = max_block_size;
- for (size_t block = 4; block <= max_block_size; ++block) {
- size_t overhang = block - output_size_x % block;
- if (overhang <= min_overhang) {
- min_overhang = overhang;
- output_block_width = block;
- }
- }
- }
-
- if (output_block_width == 0) {
- output_block_width = max_block_size;
- }
- return output_block_width;
-}
-
-static size_t get_ofm_per_wi(const size_t output_size_f) {
- if (output_size_f % 32 == 0)
- return 2;
- return 1;
-}
-
-namespace kernel_selector {
-
-ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetSupportedKey() const {
- ParamsKey k;
- k.EnableInputDataType(Datatype::INT8);
- k.EnableInputDataType(Datatype::UINT8);
-
- k.EnableOutputDataType(Datatype::INT8);
- k.EnableOutputDataType(Datatype::UINT8);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F16);
-
- k.EnableInputWeightsType(WeightsType::INT8);
-
- k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
- k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
-
- k.EnableDifferentTypes();
- k.EnableDifferentInputWeightsTypes();
- k.EnableTensorOffset();
- k.EnableTensorPitches();
- k.EnableBiasPerFeature();
- k.EnableNonBiasTerm();
- k.EnableBatching();
- k.EnableQuantization(QuantizationType::SYMMETRIC);
- k.DisableTuning();
- return k;
-}
-
-KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetKernelsData(const Params& params,
- const optional_params& options) const {
- return GetCommonKernelsData(params, options);
-}
-
-JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
- const auto& output = params.output;
-
- mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x)));
- mem_consts.AddConstant(MakeJitConstant("OFM_BLOCKS_PER_SIMD", get_ofm_per_wi(output.Feature().v)));
- mem_consts.AddConstant(MakeJitConstant("OFM_SIZE_PER_SIMD", SIMD_SIZE * get_ofm_per_wi(output.Feature().v)));
-
- if (!params.fused_ops.empty()) {
- auto input_dt = GetActivationType(params);
- FusedOpsConfiguration conf_scalar = {"", {"out_b", "out_f + j * 16", "out_y", "out_x + i"}, "dequantized", input_dt, 1};
- conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true);
- mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
- }
-
- return mem_consts;
-} // GetJitConstants
-
-ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::SetDefault(const convolution_params& params,
- int) const {
- DispatchData kd;
- const auto& output = params.output;
- auto output_block_width = getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x);
- auto ofm_blocks_per_simd = get_ofm_per_wi(output.Feature().v);
-
- kd.gws0 = CeilDiv(output.X().v, output_block_width);
- kd.gws1 = output.Y().v;
- kd.gws2 = output.Batch().v * Align(output.Feature().v / ofm_blocks_per_simd, SIMD_SIZE);
-
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = SIMD_SIZE;
-
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
-
- if (params.filterSize.x == 3)
- kd.efficiency = FORCE_PRIORITY_2;
- else
- kd.efficiency = FORCE_PRIORITY_5;
-
- return kd;
-} // SetDefault
-
-bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3::Validate(const Params& params, const optional_params& options) const {
- if (!Parent::Validate(params, options)) {
- return false;
- }
-
- KernelData kd = KernelData::Default<convolution_params>(params);
- convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
-
- if ((newParams.filterSize.x != newParams.filterSize.y) ||
- (newParams.filterSize.x != 3 && newParams.filterSize.x != 5)) {
- // Fitler size needs to be 3x3 or 5x5
- return false;
- }
-
- if ((newParams.stride.x != newParams.stride.y) ||
- (newParams.stride.x != 1 && newParams.stride.x != 2)) {
- // Strides must be 1x1 or 2x2
- return false;
- }
-
- if (newParams.groups != 1 || newParams.split != 1)
- return false;
-
- return true;
-}
-} // namespace kernel_selector
+++ /dev/null
-// Copyright (c) 2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
-#include <vector>
-#include <iostream>
-
-//
-// Kernel specific constants
-//
-#define SIMD_SIZE 16
-
-static size_t getOutBlock_X(size_t output_size_x) {
- auto output_block_width = 7;
- if (output_size_x % 8 == 0)
- output_block_width = 8;
- return output_block_width;
-}
-
-
-namespace kernel_selector {
-
-ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetSupportedKey() const {
- ParamsKey k;
- k.EnableInputDataType(Datatype::INT8);
- k.EnableInputDataType(Datatype::UINT8);
-
- k.EnableOutputDataType(Datatype::INT8);
- k.EnableOutputDataType(Datatype::UINT8);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F16);
-
- k.EnableInputWeightsType(WeightsType::INT8);
-
- k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
- k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
-
- k.EnableDifferentTypes();
- k.EnableDifferentInputWeightsTypes();
- k.EnableTensorOffset();
- k.EnableTensorPitches();
- k.EnableBiasPerFeature();
- k.EnableNonBiasTerm();
- k.EnableBatching();
- k.EnableQuantization(QuantizationType::SYMMETRIC);
- k.DisableTuning();
- return k;
-}
-
-KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetKernelsData(const Params& params,
- const optional_params& options) const {
- return GetCommonKernelsData(params, options);
-}
-
-JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetJitConstants(const convolution_params& params,
- const DispatchData& kd) const {
- auto mem_consts = Parent::GetJitConstants(params, kd);
- const auto& output = params.output;
-
- mem_consts.AddConstants({MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v))});
-
- if (!params.fused_ops.empty()) {
- auto input_dt = GetActivationType(params);
- FusedOpsConfiguration conf_scalar = {"",
- {"out_b", "(out_f + get_sub_group_id() * 16)", "out_y", "out_x + i"},
- "dequantized",
- input_dt,
- 1};
- conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true);
- mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
- }
-
- return mem_consts;
-} // GetJitConstants
-
-ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::SetDefault(
- const convolution_params& params,
- int) const {
- DispatchData kd;
- const auto& output = params.output;
-
- auto output_block_width = getOutBlock_X(output.X().v);
- kd.gws0 = output.X().v / output_block_width;
- kd.gws1 = output.Y().v;
- kd.gws2 = output.Batch().v * output.Feature().v * 2;
-
- kd.lws0 = 1;
- kd.lws1 = 1;
- kd.lws2 = SIMD_SIZE * 4;
-
- kd.cldnnStyle = {0, 0, 0, 0, 0};
- kd.gemmStyle = {0, 0, 0, 0, 0, 0};
-
- kd.efficiency = FORCE_PRIORITY_1;
-
- return kd;
-} // SetDefault
-
-bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::Validate(const Params& params, const optional_params& options) const {
- if (!Parent::Validate(params, options)) {
- return false;
- }
-
- KernelData kd = KernelData::Default<convolution_params>(params);
- convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
-
- if (newParams.output.Feature().v % (2 * SIMD_SIZE) != 0) {
- return false;
- }
-
- if ((newParams.filterSize.x != newParams.filterSize.y) ||
- newParams.filterSize.x != 3) {
- // Fitler size needs to be 3x3
- return false;
- }
-
- if ((newParams.stride.x != newParams.stride.y) ||
- (newParams.stride.x != 1 && newParams.stride.x != 2)) {
- // Strides must be 1x1 or 2x2
- return false;
- }
-
- if (newParams.output.X().v % 8 != 0 && newParams.output.X().v % 7 != 0) {
- return false;
- }
-
- if (CeilDiv(newParams.inputs[0].Feature().v, 16) % 4 != 0) {
- return false;
- }
-
- const auto& output = newParams.output;
- auto output_block_width = getOutBlock_X(output.X().v);
- size_t eu_count = params.engineInfo.computeUnitsCount;
- auto global_size =
- (output.X().v / output_block_width) * output.Y().v * ((output.Batch().v * output.Feature().v));
- if ((global_size / 16) > (eu_count * 7)) {
- return false;
- }
-
- if (newParams.groups != 1 || newParams.split != 1)
- return false;
-
- return true;
-}
-} // namespace kernel_selector
+++ /dev/null
-/*
-// Copyright (c) 2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#pragma once
-
-#include "convolution_kernel_base.h"
-#include <vector>
-
-namespace kernel_selector {
-
-class Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks : public ConvolutionKernelBase {
-public:
- using Parent = ConvolutionKernelBase;
- Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks") {}
- virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() {}
-
- KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
- ParamsKey GetSupportedKey() const override;
-
-protected:
- bool Validate(const Params& params, const optional_params& options) const override;
- JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
- DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
- bool NeedPaddedInput() const override { return true; }
- WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
- return WeightsLayout::os_is_yx_osv16_isv16;
- }
-
- std::vector<FusedOpType> GetSupportedFusedOps() const override {
- return { FusedOpType::ELTWISE,
- FusedOpType::QUANTIZE,
- FusedOpType::SCALE,
- FusedOpType::ACTIVATION };
- }
-};
-} // namespace kernel_selector
#include "convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h"
#include "convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.h"
#include "convolution_kernel_b_fs_yx_fsv16_imad_1x1.h"
-#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h"
-#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h"
+#include "convolution_kernel_b_fs_yx_fsv16_imad.h"
#include "convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp"
namespace kernel_selector {
// b_fs_yx_fsv16 int8
Attach<Convolution_kernel_b_fs_yx_fsv16_imad_1x1>();
- Attach<Convolution_kernel_b_fs_yx_fsv16_imad_3x3>();
- Attach<Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks>();
+ Attach<Convolution_kernel_b_fs_yx_fsv16_imad>();
// b_fs_yx_fsv16 and b_fs_zyx_fsv16
Attach<ConvolutionKernel_b_fs_yx_fsv16_depthwise>();
// See the License for the specific language governing permissions and
// limitations under the License.
-
#include "mvn_kernel_b_fs_yx_fsv16_imad.hpp"
#include "common/common_tools.h"
ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const {
ParamsKey k;
+
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableDifferentTypes();
// k.EnableMVNMode(MVNMode::ACROSS_CHANNELS);
k.EnableMVNMode(MVNMode::WITHIN_CHANNELS);
k.EnableMVNNormalizeVariance();
+
return k;
}
auto params = static_cast<const mvn_params&>(p);
// TODO Add support for input padding via iterating over y (parallel or in kernel).
- if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
+ if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0 ||
+ params.inputs[0].Z().pad.Total() != 0)
return false;
return true;
MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_params& params) const {
auto kd = Parent::SetDefault(params);
- auto items_num = params.output.X().v * params.output.Y().v;
+ auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v;
auto max_wg = params.engineInfo.maxWorkGroupSize;
auto slm_per_sg = fsv * 4;
auto max_slm = params.engineInfo.maxLocalMemSize;
if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order;
- idx_order = { "b", "(f + set_idx)", "(output_spatial / OUTPUT_SIZE_X)", "(output_spatial % OUTPUT_SIZE_X)" };
+
+ if (params.inputs[0].GetDims().size() <= 4) {
+ idx_order = {"b",
+ "(f + set_idx)",
+ "(output_spatial / OUTPUT_SIZE_X)",
+ "(output_spatial % OUTPUT_SIZE_X)"};
+ } else if (params.inputs[0].GetDims().size() == 5) {
+ idx_order = {"b",
+ "(f + set_idx)",
+ "(output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y))",
+ "((output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y)",
+ "(output_spatial % OUTPUT_SIZE_X)"};
+ }
+
auto conf = FusedOpsConfiguration("", idx_order, "normalized", activation_dt);
- jits.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ jits.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
return jits;
}
-MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti(const mvn_params& params) const {
+MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti(
+ const mvn_params& params) const {
MultiDispatchData md;
- auto items_num = params.output.X().v * params.output.Y().v;
+ auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v;
auto max_wg = params.engineInfo.maxWorkGroupSize;
auto slm_per_sg = fsv * 4;
auto max_slm = params.engineInfo.maxLocalMemSize;
return md;
}
-KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params, const optional_params& options, float estimated_time) const {
+KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params,
+ const optional_params& options,
+ float estimated_time) const {
if (!Validate(params, options))
return {};
0,
0);
kernel.arguments.clear(); // Clear original output argument
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
- kd.internalBufferSizes.push_back(
- params.output.Batch().v * Align(params.output.Feature().v, fsv) * runInfo.item_groups * intermidiate_bytes);
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
+ kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) *
+ runInfo.item_groups * intermidiate_bytes);
}
{
// Mean second stage
0,
0);
kernel.arguments.clear(); // Clear original output argument
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
- kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes);
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
+ kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) *
+ intermidiate_bytes);
}
if (params.mvnNormalizeVariance) {
// Variance first stage
0,
0);
kernel.arguments.clear(); // Clear original output argument
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0});
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
}
if (params.mvnNormalizeVariance) {
// Variance second stage
0,
0);
kernel.arguments.clear(); // Clear original output argument
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 });
- kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes);
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0});
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2});
+ kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) *
+ intermidiate_bytes);
}
{ // Final
auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_final);
false,
1,
GetFusedPrimitiveInputsCount(params));
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1});
if (params.mvnNormalizeVariance) {
- kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 });
+ kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2});
}
}
kd.intenralBufferDataType = Datatype::F32;
kd.estimatedTime = estimated_time;
- return { kd };
+ return {kd};
}
-
KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetKernelsData(const Params& params, const optional_params& optParams) const {
const mvn_params& orgParams = static_cast<const mvn_params&>(params);
auto max_slm = params.engineInfo.maxLocalMemSize;
auto slm_per_sg = fsv * 4;
auto max_lws = params.engineInfo.maxWorkGroupSize;
- auto items_num = orgParams.output.X().v * orgParams.output.Y().v;
+ auto items_num = orgParams.output.X().v * orgParams.output.Y().v * orgParams.output.Z().v;
auto enough_slm = max_lws / simd * simd * slm_per_sg <= max_slm;
auto enough_lws = max_lws / simd >= 1;
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/mmad.cl"
+#include "include/data_types.cl"
+
+#define AS_TYPE_N_(type, n, x) as_##type##n(x)
+#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
+#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
+
+#define AS_FILTER_TYPE_4(x) AS_TYPE_N(FILTER_TYPE, 4, x)
+
+#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
+#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
+
+#define SIMD 16
+#define FSV 16
+
+// Int8 convolution over FSV-blocked data that accumulates through IMAD.
+//
+// conv_input and weights are declared with their element types (INPUT0_TYPE / FILTER_TYPE);
+// the loads below reinterpret them as uint so that one vload4 fetches 16 packed bytes at once.
+//
+// Work decomposition, as set up by the index math at the top of the body:
+//   - global id 0 selects a run of OUT_BLOCK_WIDTH output columns,
+//   - global id 1 selects a run of OUT_BLOCK_HEIGHT output rows,
+//   - work-group id 2 encodes batch and output-feature group (OFM_SIZE_PER_SIMD features per group),
+//   - every work-group holds FEATURE_SLM_SPLIT sub-groups of SIMD lanes. With FEATURE_SLM_SPLIT > 1
+//     each sub-group accumulates over its own share of the input-feature slices and the partial
+//     sums are combined through local memory before the output is written.
+//
+// Stages: accumulate -> (optional) local-memory reduction -> bias -> fused ops -> store.
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(1, 1, FEATURE_SLM_SPLIT * SIMD)))
+KERNEL(convolution_gpu_b_fs_yx_fsv16_imad)(
+    const __global INPUT0_TYPE *conv_input,
+    __global OUTPUT_TYPE *output,
+    const __global FILTER_TYPE *weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE *biases,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx) {
+
+    // Lane offset used when loading the last SIMD-wide chunk of an input row: lanes that fall
+    // beyond IN_BLOCK_WIDTH are redirected to offset 0 instead of reading past the input block.
+    #define LUT_VALUE_CLAMP(x) (( (IN_BLOCK_WIDTH % SIMD == 0) || ((x) < IN_BLOCK_WIDTH % SIMD) ) ? (x) : 0)
+    const int tail_lane = LUT_VALUE_CLAMP(get_sub_group_local_id());
+    #undef LUT_VALUE_CLAMP
+
+    // Output coordinates handled by this work-item.
+    const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint out_y = (uint)get_global_id(1) * OUT_BLOCK_HEIGHT;
+    const uint out_b = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) / ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
+    // out_fg: first feature of the group; out_f: the feature owned by this lane.
+    uint out_fg = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) % ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
+    uint out_f = out_fg + get_sub_group_local_id();
+
+    // Top-left input coordinate of the receptive field (may be negative because of padding).
+    const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    // First input feature processed by this sub-group.
+#if FEATURE_SLM_SPLIT == 1
+    const uint k_start = 0;
+#else
+    const uint k_start = get_sub_group_id() * FSV;
+#endif
+
+    uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, k_start, 0, 0);
+    // Offset added per ofb step to reach the weights of the next output-feature block.
+    const uint filter_idx_diff = (ALIGN(FILTER_IFM_NUM, 16) * FILTER_SIZE_X * FILTER_SIZE_Y * 16);
+
+    uint input_start_idx = INPUT0_GET_INDEX(out_b, k_start, input_y, input_x);
+
+    // Accumulators (zero-initialised) and the register-resident input tile.
+    ACCUMULATOR_TYPE dotProd[OFM_BLOCKS_PER_SIMD][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH] = { };
+    uint4 input_val[IN_BLOCK_HEIGHT][CEIL_DIV(IN_BLOCK_WIDTH, SIMD)];
+
+    // Walk the input-feature slices assigned to this sub-group.
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16) / FEATURE_SLM_SPLIT; k++) {
+        __attribute__((opencl_unroll_hint(1)))
+        for (uint fyn = 0; fyn < FILTER_SIZE_Y / FILTER_SIZE_Y_UNROLL; fyn++) {
+            // Load input block IN_BLOCK_HEIGHT x IN_BLOCK_WIDTH, scattering width along sub-group
+            __attribute__((opencl_unroll_hint))
+            for (uint iyb = 0; iyb < IN_BLOCK_HEIGHT; ++iyb) {
+                __attribute__((opencl_unroll_hint))
+                for (uint ixb = 0; ixb < CEIL_DIV(IN_BLOCK_WIDTH, SIMD); ++ixb) {
+                    uint input_idx = input_start_idx + iyb * INPUT0_Y_PITCH * FSV + ixb * SIMD * FSV;
+                    if (ixb != CEIL_DIV(IN_BLOCK_WIDTH, SIMD) - 1) {
+                        // Full chunk: every lane reads its own column.
+                        input_val[iyb][ixb] = vload4(0, (__global uint *)(conv_input + input_idx + get_sub_group_local_id() * 16));
+                    } else {
+                        // Last chunk: use the clamped lane offset computed above.
+                        input_val[iyb][ixb] = vload4(0, (__global uint*)(conv_input + input_idx + tail_lane * 16));
+                    }
+                }
+            }
+
+            __attribute__((opencl_unroll_hint))
+            for (uint fyu = 0; fyu < FILTER_SIZE_Y_UNROLL; ++fyu) {
+                __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+                for (uint fx = 0; fx < FILTER_SIZE_X; fx++) {
+
+                    // One 16-byte weight vector per output-feature block for this filter tap.
+                    uint4 weights_val[OFM_BLOCKS_PER_SIMD];
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
+                        weights_val[ofb] = vload4(0, (__global uint *)(weights + filter_idx + ofb * filter_idx_diff));
+                    }
+
+                    // ive walks the four uint components of the loaded vectors.
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ive = 0; ive < 4; ive++) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
+                            __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+                            for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                                __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                                for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
+                                    // Position of the needed input element inside the loaded tile.
+                                    const uint y_block_idx = oh * STRIDE_SIZE_Y + fyu * DILATION_SIZE_Y;
+                                    const uint x_block_idx = ow * STRIDE_SIZE_X + fx * DILATION_SIZE_X;
+                                    // The tile is spread across lanes: pick the owning lane and chunk.
+                                    const uint shuffle_wi = x_block_idx % SIMD;
+                                    const uint shuffle_idx = x_block_idx / SIMD;
+
+                                    dotProd[ofb][oh][ow] = TO_ACCUMULATOR_TYPE(
+                                        IMAD(dotProd[ofb][oh][ow],
+                                             AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val[y_block_idx][shuffle_idx][ive], shuffle_wi)),
+                                             AS_FILTER_TYPE_4(weights_val[ofb][ive])));
+                                }
+                            }
+                        }
+                    }
+
+                    // Advance to the weights of the next filter tap.
+                    filter_idx += FSV * FSV;
+                }
+            }
+            // Move the input window down for the next group of filter rows.
+            input_start_idx += DILATION_SIZE_Y * INPUT0_Y_PITCH * FSV;
+        }
+        // Step to this sub-group's next feature slice and undo the row advance made above.
+        input_start_idx += INPUT0_FEATURE_PITCH * FSV * FEATURE_SLM_SPLIT - (FILTER_SIZE_Y / FILTER_SIZE_Y_UNROLL) * DILATION_SIZE_Y * INPUT0_Y_PITCH * FSV;
+
+        // Skip the weights that belong to the slices handled by the other sub-groups.
+        filter_idx += FSV * FSV * FILTER_SIZE_X * FILTER_SIZE_Y * (FEATURE_SLM_SPLIT - 1);
+    }
+
+#if FEATURE_SLM_SPLIT != 1
+    // Additional local memory reduction for feature split mode
+#   if FEATURE_SLM_SPLIT < OFM_BLOCKS_PER_SIMD
+#   error convolution_gpu_b_fs_yx_fsv16_imad.cl - OFM_BLOCKS_PER_SIMD must be less or equal to FEATURE_SLM_SPLIT
+#   endif
+
+    const uint partial_acc_size = (FEATURE_SLM_SPLIT - 1) * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH;
+    __local ACCUMULATOR_TYPE partial_acc[partial_acc_size];
+
+    // Base of the region this sub-group writes to; sub-groups 0 and 1 share base index 0.
+    uint sgid_start_idx = get_sub_group_id();
+    sgid_start_idx = sgid_start_idx == 0 ? 0 : sgid_start_idx - 1;
+    __local ACCUMULATOR_TYPE* partial_acc_ptr = partial_acc + sgid_start_idx * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH
+                                                + get_sub_group_local_id();
+
+    if (get_sub_group_id() < OFM_BLOCKS_PER_SIMD) {
+        // Sub-group `wg` keeps the accumulator of feature block `wg` (moved into dotProd[0])
+        // and publishes the accumulators of every other block to local memory.
+        __attribute__((opencl_unroll_hint))
+        for (uint wg = 0; wg < OFM_BLOCKS_PER_SIMD; ++wg) {
+            if (get_sub_group_id() == wg) {
+                // Blocks before the kept one.
+                __attribute__((opencl_unroll_hint))
+                for (uint ofb = 0; ofb < wg; ++ofb) {
+                    __attribute__((opencl_unroll_hint))
+                    for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                            const uint partial_acc_ptr_idx =
+                                ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD +
+                                oh * OUT_BLOCK_WIDTH * SIMD +
+                                ow * SIMD;
+                            partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow];
+                        }
+                    }
+                }
+                // The kept block becomes slot 0, which is what the later stages read.
+                __attribute__((opencl_unroll_hint))
+                for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                        dotProd[0][oh][ow] = dotProd[wg][oh][ow];
+                    }
+                }
+                // Blocks after the kept one.
+                __attribute__((opencl_unroll_hint))
+                for (uint ofb = wg + 1; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
+                    __attribute__((opencl_unroll_hint))
+                    for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                            const uint partial_acc_ptr_idx =
+                                ((wg != 0) ? OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OFM_SIZE_PER_SIMD : 0) +
+                                ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD +
+                                oh * OUT_BLOCK_WIDTH * SIMD +
+                                ow * SIMD;
+                            partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow];
+                        }
+                    }
+                }
+            }
+        }
+    } else {
+        // Sub-groups that produce no output publish all of their accumulators.
+        __attribute__((opencl_unroll_hint))
+        for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
+            __attribute__((opencl_unroll_hint))
+            for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                __attribute__((opencl_unroll_hint))
+                for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                    const uint partial_acc_ptr_idx =
+                        ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD +
+                        oh * OUT_BLOCK_WIDTH * SIMD +
+                        ow * SIMD;
+                    partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow];
+                }
+            }
+        }
+    }
+
+    // All partial sums must be visible in local memory before any sub-group reads them.
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Only the first OFM_BLOCKS_PER_SIMD sub-groups finish the computation and write output.
+    if (get_sub_group_id() >= OFM_BLOCKS_PER_SIMD)
+        return;
+
+    // Add the FEATURE_SLM_SPLIT - 1 partial sums published for this sub-group's feature block.
+    partial_acc_ptr = partial_acc + get_sub_group_id() * OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * SIMD + get_sub_group_local_id();
+    __attribute__((opencl_unroll_hint))
+    for (uint wg = 0; wg < FEATURE_SLM_SPLIT - 1; ++wg) {
+        __attribute__((opencl_unroll_hint))
+        for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+            __attribute__((opencl_unroll_hint))
+            for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                const uint partial_acc_ptr_idx =
+                    wg * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH +
+                    oh * OUT_BLOCK_WIDTH * SIMD +
+                    ow * SIMD;
+                dotProd[0][oh][ow] += partial_acc_ptr[partial_acc_ptr_idx];
+            }
+        }
+    }
+#endif
+
+    // Number of output-feature blocks each work-item finalises from here on. In split mode it is
+    // one block per sub-group, so the feature indices are shifted to that sub-group's block.
+#if FEATURE_SLM_SPLIT == 1
+#   define OFM_VALUES_PER_WI (OFM_BLOCKS_PER_SIMD)
+#else
+#   define OFM_VALUES_PER_WI 1
+    out_f += get_sub_group_id() * SIMD;
+    out_fg += get_sub_group_id() * SIMD;
+#endif
+
+#if BIAS_TERM
+    // One bias value per finalised feature block for this lane's feature.
+    BIAS_TYPE bias[OFM_VALUES_PER_WI];
+    __attribute__((opencl_unroll_hint))
+    for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) {
+        bias[ofb] = biases[out_f + ofb * SIMD];
+    }
+#endif
+
+    // Convert the accumulators to the activation type and apply the bias.
+    ACTIVATION_TYPE dequantized[OFM_VALUES_PER_WI][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH];
+    __attribute__((opencl_unroll_hint))
+    for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) {
+        __attribute__((opencl_unroll_hint))
+        for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+            __attribute__((opencl_unroll_hint))
+            for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                dequantized[ofb][oh][ow] = TO_ACTIVATION_TYPE(dotProd[ofb][oh][ow]);
+#if BIAS_TERM
+                dequantized[ofb][oh][ow] += bias[ofb];
+#endif
+            }
+        }
+    }
+
+    // Run the fused operations (when present) and convert to the output type.
+    OUTPUT_TYPE result[OFM_VALUES_PER_WI][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH];
+    __attribute__((opencl_unroll_hint))
+    for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) {
+#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD_SCALAR
+        FUSED_OPS_PRELOAD_SCALAR;
+#endif
+        __attribute__((opencl_unroll_hint))
+        for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+            __attribute__((opencl_unroll_hint))
+            for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) {
+                // The fused-op macros are generated by the host; they presumably consume
+                // dequantized_val and the ofb/oh/ow indices by name - confirm against the JIT setup.
+                ACTIVATION_TYPE dequantized_val = dequantized[ofb][oh][ow];
+#if HAS_FUSED_OPS
+# if FUSED_OPS_CAN_USE_PRELOAD_SCALAR
+                FUSED_OPS_CALC_SCALAR;
+# else
+                FUSED_OPS_SCALAR;
+# endif
+                result[ofb][oh][ow] = FUSED_OPS_RESULT_SCALAR;
+#else
+                result[ofb][oh][ow] = TO_OUTPUT_TYPE(dequantized_val);
+#endif
+            }
+        }
+    }
+
+    uint dst_index = OUTPUT_GET_INDEX(out_b, out_fg, out_y, out_x);
+
+    // Fast path: the whole OUT_BLOCK_WIDTH run lies inside the output row, so the results can be
+    // stored with sub-group block writes. Otherwise fall back to per-element stores below.
+    if ((OUTPUT_SIZE_X % OUT_BLOCK_WIDTH == 0 || out_x + OUT_BLOCK_WIDTH <= OUTPUT_SIZE_X)
+        && (OUTPUT_FEATURE_NUM % OFM_BLOCKS_PER_SIMD == 0) ) {
+        __attribute__((opencl_unroll_hint(OFM_VALUES_PER_WI)))
+        for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ofb++) {
+            // NOTE(review): `<=` also accepts a block whose first feature equals
+            // OUTPUT_FEATURE_NUM, i.e. one past the last valid feature; `<` looks intended.
+            // Confirm against the host-side choice of OFM_BLOCKS_PER_SIMD before changing.
+            bool good_of_block = (CEIL_DIV(OUTPUT_FEATURE_NUM, SIMD) % OFM_BLOCKS_PER_SIMD == 0) || (out_fg + ofb * SIMD <= OUTPUT_FEATURE_NUM);
+            if (good_of_block) {
+                __attribute__((opencl_unroll_hint))
+                for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                    bool good_y = (OUTPUT_SIZE_Y % OUT_BLOCK_HEIGHT == 0) || (out_y + oh < OUTPUT_SIZE_Y);
+                    if (good_y) {
+                        // Emit the row with the widest block writes allowed for the output
+                        // element size, then finish the remainder with narrower ones.
+                        uint ow = 0;
+                    #if OUTPUT_TYPE_SIZE == 1
+                        __attribute__((opencl_unroll_hint))
+                        for (; ow + 8 <= OUT_BLOCK_WIDTH; ow += 8) {
+                            MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8) result_val;
+                            __attribute__((opencl_unroll_hint))
+                            for (uint i = 0; i < 8; ++i) {
+                                result_val[i] = result[ofb][oh][ow + i];
+                            }
+                            DT_OUTPUT_BLOCK_WRITE8(output, dst_index, result_val);
+                            dst_index += 8 * SIMD;
+                        }
+                    #endif
+                    #if OUTPUT_TYPE_SIZE <= 2
+                        __attribute__((opencl_unroll_hint))
+                        for (; ow + 4 <= OUT_BLOCK_WIDTH; ow += 4) {
+                            MAKE_VECTOR_TYPE(OUTPUT_TYPE, 4) result_val;
+                            __attribute__((opencl_unroll_hint))
+                            for (uint i = 0; i < 4; ++i) {
+                                result_val[i] = result[ofb][oh][ow + i];
+                            }
+                            DT_OUTPUT_BLOCK_WRITE4(output, dst_index, result_val);
+                            dst_index += 4 * SIMD;
+                        }
+                    #endif
+
+                        __attribute__((opencl_unroll_hint))
+                        for (; ow + 2 <= OUT_BLOCK_WIDTH; ow += 2) {
+                            MAKE_VECTOR_TYPE(OUTPUT_TYPE, 2) result_val;
+                            __attribute__((opencl_unroll_hint))
+                            for (uint i = 0; i < 2; ++i) {
+                                result_val[i] = result[ofb][oh][ow + i];
+                            }
+                            DT_OUTPUT_BLOCK_WRITE2(output, dst_index, result_val);
+                            dst_index += 2 * SIMD;
+                        }
+
+                        // Odd block width: one column is still pending.
+                        if (OUT_BLOCK_WIDTH % 2 == 1) {
+                            OUTPUT_TYPE result_val = result[ofb][oh][ow];
+                            DT_OUTPUT_BLOCK_WRITE(output, dst_index, result_val);
+                            dst_index += 1 * SIMD;
+                        }
+                    }  // if (good_y)
+                    // Jump to the start of the same column run in the next output row.
+                    dst_index += OUTPUT_Y_PITCH * FSV - OUT_BLOCK_WIDTH * FSV;
+                }  // for (OUT_BLOCK_HEIGHT)
+            }  // if (good_of_block)
+            // Jump to the first row of the next feature block.
+            dst_index += OUTPUT_FEATURE_PITCH * FSV - OUTPUT_Y_PITCH * FSV * OUT_BLOCK_HEIGHT;
+        }  // for (OFM_VALUES_PER_WI)
+    } else {
+        // Slow path: per-element stores with explicit bounds handling.
+        __attribute__((opencl_unroll_hint(OFM_VALUES_PER_WI)))
+        for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ofb++) {
+            // NOTE(review): same `<=` vs `<` question as in the fast path above.
+            bool good_of_block = (CEIL_DIV(OUTPUT_FEATURE_NUM, SIMD) % OFM_BLOCKS_PER_SIMD == 0) || (out_fg + ofb * SIMD <= OUTPUT_FEATURE_NUM);
+            if (good_of_block) {
+                const uint dst_index = OUTPUT_GET_INDEX(out_b, out_f + ofb * SIMD, out_y, out_x);
+                __attribute__((opencl_unroll_hint))
+                for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) {
+                    bool good_y = (OUTPUT_SIZE_Y % OUT_BLOCK_HEIGHT == 0) || (out_y + oh < OUTPUT_SIZE_Y);
+                    if (good_y) {
+                        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                        for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
+
+                            // Stop at the right edge when the last block is only partially inside.
+#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0
+                            if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && ow >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH)
+                                break;
+#endif
+
+                            // Lanes past the last real feature of the final slice store zero.
+#if OUTPUT_FEATURE_NUM % SIMD != 0
+                            if (out_fg + (ofb + 1) * SIMD >= OUTPUT_FEATURE_NUM && get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % SIMD)
+                                result[ofb][oh][ow] = (OUTPUT_TYPE)0;
+#endif
+                            output[dst_index + ow * FSV + oh * OUTPUT_Y_PITCH * FSV] = result[ofb][oh][ow];
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#undef AS_INPUT0_TYPE_4
+#undef AS_TYPE_N
+#undef AS_TYPE_N_
+#undef AS_FILTER_TYPE_4
+
+#undef CEIL_DIV
+#undef ALIGN
+
+#undef SIMD
+#undef FSV
+#undef OFM_VALUES_PER_WI
+++ /dev/null
-// Copyright (c) 2018-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "include/common.cl"
-#include "include/fetch.cl"
-#include "include/imad.cl"
-#include "include/mmad.cl"
-
-#if QUANTIZATION_TERM
-#define ACCUMULATOR_TYPE int
-#define TO_ACCUMULATOR_TYPE(x) convert_int(x)
-#define ACTIVATION_TYPE float
-#define TO_ACTIVATION_TYPE(x) convert_float(x)
-#else
-#define ACCUMULATOR_TYPE INPUT0_TYPE
-#define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x)
-#define ACTIVATION_TYPE INPUT0_TYPE
-#define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
-#endif
-
-#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
-#define AS_TYPE_N_(type, n, x) as_##type##n(x)
-#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
-#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
-
-#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
-#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
-
-// int8 conv_input and weights data is packed to int32 "batches",
-// int/uint pointers here instead of INPUT0_TYPE/FILTER_TYPE for convenience
-__attribute__((intel_reqd_sub_group_size(16)))
-__attribute__((reqd_work_group_size(1, 1, 16)))
-KERNEL(convolution_gpu_b_fs_yx_fsv16_imad_3x3)(
- const __global INPUT0_TYPE *conv_input,
- __global OUTPUT_TYPE *output,
- const __global FILTER_TYPE *weights,
-#if BIAS_TERM
- const __global BIAS_TYPE *biases,
-#endif
-#if HAS_FUSED_OPS_DECLS
- FUSED_OPS_DECLS,
-#endif
- uint split_idx) {
-
- #define LUT_VALUE_CLAMP(x) ((x) < (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + FILTER_SIZE_X ? (x) : 0)
- const int tmp[16] = {
- LUT_VALUE_CLAMP(0),
- LUT_VALUE_CLAMP(1),
- LUT_VALUE_CLAMP(2),
- LUT_VALUE_CLAMP(3),
- LUT_VALUE_CLAMP(4),
- LUT_VALUE_CLAMP(5),
- LUT_VALUE_CLAMP(6),
- LUT_VALUE_CLAMP(7),
- LUT_VALUE_CLAMP(8),
- LUT_VALUE_CLAMP(9),
- LUT_VALUE_CLAMP(10),
- LUT_VALUE_CLAMP(11),
- LUT_VALUE_CLAMP(12),
- LUT_VALUE_CLAMP(13),
- LUT_VALUE_CLAMP(14),
- LUT_VALUE_CLAMP(15)
- };
- #undef LUT_VALUE_CLAMP
-
- const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
- const uint out_y = get_global_id(1);
- const uint out_b = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) / ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
- const uint out_fg = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) % ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
- const uint out_f = out_fg + get_sub_group_local_id();
- ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * OFM_BLOCKS_PER_SIMD] = {0};
- const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
-
- const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
-
- uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, 0, 0, 0);
-#if OFM_BLOCKS_PER_SIMD == 2
- uint filter_idx2 = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f + 16, 0, 0, 0);
-#endif
-
- __attribute__((opencl_unroll_hint(1)))
- for (uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16); k++) {
- __attribute__((opencl_unroll_hint(1)))
- for (uint j = 0; j < FILTER_SIZE_Y; j++) {
- uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, k * 16, input_y + j, input_x + tmp[get_sub_group_local_id()]);
- uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx));
-
- __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
- for (uint i = 0; i < FILTER_SIZE_X; i++) {
-
- uint4 weights_val = vload4(0, (__global uint *)(weights + filter_idx));
-#if OFM_BLOCKS_PER_SIMD == 2
- uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx2));
-#endif
-
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
- const uint ow_offset = ow + OUT_BLOCK_WIDTH;
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3)));
-
-#if OFM_BLOCKS_PER_SIMD == 2
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s0)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s1)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s2)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s3)));
-#endif
- }
- filter_idx += 16 * 16;
-#if OFM_BLOCKS_PER_SIMD == 2
- filter_idx2 += 16 * 16;
-#endif
- }
- }
- }
-
-#if BIAS_TERM
- BIAS_TYPE bias[OFM_BLOCKS_PER_SIMD] = { biases[out_f]
-#if OFM_BLOCKS_PER_SIMD == 2
- , biases[out_f + 16]
-#endif
- };
-#endif
- __attribute__((opencl_unroll_hint(OFM_BLOCKS_PER_SIMD)))
- for (uint j = 0; j < OFM_BLOCKS_PER_SIMD; j++) {
- const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + j * 16, out_y, out_x);
-#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
- FUSED_OPS_PRELOAD;
-#endif
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
-
-#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0
- if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && i >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH)
- break;
-#endif
- ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0;
-#if BIAS_TERM
- dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i] + bias[j];
-#else
- dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i];
-#endif
- OUTPUT_TYPE result;
-#if HAS_FUSED_OPS
- #if FUSED_OPS_CAN_USE_PRELOAD
- FUSED_OPS_CALC;
- #else
- FUSED_OPS;
- #endif
- result = FUSED_OPS_RESULT;
-#else
- result = TO_OUTPUT_TYPE(dequantized);
-#endif
-
-#if OUTPUT_FEATURE_NUM % 16 != 0
- if (out_fg + j * 16 + 16 > OUTPUT_FEATURE_NUM && get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % 16)
- result = (OUTPUT_TYPE)0;
-#endif
- output[dst_index + i * 16] = result;
- }
- }
-}
-
-#undef AS_INPUT0_TYPE_4
-#undef AS_TYPE_N
-#undef AS_TYPE_N_
-#undef MAKE_VECTOR_TYPE
-#undef TO_ACTIVATION_TYPE
-#undef ACTIVATION_TYPE
-#undef TO_ACCUMULATOR_TYPE
-#undef ACCUMULATOR_TYPE
-
-#undef CEIL_DIV
-#undef ALIGN
+++ /dev/null
-// Copyright (c) 2018-2019 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "include/common.cl"
-#include "include/fetch.cl"
-#include "include/imad.cl"
-#include "include/mmad.cl"
-
-#if QUANTIZATION_TERM
- #define ACCUMULATOR_TYPE int
- #define TO_ACCUMULATOR_TYPE(x) convert_int(x)
- #define ACTIVATION_TYPE float
- #define TO_ACTIVATION_TYPE(x) convert_float(x)
-#else
- #define ACCUMULATOR_TYPE INPUT0_TYPE
- #define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x)
- #define ACTIVATION_TYPE INPUT0_TYPE
- #define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
-#endif
-
-#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
-#define AS_TYPE_N_(type, n, x) as_##type##n(x)
-#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
-#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
-
-#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
-
-__attribute__((intel_reqd_sub_group_size(16)))
-KERNEL(convolution_gpu_b_fs_yx_fsv16_3x3_ks)(
- const __global INPUT0_TYPE *conv_input,
- __global OUTPUT_TYPE *output,
- const __global FILTER_TYPE *weights,
-#if BIAS_TERM
- const __global BIAS_TYPE *biases,
-#endif
-#if HAS_FUSED_OPS_DECLS
- FUSED_OPS_DECLS,
-#endif
- uint split_idx)
-{
-#if OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 1
- const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0};
-#elif OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 2
- const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
-#elif OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 1
- const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0};
-#else // OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 2
- const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-#endif
-
- const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
- const uint out_y = get_global_id(1);
- const uint out_f = (uint)(get_group_id(2) * 32 + get_sub_group_local_id());
- const uint subgroup_id = get_sub_group_id();
- const uint subgroup_local_id = get_sub_group_local_id();
- const uint feature_offset = subgroup_id * INPUT0_FEATURE_NUM / 4;
- const uint out_b = (uint)(get_group_id(2) * 32) / OUTPUT_FEATURE_NUM;
-
- ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * 2] = { 0 };
- const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
- const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
-
- uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, feature_offset, 0, 0);
- uint diff_filter_idx = 16*3*3*FILTER_IFM_NUM;
-
- __attribute__((opencl_unroll_hint(1)))
- for(uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16)/4; k++ ) {
- __attribute__((opencl_unroll_hint(1)))
- for(uint j = 0; j < FILTER_SIZE_Y; j++) {
- uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, feature_offset + k * 16, input_y + j, input_x + tmp[subgroup_local_id]);
- uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx));
-
- __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
- for(uint i = 0; i < FILTER_SIZE_X; i++) {
-
- uint4 weights_val = vload4(0, (__global uint*)(weights + filter_idx));
- uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx + diff_filter_idx));
-
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for(uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
- const uint ow_offset = ow + OUT_BLOCK_WIDTH;
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2)));
- dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3)));
-
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s0)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s1)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s2)));
- dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s3)));
- }
- filter_idx += 16 * 16;
- }
- }
- }
-
- //k slicing summing up with SLM
- __local ACCUMULATOR_TYPE partial_acc[16 * OUT_BLOCK_WIDTH * 6];
- if(subgroup_id == 0)
- {
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for(uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- partial_acc[16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
- }
- }
- else if(subgroup_id == 1)
- {
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for(uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- partial_acc[i * 16 + subgroup_local_id] = dotProd[i];
- dotProd[i] = dotProd[i + OUT_BLOCK_WIDTH];
- }
- }
- else if (subgroup_id == 2)
- {
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- partial_acc[2 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
- partial_acc[3 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
-
- }
- }
- else if (subgroup_id == 3)
- {
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- partial_acc[4 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
- partial_acc[5 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- if (subgroup_id < 2) {
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- dotProd[i] += partial_acc[(i + subgroup_id * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
- dotProd[i] += partial_acc[(i + (subgroup_id + 2) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
- dotProd[i] += partial_acc[(i + (subgroup_id + 4) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
- }
-#if BIAS_TERM
- BIAS_TYPE bias = biases[out_f + get_sub_group_id() * 16];
-#endif
-
-#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
- FUSED_OPS_PRELOAD;
-#endif
- const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + subgroup_id * 16, out_y, out_x);
- __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
- for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
- {
- ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0;
-#if BIAS_TERM
- dequantized = (ACTIVATION_TYPE)dotProd[i] + bias;
-#else
- dequantized = (ACTIVATION_TYPE)dotProd[i];
-#endif
-#if HAS_FUSED_OPS
- #if FUSED_OPS_CAN_USE_PRELOAD
- FUSED_OPS_CALC;
- #else
- FUSED_OPS;
- #endif
- output[dst_index + i * 16] = FUSED_OPS_RESULT;
-#else
- output[dst_index + i * 16] = TO_OUTPUT_TYPE(dequantized);
-#endif
- }
- }
-}
-
-#undef AS_INPUT0_TYPE_4
-#undef AS_TYPE_N
-#undef AS_TYPE_N_
-#undef MAKE_VECTOR_TYPE
-#undef TO_ACTIVATION_TYPE
-#undef ACTIVATION_TYPE
-#undef TO_ACCUMULATOR_TYPE
-#undef ACCUMULATOR_TYPE
-
-#undef CEIL_DIV
// If required analogously the mvn_var_1 and mvn_var_2 kernels should be enqueud, additionally providing results from
// mvn_mean_2 kernel.
//
-// Finally the mvn_final kernel should be enqueued with provided buffers with outputs from previous kernels (mvn_mean_2, mvn_var_2).
-// To enable parallel mode PRECALC_MEAN and optionally PRECALC_VARIANCE definitions should be used.
-// As at this stage there is no further need to synchronize and this kernel will perform simple normalization given known mean and inverse of variance.
-// Due to this this kernel can be enqueued with full paralellization, not limiting it to single work-group.
+// Finally the mvn_final kernel should be enqueued with provided buffers with outputs from previous kernels
+// (mvn_mean_2, mvn_var_2). To enable parallel mode PRECALC_MEAN and optionally PRECALC_VARIANCE definitions should be
+// used. At this stage there is no further need to synchronize: this kernel performs a simple normalization given
+// the known mean and inverse of variance. Because of this, the kernel can be enqueued with full parallelization,
+// not limiting it to a single work-group.
// lws: SIMD x 1 x 1
// gws: (x * y) / SIMD * SIMD x feature x batch
//
// In parallel mode this must be equal to LWS * ITEM_GROUPS, except in mvn_final kernel where it has no restrictions.
// ITEM_GROUPS - Number of work-groups performing accumulation in parallel mode. Should be the same in both stages of parallel kernels.
-
#define FSV 16
#define INPUT_SLICE_PITCH 16
#define SG_NUM (LWS / SIMD)
#define TO_MEAN_PACKED_TYPE CAT(convert_, MEAN_PACKED_TYPE)
-#define ITEMS_NUM (OUTPUT_SIZE_X * OUTPUT_SIZE_Y)
+#define ITEMS_NUM (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z)
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
const uint sgid = get_sub_group_id();
const uint sglid = get_sub_group_local_id();
+#if INPUT0_DIMS == 5
+ const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0);
+#else // INPUT0_DIMS == 4
const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
-
+#endif
INT_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_input)(input, data_sets_offset, get_global_id(0));
const uint sgid = get_sub_group_id();
const uint sglid = get_sub_group_local_id();
+#if INPUT0_DIMS == 5
+ const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0);
+#else // INPUT0_DIMS == 4
const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
-
+#endif
MEAN_TYPE mean = means[flat_data_set_group * FSV + sglid];
MEAN_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_sq_dev)(input, data_sets_offset, get_global_id(0), mean);
const uint sgid = get_sub_group_id() + items_group * SG_NUM;
const uint sglid = get_sub_group_local_id();
+#if INPUT0_DIMS == 5
+ const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0);
+#else // INPUT0_DIMS == 4
const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
+#endif
uint input_offset;
#if (!PRECALC_MEAN || (NORMALIZE_VARIANCE && !PRECALC_VARIANCE)) && SG_NUM != 1
#if OUTPUT_IS_FP
input_offset = data_sets_offset + sgid * SIMD * FSV;
uint output_spatial_base = sgid * SIMD;
+#if OUTPUT_DIMS == 5
+ uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0, 0) + sgid * SIMD * FSV;
+#else // OUTPUT_DIMS == 4
uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV;
+#endif
// For fused ops to align with non-fp path
const uint set_idx = sglid;
uint output_spatial = output_spatial_base + si;
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
OUTPUT_TYPE result;
-#if HAS_FUSED_OPS
+# if HAS_FUSED_OPS
FUSED_OPS;
result = FUSED_OPS_RESULT;
-#else
+# else
result = TO_OUTPUT_TYPE(normalized);
-#endif
+# endif
#if !OUTPUT_PAD_IN_ITEMS
DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
#endif
}
uint output_spatial = output_spatial_base + si;
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
OUTPUT_TYPE result;
-#if HAS_FUSED_OPS
+# if HAS_FUSED_OPS
FUSED_OPS;
result = FUSED_OPS_RESULT;
-#else
+# else
result = TO_OUTPUT_TYPE(normalized);
-#endif
+# endif
#if !OUTPUT_PAD_IN_ITEMS
DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
#endif
}
- } else if (lws_uniform_leftovers > 0 &&
- sg_uniform_leftovers > 0 &&
- sgid == lws_uniform_leftovers_full_simds) {
+ } else if (lws_uniform_leftovers > 0 && sg_uniform_leftovers > 0 && sgid == lws_uniform_leftovers_full_simds) {
// TODO: May be worth to consider the data here as across sub-group
// Rest of leftovers, still use whole sub-group, but change addresses to not load extra data.
INPUT_PACKED_TYPE in_pack;
uint output_spatial = output_spatial_base + si;
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
OUTPUT_TYPE result;
-#if HAS_FUSED_OPS
- FUSED_OPS;
- result = FUSED_OPS_RESULT;
-#else
- result = TO_OUTPUT_TYPE(normalized);
-#endif
+# if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
+# else
+ result = TO_OUTPUT_TYPE(normalized);
+# endif
#if !OUTPUT_PAD_IN_ITEMS
DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
#endif
}
}
-#else // => !OUTPUT_IS_FP
+#else // => !OUTPUT_IS_FP
input_offset = data_sets_offset + sgid * SIMD * FSV;
+#if OUTPUT_DIMS == 5
+ uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0, 0) + sgid * SIMD * FSV;
+#else // OUTPUT_DIMS == 4
uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV;
+#endif
uint output_spatial = sgid * SIMD + sglid;
for (uint spatial_idx = 0; spatial_idx < ITEMS_NUM / GWS; ++spatial_idx) {
__attribute__((opencl_unroll_hint))
for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
- #if HAS_FUSED_OPS
+# if HAS_FUSED_OPS
FUSED_OPS;
result[set_idx] = FUSED_OPS_RESULT;
- #else
+# else
result[set_idx] = TO_OUTPUT_TYPE(normalized);
- #endif
+# endif
}
#if !OUTPUT_PAD_IN_ITEMS
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
#endif
__attribute__((opencl_unroll_hint))
for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
- #if HAS_FUSED_OPS
+# if HAS_FUSED_OPS
FUSED_OPS;
result[set_idx] = FUSED_OPS_RESULT;
- #else
+# else
result[set_idx] = TO_OUTPUT_TYPE(normalized);
- #endif
+# endif
}
#if !OUTPUT_PAD_IN_ITEMS
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
#endif
- } else if (lws_uniform_leftovers > 0 &&
- sg_uniform_leftovers > 0 &&
- sgid == lws_uniform_leftovers_full_simds) {
+ } else if (lws_uniform_leftovers > 0 && sg_uniform_leftovers > 0 && sgid == lws_uniform_leftovers_full_simds) {
// TODO: May be worth to consider the data here as across sub-group
// Rest of leftovers, still use whole sub-group, but change addresses to not load extra data.
INPUT_PACKED_TYPE in_pack = ((const __global INPUT_PACKED_TYPE*)(input + input_offset))[sglid % sg_uniform_leftovers];
__attribute__((opencl_unroll_hint))
for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
- #if HAS_FUSED_OPS
+# if HAS_FUSED_OPS
FUSED_OPS;
result[set_idx] = FUSED_OPS_RESULT;
- #else
+# else
result[set_idx] = TO_OUTPUT_TYPE(normalized);
- #endif
+# endif
}
if (sglid < sg_uniform_leftovers) {
#if !OUTPUT_PAD_IN_ITEMS
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
#else
+# if OUTPUT_DIMS == 5
+ uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ uint x = output_spatial % OUTPUT_SIZE_X;
+ output_offset = OUTPUT_GET_INDEX(b, f, z, y, x);
+# else // OUTPUT_DIMS == 4
uint x = output_spatial % OUTPUT_SIZE_X;
uint y = output_spatial / OUTPUT_SIZE_X;
output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+# endif
((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
#endif
}
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw },
+ { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw },
{ std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw },
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
mvn_gpu::create);
+ implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
+ mvn_gpu::create);
+ implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
+ mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
mvn_gpu::create);
implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
if (usr_layout.format == format::b_fs_yx_fsv16 &&
(opt_lower_pad % 16 != 0 || opt_upper_pad % 16 != 0))
return;
+ if (input_layout.data_padding.lower_size().batch[0] != 0 || input_layout.data_padding.upper_size().batch[0] != 0 ||
+ input_layout.data_padding.lower_size().spatial[0] != 0 || input_layout.data_padding.upper_size().spatial[0] != 0 ||
+ input_layout.data_padding.lower_size().spatial[1] != 0 || input_layout.data_padding.upper_size().spatial[1] != 0)
+ return;
}
if (format == format::bfyx && crop_size.batch[0] == input_layout.size.batch[0] &&
auto next_output_layout = next.get_output_layout();
auto prev_dt = prev.get_output_layout().data_type;
+ auto is_input_idx = [&](size_t idx) -> bool {
+ if (&next.get_dependency(idx) == &prev)
+ return true;
+ if (next.get_dependency(idx).is_type<reorder>() && &next.get_dependency(idx).get_dependency(0) == &prev)
+ return true;
+ return false;
+ };
+
if (next.is_type<reorder>())
return true;
if (next.is_type<convolution>() && fmt_prev == format::b_fs_yx_fsv4 && fmt_next == format::byxf_af32 && next.as<convolution>().get_groups() == 1)
return true;
+ if (next.is_type<convolution>() && fmt_prev == format::b_fs_yx_fsv16 && fmt_next == format::b_fs_yx_fsv4 && is_input_idx(0))
+ return true;
+
if (next.is_type<quantize>() && fmt_prev == format::bfyx && prev.is_input() && prev_dt == data_types::u8)
return true;
.all_test_params(format::b_fs_yx_fsv16)
.add(convolution_random_test_all_params{
1, 89, 3, { 1, 1 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 1, format::b_fs_yx_fsv4, false, false, false })
+ .add(convolution_random_test_all_params{
+ 1, 16, 32, { 3, 3 }, { 17, 17 }, { 1, 1 }, { -8, -8 }, { 1, 1 }, true, 1, format::b_fs_yx_fsv16, false, false, true })
),
to_string_convolution_all_params
);
#include <api/input_layout.hpp>
#include "api/crop.hpp"
#include <api/eltwise.hpp>
+#include <api/reorder.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
EXPECT_EQ(output_ptr_2[i], out2[i]);
}
+TEST(crop_gpu, basic_in1x4x1x1_crop_pad) {  // Crops 3 of 4 features from an input first reordered into a padded layout.
+ const auto& engine = get_test_engine();
+
+ auto batch_num = 1;
+ auto feature_num = 4;
+ auto x_size = 1;
+ auto y_size = 1;
+
+ auto crop_batch_num = 1;
+ auto crop_feature_num_1 = 3;
+ auto crop_x_size = 1;
+ auto crop_y_size = 1;
+ auto feature_offset_1 = 0;
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
+
+ padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1});  // NOTE(review): presumably lower/upper padding of 1 on both spatial axes - confirm argument order
+ auto padded_layout = input.get_layout().with_padding(in_pad);
+ topology topology;
+ topology.add(input_layout("input", input.get_layout()));
+ topology.add(reorder("input_reorder", "input", padded_layout));  // feeds the crop from the layout carrying in_pad
+ topology.add(crop("crop1", "input_reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) }));
+ topology.add(reorder("out_reorder", "crop1", format::bfyx, data_types::f32));  // output is read back as plain bfyx f32
+
+ std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
+ std::vector<float> out1 = { -1.f, 2.f,-3.f };  // expected result: the first three input values
+ set_values(input, input_vec);
+ build_options bo;
+ bo.set_option(build_option::optimize_data(true));  // build the network with the optimize_data option enabled
+
+ network network(engine, topology, bo);
+ network.set_input_data("input", input);
+ auto outputs = network.execute();
+
+ auto output = outputs.at("out_reorder").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ for (size_t i = 0; i < out1.size();i++)  // compare element by element against the expected values
+ EXPECT_EQ(output_ptr[i], out1[i]);
+}
+
TEST(crop_gpu, basic_int_in1x4x1x1_split) {
// Tests split with crop implementation
// _CROP_1(1x3x1x1,offset(0x0x0x0))
using namespace cldnn;
-class mvn_gpu_test : public ::testing::TestWithParam<cldnn::format>
-{
-};
+class mvn_gpu_test : public ::testing::TestWithParam<cldnn::format> {};
template <typename T>
-void mvn_compute_mean_accross_channels(cldnn::memory &output, bool normalize_variance) {
+void mvn_compute_mean_accross_channels(cldnn::memory& output, bool normalize_variance) {
auto output_size = output.get_layout().size;
uint32_t batch_size = output_size.batch[0];
uint32_t feature_size = output_size.feature[0];
+ uint32_t z_size = output_size.spatial[2];
uint32_t y_size = output_size.spatial[1];
uint32_t x_size = output_size.spatial[0];
float err_margin = output.get_layout().data_type == data_types::f32 ? 1e-03F : 1e-02F;
- for (uint32_t b = 0; b < batch_size; ++b)
- {
+ for (uint32_t b = 0; b < batch_size; ++b) {
float sum = 0.f;
float variance = 0.f;
- for (uint32_t f = 0; f < feature_size; ++f)
- {
- for (uint32_t y = 0; y < y_size; ++y)
- {
- for (uint32_t x = 0; x < x_size; ++x)
- {
- auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
- size_t data_index = output.get_layout().get_linear_offset(index_tensor);
- float data = static_cast<float>(buff[data_index]);
- sum += data;
- if (normalize_variance)
- variance += data*data;
+ for (uint32_t f = 0; f < feature_size; ++f) {
+ for (uint32_t z = 0; z < z_size; z++) {
+ for (uint32_t y = 0; y < y_size; ++y) {
+ for (uint32_t x = 0; x < x_size; ++x) {
+ auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0));
+ size_t data_index = output.get_layout().get_linear_offset(index_tensor);
+ float data = static_cast<float>(buff[data_index]);
+ sum += data;
+ if (normalize_variance)
+ variance += data * data;
+ }
}
}
}
- sum /= feature_size * y_size * x_size;
+ sum /= feature_size * y_size * x_size * z_size;
T result_sum = static_cast<T>(sum);
EXPECT_NEAR(result_sum, 0.f, err_margin) << "at b=" << b;
- if (normalize_variance)
- {
- variance /= feature_size * y_size * x_size;
+ if (normalize_variance) {
+ variance /= feature_size * y_size * x_size * z_size;
T result_variance = static_cast<T>(variance);
EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b;
}
}
template <typename T>
-void mvn_compute_mean_within_channels(cldnn::memory &output, bool normalize_variance) {
+void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_variance) {
auto output_size = output.get_layout().size;
uint32_t batch_size = output_size.batch[0];
uint32_t feature_size = output_size.feature[0];
+ uint32_t z_size = output_size.spatial[2];
uint32_t y_size = output_size.spatial[1];
uint32_t x_size = output_size.spatial[0];
float err_margin = output.get_layout().data_type == data_types::f32 ? 1e-03F : 1e-02F;
- for (uint32_t b = 0; b < batch_size; ++b)
- {
- for (uint32_t f = 0; f < feature_size; ++f)
- {
+ for (uint32_t b = 0; b < batch_size; ++b) {
+ for (uint32_t f = 0; f < feature_size; ++f) {
float sum = 0.f;
float variance = 0.f;
- for (uint32_t y = 0; y < y_size; ++y)
- {
- for (uint32_t x = 0; x < x_size; ++x)
- {
- auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
- size_t data_index = output.get_layout().get_linear_offset(index_tensor);
- float data = static_cast<float>(buff[data_index]);
- sum += data;
- if (normalize_variance)
- variance += data*data;
+ for (uint32_t z = 0; z < z_size; ++z) {
+ for (uint32_t y = 0; y < y_size; ++y) {
+ for (uint32_t x = 0; x < x_size; ++x) {
+ auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0));
+ size_t data_index = output.get_layout().get_linear_offset(index_tensor);
+ float data = static_cast<float>(buff[data_index]);
+ sum += data;
+ if (normalize_variance)
+ variance += data * data;
+ }
}
}
- sum /= y_size * x_size;
+ sum /= y_size * x_size * z_size;
T result_sum = static_cast<T>(sum);
EXPECT_NEAR(result_sum, 0.f, err_margin) << "at b=" << b << ", f=" << f;
- if (normalize_variance)
- {
- variance /= y_size * x_size;
+ if (normalize_variance) {
+ variance /= y_size * x_size * z_size;
T result_variance = static_cast<T>(variance);
EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b << ", f=" << f;
}
}
}
-TEST(mvn_gpu_test, mvn_test_across_channels_bfyx)
-{
- //mvn accross channels fp32 test with normalize_variance set to false
+TEST(mvn_gpu_test, mvn_test_across_channels_bfyx) {
+ // mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
mvn_compute_mean_accross_channels<float>(output, false);
}
-TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16)
-{
- //mvn accross channels fp16 test with normalize_variance set to false
+TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16) {
+ // mvn across channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
mvn_compute_mean_accross_channels<FLOAT16>(output, false);
}
-TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance)
-{
- //mvn accross channels fp32 test with normalize_variance set to true
+TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance) {
+ // mvn across channels fp32 test with normalize_variance set to true
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
mvn_compute_mean_accross_channels<float>(output, true);
}
-TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16)
-{
- //mvn accross channels fp16 test with normalize_variance set to true
+TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16) {
+ // mvn across channels fp16 test with normalize_variance set to true
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
mvn_compute_mean_accross_channels<FLOAT16>(output, true);
}
-TEST(mvn_gpu_test, mvn_test_within_channels_bfyx)
-{
- //mvn within channels fp32 test with normalize_variance set to false
+TEST(mvn_gpu_test, mvn_test_within_channels_bfyx) {
+ // mvn within channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
mvn_compute_mean_within_channels<float>(output, false);
}
-TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16)
-{
- //mvn within channels fp16 test with normalize_variance set to false
+TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16) {
+ // mvn within channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
mvn_compute_mean_within_channels<FLOAT16>(output, false);
}
-TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance)
-{
- //mvn within channels fp32 test with normalize_variance set to true
+TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance) {
+ // mvn within channels fp32 test with normalize_variance set to true
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
mvn_compute_mean_within_channels<float>(output, true);
}
-TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16)
-{
- //mvn within channels fp16 test with normalize_variance set to true
+TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16) {
+ // mvn within channels fp16 test with normalize_variance set to true
using namespace cldnn;
using namespace tests;
const auto& engine = get_test_engine();
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
+ auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
template <typename T>
- void fill_data(memory& mem, const tests::VVVVF<T>& data) {
+ void fill_data(memory& mem, const tests::VVVVVF<T>& data) {
auto size = mem.get_layout().size;
auto ptr = mem.pointer<T>();
for (size_t bi = 0; bi < static_cast<size_t>(size.batch[0]); ++bi) {
for (size_t fi = 0; fi < static_cast<size_t>(size.feature[0]); ++fi) {
- for (size_t yi = 0; yi < static_cast<size_t>(size.spatial[1]); ++yi) {
- for (size_t xi = 0; xi < static_cast<size_t>(size.spatial[0]); ++xi) {
- auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
- auto offset = mem.get_layout().get_linear_offset(tensor_addr);
- ptr[offset] = data[bi][fi][xi][yi];
+ for (size_t zi = 0; zi < static_cast<size_t>(size.spatial[2]); ++zi) {
+ for (size_t yi = 0; yi < static_cast<size_t>(size.spatial[1]); ++yi) {
+ for (size_t xi = 0; xi < static_cast<size_t>(size.spatial[0]); ++xi) {
+ auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+ auto offset = mem.get_layout().get_linear_offset(tensor_addr);
+ ptr[offset] = data[bi][fi][xi][yi][zi];
+ }
}
}
}
template <typename T>
void fill_random_data(memory& mem, int min, int max, int k = 8) {
auto size = mem.get_layout().size;
- auto input_data = tests::generate_random_4d<T>(size.batch[0], size.feature[0], size.spatial[0], size.spatial[1], min, max, k);
+ auto input_data = tests::generate_random_5d<T>(size.batch[0],
+ size.feature[0],
+ size.spatial[0],
+ size.spatial[1],
+ size.spatial[2],
+ min,
+ max,
+ k);
fill_data(mem, input_data);
}
auto& size = params.input_size;
auto& output_pad = params.output_pad;
- auto input = memory::allocate(eng, { params.input_type, params.input_format, size });
+ auto input = memory::allocate(eng, {params.input_type, params.input_format, size});
switch (params.input_type) {
- case data_types::f32:
- fill_random_data<float>(input, -127, 127);
- break;
- case data_types::f16:
- fill_random_data<FLOAT16>(input, -127, 127);
- break;
- case data_types::i8:
- fill_random_data<int8_t>(input, -127, 127);
- break;
- case data_types::u8:
- fill_random_data<uint8_t>(input, -127, 127);
- break;
- default:
- break;
+ case data_types::f32:
+ fill_random_data<float>(input, -127, 127);
+ break;
+ case data_types::f16:
+ fill_random_data<FLOAT16>(input, -127, 127);
+ break;
+ case data_types::i8:
+ fill_random_data<int8_t>(input, -127, 127);
+ break;
+ case data_types::u8:
+ fill_random_data<uint8_t>(input, -127, 127);
+ break;
+ default:
+ break;
}
topology topo;
}
mvn_test_case_generator& smoke_tests(format::type fmt, data_types in_dt) {
- push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, false, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, true, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, false, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, true, padding() });
+ push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, false, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, false, true, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, true, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, true, true, padding()});
+ return *this;
+ }
+
+ mvn_test_case_generator& zyx_tests(format::type fmt, data_types in_dt) {
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 1, 67, 71}, false, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 1, 67, 71}, false, true, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 5, 67, 71}, false, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 5, 67, 71}, false, true, padding()});
return *this;
}
mvn_test_case_generator& extended_tests(format::type fmt, data_types in_dt) {
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding() });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding() });
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, true, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, false, padding()});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, true, padding()});
// output padding
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding({0, 0, 1, 1}) });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding({0, 0, 1, 1}) });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding({0, 0, 1, 1}) });
- push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding({0, 0, 1, 1}) });
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, false, padding({0, 0, 1, 1})});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, true, padding({0, 0, 1, 1})});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, false, padding({0, 0, 1, 1})});
+ push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, true, padding({0, 0, 1, 1})});
return *this;
}
INSTANTIATE_TEST_CASE_P(smoke,
mvn_random_test,
- testing::ValuesIn(
- mvn_test_case_generator()
- .smoke_tests(format::b_fs_yx_fsv16, data_types::i8)
- .smoke_tests(format::b_fs_yx_fsv16, data_types::u8)
- ), );
+ testing::ValuesIn(mvn_test_case_generator()
+ .smoke_tests(format::b_fs_yx_fsv16, data_types::i8)
+ .smoke_tests(format::b_fs_yx_fsv16, data_types::u8)), );
+
+INSTANTIATE_TEST_CASE_P(zyx,
+ mvn_random_test,
+ testing::ValuesIn(mvn_test_case_generator()
+ .zyx_tests(format::b_fs_zyx_fsv16, data_types::i8)
+ .zyx_tests(format::b_fs_zyx_fsv16, data_types::u8)), );
INSTANTIATE_TEST_CASE_P(extended,
mvn_random_test,
- testing::ValuesIn(
- mvn_test_case_generator()
- .extended_tests(format::b_fs_yx_fsv16, data_types::i8)
- .extended_tests(format::b_fs_yx_fsv16, data_types::u8)
- ), );
+ testing::ValuesIn(mvn_test_case_generator()
+ .extended_tests(format::b_fs_yx_fsv16, data_types::i8)
+ .extended_tests(format::b_fs_yx_fsv16, data_types::u8)), );
-Subproject commit 1093f242dec18e9d45c60b14370e24431384ea54
+Subproject commit 9097834a5860fcf2ccbbd856e1e111bf0124c2de
target_include_directories(${TARGET_NAME}
PUBLIC
"include"
+ ${WATCHDOG_INCLUDE}
PRIVATE
${XLINK_INCLUDE}
- ${XLINK_PLATFORM_INCLUDE}
- ${WATCHDOG_INCLUDE})
+ ${XLINK_PLATFORM_INCLUDE})
target_compile_definitions(${TARGET_NAME}
PRIVATE
{
#endif
+#include "watchdog/watchdog.h"
+
#define NC_THERMAL_BUFFER_SIZE 100
#define NC_DEBUG_BUFFER_SIZE 120
#define NC_MAX_DEVICES (32)
char name[NC_MAX_NAME_SIZE];
};
+typedef struct ncDeviceOpenParams {  // Bundle of optional/auxiliary arguments passed by value to ncDeviceOpen().
+ WatchdogHndl_t* watchdogHndl;  // Watchdog the opened device is registered with; validated by ncDeviceOpen() via CHECK_HANDLE_CORRECT_RC.
+ int watchdogInterval;  // Negative values make ncDeviceOpen() return NC_INVALID_PARAMETERS. Units presumably milliseconds - TODO confirm.
+ const char* customFirmwareDirectory;  // Presumably the firmware search directory; per the ncDeviceOpen() doc, NULL or empty selects the default path search.
+} ncDeviceOpenParams_t;
+
typedef enum {
NC_FIFO_HOST_RO = 0, // fifo can be read through the API but can not be
// written ( graphs can read and write data )
* If NULL or empty, default path searching behavior will be used.
*/
MVNC_EXPORT_API ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
- struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory);
+ struct ncDeviceDescr_t in_ncDeviceDesc, ncDeviceOpenParams_t deviceOpenParams);
/**
* @brief Returns a description of all available devices in the system
/**
* @brief Close device and destroy handler
*/
-MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle);
+MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle, WatchdogHndl_t* watchdogHndl);
// Graph
MVNC_EXPORT_API ncStatus_t ncGraphCreate(const char* name, struct ncGraphHandle_t **graphHandle);
deviceCapabilities_t dev_attr;
ncDeviceState_t state;
uint32_t device_id;
- wd_context watchdog_ctx;
+ WdDeviceHndl_t* watchdog_device;
int wd_interval;
};
#ifndef MVNC_WATCHDOG_H
#define MVNC_WATCHDOG_H
-#include <mvnc.h>
#ifdef __cplusplus
-# define WD_API extern "C"
-# else
-# define WD_API
+extern "C"
+{
#endif
-/**
-* @brief default ping interval is 1 second
-*/
-#define WATCHDOG_PING_INTERVAL_MS 1000
+typedef struct _WatchdogHndl_t WatchdogHndl_t;
-typedef struct wd_context_tag {
- void * opaque;
-} wd_context;
+typedef struct _WdDeviceHndl_t {
+ void* m_device;
+} WdDeviceHndl_t;
typedef enum {
WD_ERRNO = 0,
WD_NOTINITIALIZED,
- WD_DUPLICATE,
WD_FAIL
} wd_error_t;
-/**
- * @brief initializes watchdog context, required to be called before any other WD API calls
- * @return
- */
-WD_API wd_error_t watchdog_init_context(wd_context *ctx);
+wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl);
+void watchdog_destroy(WatchdogHndl_t* watchdogHndl);
/**
* @brief Creates watchdog thread, if not created, and registers new watchee device, and initialise opaque handle to it.
* To avoid a memory leak, the registered device must be unregister with watchdog_unregister_device().
- * @param d - newly connected device descriptor
+ * @param deviceHandle - newly connected device descriptor
* @return
*/
-WD_API wd_error_t watchdog_register_device(wd_context *ctx, devicePrivate_t *d);
+wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle);
/**
* @brief remove watch_dog device from the list, and might stop watchdog worker thread
* @return result of operation
*/
-WD_API wd_error_t watchdog_unregister_device(wd_context *ctx);
+wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle);
+#ifdef __cplusplus
+}
+#endif
#endif // MVNC_WATCHDOG_H
#pragma once
#include <chrono>
+#include <string>
+#include <cstring>
+#include <functional>
+#include <stdexcept>
+
+#define MVLOG_UNIT_NAME watchdog
+#include "XLinkLog.h"
+
+#if defined(_WIN32)
+#include "win_synchapi.h"
+#endif // defined(_WIN32)
namespace Watchdog {
* @brief represents watchdog device interface to be registered within watchdog worker
*/
class IDevice {
- public:
+public:
using time_point = std::chrono::steady_clock::time_point;
virtual ~IDevice() = default;
/**
- * @brief depending on implementation watchdog device shouldn't have interval longer than that
- */
- virtual void setInterval(const std::chrono::milliseconds msInterval) noexcept = 0;
- /**
* @brief watchdog request device to keep alive with current timestamp
*/
virtual void keepAlive(const time_point ¤t_time) noexcept = 0;
virtual void *getHandle() const noexcept = 0;
};
+/**
+ * @brief Stores a callable at construction and invokes it exactly once, from the destructor.
+ *        Instances can be neither copied nor moved.
+ */
+class AutoScope {
+public:
+    AutoScope(const AutoScope&) = delete;
+    AutoScope(AutoScope&&) = delete;
+    AutoScope& operator=(const AutoScope&) = delete;
+    AutoScope& operator=(AutoScope&&) = delete;
+
+    explicit AutoScope(const std::function<void()>& func)
+        : m_onExit(func) {
+    }
+
+    ~AutoScope() {
+        m_onExit();
+    }
+
+private:
+    // Callable executed when the object is destroyed.
+    std::function<void()> m_onExit;
+};
+
+/**
+ * @brief Locks a pthread mutex in the constructor and unlocks it in the destructor.
+ *
+ * The constructor throws std::runtime_error when the mutex pointer is null or
+ * when pthread_mutex_lock() fails. The destructor never throws: an unlock
+ * failure is only logged. Instances can be neither copied nor moved.
+ */
+class CustomUniqueLock {
+public:
+    explicit CustomUniqueLock(pthread_mutex_t* mutex)
+        : m_mutex(mutex) {
+        if (m_mutex == nullptr) {
+            throw std::runtime_error("mutex should not be null");
+        }
+
+        int rc = pthread_mutex_lock(m_mutex);
+        if (rc != 0) {
+            throw std::runtime_error(std::string("failed to lock mutex. rc: ") + strerror(rc));
+        }
+    }
+
+    ~CustomUniqueLock() {
+        int rc = pthread_mutex_unlock(m_mutex);
+        if (rc != 0) {
+            mvLog(MVLOG_ERROR, "failed to unlock mutex. rc: %s", strerror(rc));
+        }
+    }
+
+    // Move operations take a non-const rvalue reference, matching AutoScope above.
+    CustomUniqueLock(const CustomUniqueLock&) = delete;
+    CustomUniqueLock(CustomUniqueLock&&) = delete;
+    CustomUniqueLock& operator=(const CustomUniqueLock&) = delete;
+    CustomUniqueLock& operator=(CustomUniqueLock&&) = delete;
+
+private:
+    // Mutex held for the lifetime of this object; never null after construction.
+    pthread_mutex_t* m_mutex = nullptr;
+};
+
} // namespace Watchdog
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef MVNC_XLINK_DEVICE_H
+#define MVNC_XLINK_DEVICE_H
+
+#include "mvnc.h"
+#include "watchdog.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define WATCHDOG_MAX_PING_INTERVAL_MS 1000 /* NOTE(review): name implies an upper bound on the ping interval, in ms - usage is not visible here, confirm */
+
+wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice);  // Presumably fills *out_deviceHandle for pDevice; ncDeviceOpen() treats a non-zero result as failure and then skips watchdog registration.
+void xlink_device_destroy(WdDeviceHndl_t* deviceHandle);  // Presumably releases a handle from xlink_device_create(); mvnc callers invoke it after watchdog_unregister_device().
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MVNC_XLINK_DEVICE_H
#include "XLinkMacros.h"
#include "XLinkStringUtils.h"
#include "watchdog.h"
+#include "xlink_device.h"
#define THERMAL_BUFFER_SIZE 100
#define THERMAL_THROTTLING_BUFFER_SIZE (THERMAL_BUFFER_SIZE + sizeof(int))
}
ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr,
- struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory) {
+ struct ncDeviceDescr_t in_ncDeviceDesc, ncDeviceOpenParams_t deviceOpenParams) {
//----------------------------------------------------------
// Check input
deviceDesc_t in_deviceDesc = {0};
copyNcDeviceDescrToXLink(&in_ncDeviceDesc, &in_deviceDesc);
+ int watchdogInterval = deviceOpenParams.watchdogInterval;
+ const char* customFirmwareDirectory = deviceOpenParams.customFirmwareDirectory;
+
CHECK_HANDLE_CORRECT_RC(deviceHandlePtr, NC_INVALID_PARAMETERS);
+ CHECK_HANDLE_CORRECT_RC(deviceOpenParams.watchdogHndl, NC_INVALID_PARAMETERS);
if (watchdogInterval < 0) {
mvLog(MVLOG_ERROR, "Invalid watchdogInterval");
return NC_INVALID_PARAMETERS;
d->device_mon_stream_id = deviceMonitorStreamId;
#if !(defined(NO_BOOT))
- watchdog_init_context(&d->watchdog_ctx);
- watchdog_register_device(&d->watchdog_ctx, d);
+ wd_error_t wd_rc = xlink_device_create(&d->watchdog_device, d);
+ if (wd_rc) {
+ mvLog(MVLOG_WARN, "watchdog is not started for device %p", d->xlink);
+ } else {
+ watchdog_register_device(deviceOpenParams.watchdogHndl, d->watchdog_device);
+ }
#endif
getDevAttributes(d);
CHECK_STREAM_ID(graphMonitorStreamId, {
printfOverXLinkClose(d);
// TODO NO_BOOT case
- watchdog_unregister_device(&d->watchdog_ctx);
+ if (d->watchdog_device != NULL) {
+ watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device);
+ xlink_device_destroy(d->watchdog_device);
+ }
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
#else
CHECK_STREAM_ID(graphMonitorStreamId, {
// TODO NO_BOOT case
- watchdog_unregister_device(&d->watchdog_ctx);
+ if (d->watchdog_device != NULL) {
+ watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device);
+ xlink_device_destroy(d->watchdog_device);
+ }
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m));
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m));
CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m));
}
-ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) {
+ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr, WatchdogHndl_t* watchdogHndl) {
int found = 0;
XLinkError_t rc = X_LINK_SUCCESS;
#endif
#if !defined(NO_BOOT)
- watchdog_unregister_device(&d->watchdog_ctx);
+ if (d->watchdog_device != NULL) {
+ watchdog_unregister_device(watchdogHndl, d->watchdog_device);
+ xlink_device_destroy(d->watchdog_device);
+ }
#endif
// Save all devices before reset
// SPDX-License-Identifier: Apache-2.0
//
+#include "watchdog.h"
+#include "watchdogPrivate.hpp"
+
#include <thread>
-#include <future>
#include <vector>
#include <ostream>
#include <iostream>
-#include <utility>
-#include <watchdog.h>
-#include <watchdogPrivate.hpp>
-#include <algorithm>
+#include <atomic>
#include <memory>
-#include <string>
-#include <cstring>
-#include <ncCommPrivate.h>
-#include <mvnc.h>
-#include <ncPrivateTypes.h>
-#include <list>
+#include <algorithm>
+#include <unordered_map>
#define MVLOG_UNIT_NAME watchdog
#include "XLinkLog.h"
-#include "XLink.h"
-#include "XLinkPrivateDefines.h"
-#include "XLinkErrorUtils.h"
-
-#if defined(_WIN32)
-#include "win_synchapi.h"
-#endif // defined(_WIN32)
namespace {
using namespace Watchdog;
/**
- * @brief implementation of watchdog device using xlink representation of it
+ * @brief wrapper that makes a newly added device due immediately: dueIn() returns zero until the first keepAlive() call
*/
-class XLinkDevice : public IDevice {
- _devicePrivate_t privateDevice;
- using time_point = std::chrono::steady_clock::time_point;
- time_point lastPongTime = time_point::min();
- time_point lastPingTime = time_point::min();
- enum : int { deviceHangTimeout = 12000};
-
+class NoDueOnFirstCall : public IDevice {
public:
- explicit XLinkDevice(devicePrivate_t *pDevice)
- : privateDevice(*pDevice) {
- setInterval(milliseconds(privateDevice.wd_interval));
- }
+ NoDueOnFirstCall(IDevice* original) : m_originalPtr(original) {}
- void setInterval(const std::chrono::milliseconds msInterval) noexcept override {
- privateDevice.wd_interval = std::max(static_cast<int>(msInterval.count()), WATCHDOG_PING_INTERVAL_MS);
+ void keepAlive(const time_point& current_time) noexcept override {
+ m_originalPtr->keepAlive(current_time);
+ m_firstCall = true;
}
- void keepAlive(const time_point ¤t_time) noexcept override {
- bool bPong = sendPingMessage();
- // we consider that as first pong time even if it wasn't happen as beginning of boot
- if (lastPongTime == time_point::min()) {
- lastPongTime = current_time;
- }
-
- lastPingTime = current_time;
-
- int diff = duration_cast<milliseconds>(current_time - lastPongTime).count();
-
- if (bPong) {
- lastPongTime = current_time;
- mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", privateDevice.xlink, diff);
- } else {
- mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", privateDevice.xlink, diff);
- }
- }
-
- milliseconds dueIn(const time_point ¤t_time) const noexcept override {
- if (lastPingTime == time_point::min())
- return milliseconds::zero();
-
- // overdue
- if (current_time - lastPingTime > std::chrono::milliseconds(privateDevice.wd_interval)) {
+ milliseconds dueIn(const time_point& current_time) const noexcept override {
+ if (!m_firstCall) {
return milliseconds::zero();
}
- return duration_cast<milliseconds>(lastPingTime + std::chrono::milliseconds(privateDevice.wd_interval) - current_time);
+ return m_originalPtr->dueIn(current_time);
}
- /**
- * @brief means device is hanging
- */
bool isTimeout() const noexcept override {
- if (lastPongTime > lastPingTime) return false;
- if (lastPingTime - lastPongTime > milliseconds(deviceHangTimeout)) {
- // cleaning xlink connection - allowing abort all semaphores waiting in other threads
- XLinkResetAll();
- return true;
- }
- return false;
+ return m_originalPtr->isTimeout();
}
- /**
- * @brief gets some opaque handle that clearly destinguesh one device previate_t from another
- */
- void *getHandle() const noexcept override {
- return privateDevice.xlink;
+ void* getHandle() const noexcept override {
+ return m_originalPtr->getHandle();
}
private:
- bool sendPingMessage() {
- XLinkError_t rc = X_LINK_SUCCESS;
- XLINK_RET_ERR_IF(pthread_mutex_lock(&privateDevice.dev_stream_m), false);
-
- deviceCommand_t config = {};
- config.type = DEVICE_WATCHDOG_PING;
-
- // xlink ping acknowledge interval shouldn't be more then expected ping interval
- rc = XLinkWriteDataWithTimeout(privateDevice.device_mon_stream_id, (const uint8_t*)&config, sizeof(config), deviceHangTimeout);
-
- if(pthread_mutex_unlock(&privateDevice.dev_stream_m) != 0) {
- mvLog(MVLOG_ERROR, "Failed to unlock privateDevice.dev_stream_m");
- }
-
- if (rc != X_LINK_SUCCESS) {
- mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(rc));
- return false;
- }
- return true;
- }
+ IDevice* m_originalPtr;
+ bool m_firstCall = false;
};
-/**
- * @brief when device just added into watchdog, it should not be due interval at all
- */
-class NoDueOnFirstCall : public IDevice {
- std::shared_ptr<IDevice> original;
- bool bFirstCall = false;
- public:
- NoDueOnFirstCall(const std::shared_ptr<IDevice> & original) : original(original) {}
- void setInterval(const std::chrono::milliseconds msInterval) noexcept override {
- original->setInterval(msInterval);
- }
- void keepAlive(const time_point ¤t_time) noexcept override {
- original->keepAlive(current_time);
- bFirstCall = true;
- }
- std::chrono::milliseconds dueIn(const time_point ¤t_time) const noexcept override {
- if (!bFirstCall) {
- return milliseconds::zero();
- }
- return original->dueIn(current_time);
- }
- bool isTimeout() const noexcept override {
- return original->isTimeout();
- }
- void *getHandle() const noexcept override {
- return original->getHandle();
- }
-};
-
-class CustomUniqueLock {
+class WatchdogImpl {
public:
- explicit CustomUniqueLock(pthread_mutex_t* mutex)
- :m_mutex(mutex) {
- if(m_mutex == nullptr) {
- throw std::runtime_error("mutex should not be null");
- }
+ WatchdogImpl();
+ ~WatchdogImpl();
- int rc = pthread_mutex_lock(m_mutex);
- if (rc != 0) {
- throw std::runtime_error(std::string("failed to lock mutex. rc: ") + strerror(rc));
- }
- };
-
- ~CustomUniqueLock() {
- int rc = pthread_mutex_unlock(m_mutex);
- if (rc != 0) {
- mvLog(MVLOG_ERROR, "failed to unlock mutex. rc: %s", strerror(rc));
- }
- }
+ bool registerDevice(IDevice* device);
+ bool removeDevice(IDevice* device);
- CustomUniqueLock(const CustomUniqueLock&) = delete;
- CustomUniqueLock(const CustomUniqueLock&&) = delete;
- CustomUniqueLock& operator=(const CustomUniqueLock&) = delete;
- CustomUniqueLock& operator=(const CustomUniqueLock&&) = delete;
+ WatchdogImpl(const WatchdogImpl&) = delete;
+ WatchdogImpl(WatchdogImpl&&) = delete;
+ WatchdogImpl& operator = (const WatchdogImpl&) = delete;
+ WatchdogImpl& operator = (WatchdogImpl&&) = delete;
private:
- pthread_mutex_t* m_mutex = nullptr;
-};
+ void waitFor(const milliseconds sleepInterval);
+ void watchdogRoutine() noexcept;
-static void * WD_OPAQUE_MAGIC = reinterpret_cast<void*>(0xdeadbeaf);
-
-struct wd_context_opaque {
- void * magic = WD_OPAQUE_MAGIC;
- IDevice * actual = nullptr;
- bool destroyed = false;
- void *handleCached = nullptr;
-};
-
-class WatchdogImpl {
- using wd_context_as_tuple = std::tuple<std::shared_ptr<IDevice>, bool*, void*>;
+private:
+ using Devices = std::vector<std::shared_ptr<IDevice>>;
+ using DevicesMap = std::unordered_map<void*, std::shared_ptr<IDevice>>;
- using Devices = std::list<wd_context_as_tuple>;
Devices watchedDevices;
+ DevicesMap removedDevices;
std::atomic_bool threadRunning {false};
pthread_mutex_t routineLock;
pthread_cond_t wakeUpPingThread;
std::thread poolThread;
+};
- WatchdogImpl(const WatchdogImpl&) = delete;
- WatchdogImpl(WatchdogImpl&&) = delete;
- WatchdogImpl& operator = (const WatchdogImpl&) = delete;
- WatchdogImpl& operator = (WatchdogImpl&&) = delete;
-
- class AutoScope {
- public:
- explicit AutoScope(const std::function<void()>& func) : _func(func) {}
- ~AutoScope() { _func(); }
-
- AutoScope(const AutoScope&) = delete;
- AutoScope& operator=(const AutoScope&) = delete;
- private:
- std::function<void()> _func;
- };
+//------------- Watchdog implementation -------------
-private:
-
- WatchdogImpl() {
- int rc = pthread_mutex_init(&routineLock, NULL);
- if (rc != 0) {
- throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc));
- }
+WatchdogImpl::WatchdogImpl() {
+ int rc = pthread_mutex_init(&routineLock, NULL);
+ if (rc != 0) {
+ throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc));
+ }
#if !(defined(__APPLE__) || defined(_WIN32))
- pthread_condattr_t attr;
- rc = pthread_condattr_init(&attr);
- if (rc != 0) {
- throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc));
- }
- AutoScope attrDestroy([&attr]{
- if (pthread_condattr_destroy(&attr) != 0)
- mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute.");
- });
+ pthread_condattr_t attr;
+ rc = pthread_condattr_init(&attr);
+ if (rc != 0) {
+ throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc));
+ }
- rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
- if (rc != 0) {
- throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc));
- }
+ AutoScope attrDestroy([&attr]{
+ if (pthread_condattr_destroy(&attr) != 0)
+ mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute.");
+ });
+
+ rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+ if (rc != 0) {
+ throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc));
+ }
#endif // !(defined(__APPLE__) || defined(_WIN32))
- rc = pthread_cond_init(&wakeUpPingThread, NULL);
- if (rc != 0) {
- throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc));
- }
+ rc = pthread_cond_init(&wakeUpPingThread, NULL);
+ if (rc != 0) {
+ throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc));
}
+}
-public:
+WatchdogImpl::~WatchdogImpl() {  // Logs the devices still watched, clears threadRunning, joins the worker thread, then destroys the mutex and condition variable.
+ mvLog(MVLOG_INFO, "watchdog terminated\n");
+ try
+ {
+ CustomUniqueLock lock {&routineLock};  // held only for the logging loop; released at the end of this try block
+ for (auto &item : watchedDevices) {
+ mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", item->getHandle());
+ }
+ } catch (const std::exception & ex) {
+ mvLog(MVLOG_ERROR, "error %s", ex.what());
+ } catch (...) {
+ mvLog(MVLOG_ERROR, "unknown error");
+ }
- static WatchdogImpl &instance() {
- static WatchdogImpl watchdog;
- return watchdog;
+ threadRunning = false;  // NOTE(review): cleared and broadcast without holding routineLock - confirm watchdogRoutine() cannot miss this wake-up
+ int rc = pthread_cond_broadcast(&wakeUpPingThread);
+ if (rc != 0) {
+ mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
}
+ if (poolThread.joinable()) {  // join before the mutex and condition variable are destroyed below
+ poolThread.join();
+ }
- ~WatchdogImpl() {
- mvLog(MVLOG_INFO, "watchdog terminated\n");
- try
- {
- CustomUniqueLock lock {&routineLock};
- for (auto &item : watchedDevices) {
- *std::get<1>(item) = true;
- mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", std::get<2>(item));
- }
- } catch (const std::exception & ex) {
- mvLog(MVLOG_ERROR, "error %s", ex.what());
- } catch (...) {
- mvLog(MVLOG_ERROR, "unknown error");
- }
+ rc = pthread_mutex_destroy(&routineLock);
+ if (rc != 0) {
+ mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc);
+ }
- threadRunning = false;
- int rc = pthread_cond_broadcast(&wakeUpPingThread);
- if (rc != 0) {
- mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
- }
+ rc = pthread_cond_destroy(&wakeUpPingThread);
+ if (rc != 0) {
+ mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc);
+ }
+}
- rc = pthread_mutex_destroy(&routineLock);
- if (rc != 0) {
- mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc);
- }
+// Adds a device to the watched set and makes sure the watchdog thread is running.
+// Holds routineLock for the whole call. If the thread is not running it is started
+// (a finished previous thread is joined first). The device is wrapped in
+// NoDueOnFirstCall and appended unless a watched device already reports the same
+// getHandle(). The wrapper stores the raw pointer, so the watchdog does not take
+// ownership of the device. wakeUpPingThread is broadcast in every case.
+// Returns true when the device was newly added, false when it was already watched.
+bool WatchdogImpl::registerDevice(IDevice* device) {
+ // Log the device pointer itself; "&device" would print the address of this local parameter.
+ mvLog(MVLOG_INFO, "register device: %p\n", static_cast<void*>(device));
- rc = pthread_cond_destroy(&wakeUpPingThread);
- if (rc != 0) {
- mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc);
- }
+ CustomUniqueLock lock {&routineLock};
+ if (!threadRunning) {
if (poolThread.joinable()) {
poolThread.join();
}
- }
-
-public:
- void *register_device(std::shared_ptr<IDevice> device) {
- CustomUniqueLock lock {&routineLock};
- std::unique_ptr<wd_context_opaque> ctx (new wd_context_opaque);
-
- // rare case of exact pointer address collision
- if (ctx.get() == WD_OPAQUE_MAGIC) {
- std::unique_ptr<wd_context_opaque> ctx2(new wd_context_opaque);
- ctx.reset(ctx2.release());
- }
-
- if (!threadRunning) {
- if (poolThread.joinable()) {
- poolThread.join();
- }
- threadRunning = true;
+ threadRunning = true;
- poolThread = std::thread([this]() {
- if (pthread_setname_np(
+ poolThread = std::thread([this]() {
+ if (pthread_setname_np(
#ifndef __APPLE__
- pthread_self(),
+ pthread_self(),
#endif
- "WatchdogThread") != 0) {
- perror("Setting name for watchdog thread failed");
- }
- watchdog_routine();
- });
- } else {
- // wake up thread
- int rc = pthread_cond_broadcast(&wakeUpPingThread);
- if (rc != 0) {
- mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
+ "WatchdogThread") != 0) {
+ perror("Setting name for watchdog thread failed");
}
- }
-
- ctx->handleCached = device->getHandle();
- watchedDevices.emplace_back(device, &ctx->destroyed, ctx->handleCached);
+ watchdogRoutine();
+ });
+ }
- ctx->actual = std::get<0>(watchedDevices.back()).get();
+ // Devices are identified by getHandle(), not by the IDevice pointer value.
+ auto it = std::find_if(std::begin(watchedDevices),
+ std::end(watchedDevices),
+ [&device](const std::shared_ptr<IDevice>& item) {
+ return item->getHandle() == device->getHandle();
+ });
- return ctx.release();
+ bool found = it != std::end(watchedDevices);
+ if (!found) {
+ watchedDevices.emplace_back(std::make_shared<NoDueOnFirstCall>(device));
}
- void *register_device(devicePrivate_t *device) {
- return register_device(std::make_shared<NoDueOnFirstCall>(std::make_shared<XLinkDevice>(device)));
+ int rc = pthread_cond_broadcast(&wakeUpPingThread);
+ if (rc != 0) {
+ mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
}
- bool remove_device(void *opaque) {
- mvLog(MVLOG_INFO, "remove_device : %p\n", opaque);
- auto ptr = reinterpret_cast<wd_context_opaque *>(opaque);
- if (ptr == nullptr) {
- return false;
- }
+ return !found;
+}
- bool bFound = false;
- {
- CustomUniqueLock lock {&routineLock};
+/**
+ * @brief Stops watching a device.
+ *
+ * Under routineLock: erases the entry of watchedDevices whose getHandle() equals the device's;
+ * when there is none, erases the device's handle from removedDevices (the watchdog routine puts
+ * a device there when it drops it after a timeout). The ping thread is woken up in either case.
+ *
+ * @param device device to stop watching; it is dereferenced, so it must not be null
+ * @return true when the device was found in watchedDevices or removedDevices, false otherwise
+ */
+bool WatchdogImpl::removeDevice(IDevice* device) {
+ // log the device pointer itself; "&device" would print the address of the local parameter
+ mvLog(MVLOG_INFO, "remove device: %p\n", static_cast<void*>(device));
- // thread already removed
- if (ptr->destroyed) {
- delete ptr;
- return true;
- }
+ CustomUniqueLock lock {&routineLock};
- auto idx = std::find_if(std::begin(watchedDevices),
- std::end(watchedDevices),
- [ptr](const wd_context_as_tuple &item) {
- return std::get<0>(item)->getHandle() == ptr->actual->getHandle();
- });
- bFound = idx != std::end(watchedDevices);
- if(bFound) {
- watchedDevices.erase(idx);
- delete ptr;
- }
- }
+ auto it = std::find_if(std::begin(watchedDevices),
+ std::end(watchedDevices),
+ [&device](const std::shared_ptr<IDevice>& item) {
+ return item->getHandle() == device->getHandle();
+ });
- // wake up thread since we might select removed device as nex to be ping, and there is no more devices available
- int rc = pthread_cond_broadcast(&wakeUpPingThread);
- if (rc != 0) {
- mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
- }
+ bool removed = it != std::end(watchedDevices);
+ if (removed) {
+ watchedDevices.erase(it);
+ } else if (removedDevices.count(device->getHandle())) {
+ removedDevices.erase(device->getHandle());
+ removed = true;
+ }
- return bFound;
+ // wake up the thread: the removed device may have been selected as the next one to ping,
+ // and there may be no devices left to watch
+ int rc = pthread_cond_broadcast(&wakeUpPingThread);
+ if (rc != 0) {
+ mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
}
- private:
- /// @note: We are using here pthread_cond_timedwait as a replacement for condition_variable::wait_for,
- /// as libstdc++ has bug not using monotonic clock. When GCC 10.x became minimum supported version,
- /// that code could be removed.
- void wait_for(const milliseconds sleepInterval) {
- struct timespec timeToWait = {0, 0};
+ return removed;
+}
+
+void WatchdogImpl::waitFor(const milliseconds sleepInterval) {
+ struct timespec timeToWait = {0, 0};
- const auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
+ const auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
#if (defined(__APPLE__) || defined(_WIN32))
- timeToWait.tv_sec = sec.count();
- timeToWait.tv_nsec =
- std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count();
+ timeToWait.tv_sec = sec.count();
+ timeToWait.tv_nsec =
+ std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+ std::chrono::nanoseconds(sec).count();
#else
- clock_gettime(CLOCK_MONOTONIC, &timeToWait);
- const auto secondInNanoSeconds = 1000000000L;
- const auto nsecSum = std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec;
- timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds;
- timeToWait.tv_nsec = nsecSum % secondInNanoSeconds;
+ clock_gettime(CLOCK_MONOTONIC, &timeToWait);
+ const auto secondInNanoSeconds = 1000000000L;
+ const auto nsecSum = std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+ std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec;
+ timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds;
+ timeToWait.tv_nsec = nsecSum % secondInNanoSeconds;
#endif // (defined(__APPLE__) || defined(_WIN32))
#if defined(__APPLE__)
- const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
+ const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
#else
- const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
+ const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
#endif // defined(__APPLE__)
- if (rc != 0 && rc != ETIMEDOUT) {
- throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc));
- }
+
+ if (rc != 0 && rc != ETIMEDOUT) {
+ throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc));
}
+}
- void watchdog_routine() noexcept {
- try {
- mvLog(MVLOG_INFO, "thread started\n");
-
- milliseconds sleepInterval;
-
- CustomUniqueLock lock {&routineLock};
-
- do {
- for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end(); ) {
- auto &device = std::get<0>(*deviceIt);
- auto isReady = device->dueIn(steady_clock::now()).count() == 0;
- if (isReady) {
- auto now = high_resolution_clock::now();
- device->keepAlive(steady_clock::now());
- mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", duration_cast<std::chrono::milliseconds>(high_resolution_clock ::now()-now).count());
- }
- if (device->isTimeout()) {
- mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle());
- // marking device as deleted, to prevent double resource free from wd_unregister_device
- *std::get<1>(*deviceIt) = true;
- deviceIt = watchedDevices.erase(deviceIt);
- }
- else {
- ++deviceIt;
- }
+void WatchdogImpl::watchdogRoutine() noexcept {
+ try {
+ mvLog(MVLOG_INFO, "thread started\n");
+
+ milliseconds sleepInterval;
+ CustomUniqueLock lock{&routineLock};
+
+ do {
+ for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end();) {
+ auto &device = *deviceIt;
+ auto isReady = device->dueIn(steady_clock::now()).count() <= 0;
+ if (isReady) {
+ auto now = steady_clock::now();
+ device->keepAlive(steady_clock::now());
+ mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n",
+ duration_cast<std::chrono::milliseconds>(steady_clock::now() - now).count());
}
- auto currentTime = steady_clock::now();
- auto minInterval = std::min_element(watchedDevices.begin(),
- watchedDevices.end(),
- [&currentTime] (const Devices::value_type & device1, const Devices::value_type & device2) {
- return std::get<0>(device1)->dueIn(currentTime).count()
- < std::get<0>(device2)->dueIn(currentTime).count();
- });
- // if for some reason we have empty devices list but watchdog is active
- if (minInterval == watchedDevices.end()) {
- mvLog(MVLOG_INFO, "no active devices to watch, stopping Watchdog thread\n");
- threadRunning = false;
- break;
+ if (device->isTimeout()) {
+ mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle());
+ // marking device as deleted, to prevent double resource free from wd_unregister_device
+ removedDevices[device->getHandle()] = device;
+ deviceIt = watchedDevices.erase(deviceIt);
+ } else {
+ ++deviceIt;
}
- // TODO: no timer coalescing feature, to minimized thread wakes
- sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime);
- if (sleepInterval.count() <= 0)
- continue;
-
- mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
- wait_for(sleepInterval);
-
- mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n",
- duration_cast<std::chrono::milliseconds>(steady_clock::now() - currentTime).count());
- } while (threadRunning);
- } catch (const std::exception & ex) {
- mvLog(MVLOG_ERROR, "error %s", ex.what());
- } catch (...) {
- mvLog(MVLOG_ERROR, "unknown error");
- }
+ }
+ auto currentTime = steady_clock::now();
+ auto minInterval = std::min_element(watchedDevices.begin(), watchedDevices.end(),
+ [&currentTime](const Devices::value_type& device1,
+ const Devices::value_type& device2) {
+ return device1->dueIn(currentTime).count() <
+ device2->dueIn(currentTime).count();
+ });
+ // if for some reason we have empty devices list but watchdog is active
+ if (minInterval == watchedDevices.end()) {
+ mvLog(MVLOG_INFO, "no active devices to watch, stopping Watchdog thread\n");
+ threadRunning = false;
+ break;
+ }
+
+ sleepInterval = (*minInterval)->dueIn(currentTime);
+ if (sleepInterval.count() <= 0) {
+ continue;
+ }
+
+ mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
+
+ waitFor(sleepInterval);
- mvLog(MVLOG_INFO, "thread ended\n");
+ mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n",
+ duration_cast<std::chrono::milliseconds>(steady_clock::now() - currentTime).count());
+
+ } while (threadRunning);
+ } catch (const std::exception &ex) {
+ mvLog(MVLOG_ERROR, "error %s", ex.what());
+ } catch (...) {
+ mvLog(MVLOG_ERROR, "unknown error");
}
-};
+
+ mvLog(MVLOG_INFO, "thread ended\n");
+}
} // namespace
-WD_API wd_error_t watchdog_init_context(wd_context *ctx) {
+struct _WatchdogHndl_t {
+ WatchdogImpl* m_watchdog;
+};
+
+wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl) {
+ if (out_watchdogHndl == nullptr) {
+ return WD_NOTINITIALIZED;
+ }
+
+ *out_watchdogHndl = nullptr;
+ auto tmpWdHndl =
+ static_cast<WatchdogHndl_t*>(malloc(sizeof(WatchdogHndl_t)));
+ if(tmpWdHndl == nullptr) {
+ return WD_FAIL;
+ }
+
try {
- mvLogLevelSet(MVLOG_ERROR);
- mvLogDefaultLevelSet(MVLOG_ERROR);
- if (!ctx) {
- return WD_NOTINITIALIZED;
- }
- // opaque pointer initialized
- if (ctx->opaque == WD_OPAQUE_MAGIC) {
- mvLog(MVLOG_INFO, "watchdog context (%p) already initialized \n", ctx);
- } else {
- ctx->opaque = WD_OPAQUE_MAGIC;
- }
+ tmpWdHndl->m_watchdog = new WatchdogImpl();
+ *out_watchdogHndl = tmpWdHndl;
return WD_ERRNO;
- } catch (...) {
- mvLog(MVLOG_ERROR, "failed initialize watchdog context: %p\n", ctx);
+ } catch (const std::exception& ex) {
+ mvLog(MVLOG_ERROR, "error %s", ex.what());
+ } catch (...) {
+ mvLog(MVLOG_ERROR, "unknown error");
}
+
+ free(tmpWdHndl);
return WD_FAIL;
}
-WD_API wd_error_t watchdog_register_device(wd_context * ctx, devicePrivate_t *device) {
- try {
- if (!ctx) {
- mvLog(MVLOG_ERROR, "watchdog context is null\n");
- return WD_NOTINITIALIZED;
- }
- // opaque pointer initialized
- if (ctx->opaque == nullptr) {
- mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx);
- return WD_NOTINITIALIZED;
- }
- if (device && device->wd_interval <= 0) {
- mvLog(MVLOG_ERROR, "watchdog interval should be > 0, but was (%d)\n", device->wd_interval);
- return WD_NOTINITIALIZED;
- }
- // opaque pointer initialized
- if (ctx->opaque != WD_OPAQUE_MAGIC) {
- auto watchee = reinterpret_cast<wd_context_opaque*>(ctx->opaque);
- // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor
- if (watchee->magic == WD_OPAQUE_MAGIC) {
- // actually this can represent already registered context, so need to check
- // since we are adding NoDue wrapper, lets check for it
- if (nullptr != dynamic_cast<NoDueOnFirstCall*>(watchee->actual)) {
- mvLog(MVLOG_ERROR, "watchdog context (%p) already registered within watchdog\n", ctx);
- return WD_DUPLICATE;
- }
+void watchdog_destroy(WatchdogHndl_t* watchdogHndl) {
+ if (watchdogHndl == nullptr) {
+ return;
+ }
- // transferring interval from context
- if (device) {
- watchee->actual->setInterval(milliseconds(device->wd_interval));
- }
- ctx->opaque = WatchdogImpl::instance().register_device(
- shared_ptr<IDevice>(new NoDueOnFirstCall(shared_ptr<IDevice>(watchee->actual, [](IDevice*){}))));
+ if (watchdogHndl->m_watchdog != nullptr) {
+ delete(watchdogHndl->m_watchdog);
+ }
- if (ctx->opaque == nullptr) {
- mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx);
- } else {
- return WD_ERRNO;
- }
- }
- mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx);
- return WD_NOTINITIALIZED;
- }
+ free(watchdogHndl);
+}
+
+wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle) {
+ if (watchdogHndl == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog handle is null\n");
+ return WD_NOTINITIALIZED;
+ }
+
+ if (deviceHandle == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog device handle is null\n");
+ return WD_NOTINITIALIZED;
+ }
- if (device && device->wd_interval > 0) {
- ctx->opaque = WatchdogImpl::instance().register_device(device);
- } else {
- ctx->opaque = nullptr;
+ if (deviceHandle->m_device == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle);
+ return WD_NOTINITIALIZED;
+ }
+
+ try {
+ WatchdogImpl* watchdog = watchdogHndl->m_watchdog;
+ auto device = reinterpret_cast<IDevice*>(deviceHandle->m_device);
+ if (!watchdog->registerDevice(device)) {
+ mvLog(MVLOG_WARN, "cannot register device\n");
+ return WD_FAIL;
}
return WD_ERRNO;
} catch (const std::exception & ex) {
mvLog(MVLOG_ERROR, "failed to register device: %s\n", ex.what());
} catch (...) {
- mvLog(MVLOG_ERROR, "failed to register device context (%p)\n", ctx);
+ mvLog(MVLOG_ERROR, "failed to register device (%p)\n", deviceHandle);
}
+
return WD_FAIL;
}
-WD_API wd_error_t watchdog_unregister_device(wd_context *ctx) {
- try {
- if (ctx == nullptr || ctx->opaque == nullptr) {
- return WD_NOTINITIALIZED;
- } else {
- if (ctx->opaque != WD_OPAQUE_MAGIC) {
- auto watchee = reinterpret_cast<wd_context_opaque *>(ctx->opaque);
- // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor
- if (watchee->magic == WD_OPAQUE_MAGIC) {
- if (!WatchdogImpl::instance().remove_device(ctx->opaque)) {
- mvLog(MVLOG_WARN, "cannot remove device\n");
- return WD_FAIL;
- }
- }
- }
- }
+wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle) {
+ if (watchdogHndl == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog handle is null\n");
+ return WD_NOTINITIALIZED;
+ }
- if (ctx != nullptr) {
- // opaque pointer deleted
- ctx->opaque = nullptr;
- }
+ if (deviceHandle == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog device handle is null\n");
+ return WD_NOTINITIALIZED;
+ }
+
+ if (deviceHandle->m_device == nullptr) {
+ mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle);
+ return WD_NOTINITIALIZED;
+ }
+ try {
+ WatchdogImpl* watchdog = watchdogHndl->m_watchdog;
+ auto device = reinterpret_cast<IDevice*>(deviceHandle->m_device);
+ if (!watchdog->removeDevice(device)) {
+ mvLog(MVLOG_WARN, "cannot remove device\n");
+ return WD_FAIL;
+ }
return WD_ERRNO;
} catch (const std::exception & ex) {
- mvLog(MVLOG_WARN, "error %s", ex.what());
+ mvLog(MVLOG_ERROR, "error %s", ex.what());
} catch (...) {
- mvLog(MVLOG_WARN, "unknown error");
+ mvLog(MVLOG_ERROR, "unknown error");
}
return WD_FAIL;
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "xlink_device.h"
+#include "watchdog.h"
+#include "watchdogPrivate.hpp"
+
+#include "XLink.h"
+#include "XLinkPrivateDefines.h"
+#include "XLinkErrorUtils.h"
+
+#include <ncPrivateTypes.h>
+
+#include <algorithm>
+
+namespace {
+
+using namespace std;
+using namespace chrono;
+using namespace Watchdog;
+
+/**
+ * @brief IDevice implementation that pings a device over XLink.
+ *
+ * Holds its own copy of the device private data together with the time points of the last
+ * ping that was sent and of the last ping that succeeded.
+ */
+class XLinkDevice : public IDevice {
+public:
+    explicit XLinkDevice(devicePrivate_t* pDevice);
+    ~XLinkDevice() = default;
+
+    /**
+     * @brief sends a ping to the device and records the ping/pong time points
+     */
+    void keepAlive(const time_point& current_time) noexcept override;
+
+    /**
+     * @brief time left until the next ping should be sent; zero when a ping is due
+     */
+    milliseconds dueIn(const time_point& current_time) const noexcept override;
+
+    /**
+     * @brief tells whether the device has gone without a successful ping for longer than kDeviceHangTimeout
+     */
+    bool isTimeout() const noexcept override;
+
+    /**
+     * @brief gets an opaque handle that distinguishes one devicePrivate_t from another
+     */
+    void* getHandle() const noexcept override;
+
+private:
+    bool sendPingMessage();
+
+    // Milliseconds; passed as the timeout of the ping write and used as the no-response span
+    // after which isTimeout() reports the device as hung.
+    const int kDeviceHangTimeout = 12000;
+
+    _devicePrivate_t m_devicePrivate;
+
+    // time_point::min() means "not set yet"
+    time_point m_lastPongTime = time_point::min();
+    time_point m_lastPingTime = time_point::min();
+};
+
+//----------------- XLinkDevice implementation ---------------------
+
+/**
+ * @brief Copies the device private data and normalizes its watchdog interval.
+ * @param pDevice device description to copy; it is dereferenced, so it must not be null
+ * @throws std::runtime_error when the copied wd_interval is not positive
+ */
+XLinkDevice::XLinkDevice(devicePrivate_t* pDevice)
+    : m_devicePrivate(*pDevice) {
+    const auto requestedInterval = m_devicePrivate.wd_interval;
+    if (requestedInterval <= 0) {
+        throw runtime_error(
+            "watchdog interval should be > 0, but was " + std::to_string(requestedInterval));
+    }
+
+    // The stored interval is never smaller than WATCHDOG_MAX_PING_INTERVAL_MS.
+    // NOTE(review): the constant's name suggests an upper bound while std::max makes it a lower
+    // bound - confirm which one is intended.
+    m_devicePrivate.wd_interval = std::max(requestedInterval, WATCHDOG_MAX_PING_INTERVAL_MS);
+}
+
+/**
+ * @brief Pings the device and updates the ping/pong bookkeeping.
+ *
+ * @param current_time time point stored as the moment of this ping and, when the ping
+ *                     succeeds, as the moment of the last pong
+ */
+void XLinkDevice::keepAlive(const time_point& current_time) noexcept {
+    const bool bPong = sendPingMessage();
+    // The very first call seeds the pong time, so the no-response span is counted from the
+    // first ping instead of from time_point::min().
+    if (m_lastPongTime == time_point::min()) {
+        m_lastPongTime = current_time;
+    }
+
+    m_lastPingTime = current_time;
+
+    // Span since the last successful ping; used for logging only.
+    const int diff = static_cast<int>(duration_cast<milliseconds>(current_time - m_lastPongTime).count());
+
+    if (bPong) {
+        m_lastPongTime = current_time;
+        mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", m_devicePrivate.xlink, diff);
+    } else {
+        mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", m_devicePrivate.xlink, diff);
+    }
+}
+
+/**
+ * @brief Time left until the next ping should be sent.
+ * @param current_time the moment to measure from
+ * @return zero when no ping has been sent yet or when more than wd_interval milliseconds have
+ *         passed since the last ping; otherwise the remainder of the interval
+ */
+milliseconds XLinkDevice::dueIn(const time_point& current_time) const noexcept {
+    // The "never pinged" check comes first: subtracting time_point::min() must be avoided.
+    const bool neverPinged = (m_lastPingTime == time_point::min());
+    if (!neverPinged) {
+        const std::chrono::milliseconds pingPeriod(m_devicePrivate.wd_interval);
+        const bool overdue = (current_time - m_lastPingTime) > pingPeriod;
+        if (!overdue) {
+            return duration_cast<milliseconds>(m_lastPingTime + pingPeriod - current_time);
+        }
+    }
+
+    return milliseconds::zero();
+}
+
+/**
+ * @brief Tells whether the device stopped answering pings.
+ *
+ * The device is considered hung when the last ping is more than kDeviceHangTimeout
+ * milliseconds later than the last pong. In that case XLinkResetAll() is called before
+ * returning, so this query has a side effect.
+ *
+ * @return true when the device is considered hung, false otherwise
+ */
+bool XLinkDevice::isTimeout() const noexcept {
+    const bool pongIsNewer = m_lastPongTime > m_lastPingTime;
+    const bool hung = !pongIsNewer &&
+                      (m_lastPingTime - m_lastPongTime > milliseconds(kDeviceHangTimeout));
+
+    if (hung) {
+        // cleaning xlink connection - allows aborting all semaphores waiting in other threads
+        XLinkResetAll();
+    }
+
+    return hung;
+}
+
+/**
+ * @brief Returns the xlink member of the stored device private data; it serves as this device's handle.
+ */
+void* XLinkDevice::getHandle() const noexcept {
+ return m_devicePrivate.xlink;
+}
+
+/**
+ * @brief Writes a DEVICE_WATCHDOG_PING command to the device monitor stream.
+ *
+ * The write is done while holding dev_stream_m and is bounded by kDeviceHangTimeout.
+ *
+ * @return true when XLinkWriteDataWithTimeout reports X_LINK_SUCCESS, false otherwise
+ */
+bool XLinkDevice::sendPingMessage() {
+    // NOTE(review): XLINK_RET_ERR_IF presumably returns `false` from here when locking fails -
+    // confirm against XLinkErrorUtils.h.
+    XLINK_RET_ERR_IF(pthread_mutex_lock(&m_devicePrivate.dev_stream_m), false);
+
+    deviceCommand_t pingCommand = {};
+    pingCommand.type = DEVICE_WATCHDOG_PING;
+
+    // xlink ping acknowledge interval shouldn't be more than the expected ping interval
+    const XLinkError_t writeStatus = XLinkWriteDataWithTimeout(
+        m_devicePrivate.device_mon_stream_id,
+        reinterpret_cast<const uint8_t*>(&pingCommand), sizeof(pingCommand), kDeviceHangTimeout);
+
+    // The mutex is released before the write status is inspected, whatever that status is.
+    const int unlockStatus = pthread_mutex_unlock(&m_devicePrivate.dev_stream_m);
+    if (unlockStatus != 0) {
+        mvLog(MVLOG_ERROR, "Failed to unlock m_devicePrivate.dev_stream_m");
+    }
+
+    const bool delivered = (writeStatus == X_LINK_SUCCESS);
+    if (!delivered) {
+        mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(writeStatus));
+    }
+
+    return delivered;
+}
+
+} // namespace
+
+/**
+ * @brief Creates a watchdog device handle backed by an XLinkDevice.
+ *
+ * @param out_deviceHandle receives the new handle; set to nullptr when creation fails
+ * @param pDevice          device private data the XLinkDevice is built from
+ * @return WD_NOTINITIALIZED when either argument is null, WD_ERRNO on success,
+ *         WD_FAIL when the handle cannot be allocated or the XLinkDevice constructor throws
+ */
+wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice) {
+    if (out_deviceHandle == nullptr || pDevice == nullptr) {
+        return WD_NOTINITIALIZED;
+    }
+    *out_deviceHandle = nullptr;
+
+    auto* handle = static_cast<WdDeviceHndl_t*>(malloc(sizeof(WdDeviceHndl_t)));
+    if (handle == nullptr) {
+        return WD_FAIL;
+    }
+
+    wd_error_t status = WD_FAIL;
+    try {
+        handle->m_device = new XLinkDevice(pDevice);
+        *out_deviceHandle = handle;
+        status = WD_ERRNO;
+    } catch (const std::exception& ex) {
+        mvLog(MVLOG_ERROR, "error %s", ex.what());
+    } catch (...) {
+        mvLog(MVLOG_ERROR, "unknown error");
+    }
+
+    // The handle is handed over to the caller only on success; otherwise it is released here.
+    if (status != WD_ERRNO) {
+        free(handle);
+    }
+    return status;
+}
+
+/**
+ * @brief Destroys a handle created by xlink_device_create.
+ *
+ * Deletes the XLinkDevice stored in the handle and frees the handle itself.
+ *
+ * @param deviceHandle handle to destroy; nullptr is accepted and ignored
+ */
+void xlink_device_destroy(WdDeviceHndl_t* deviceHandle) {
+    if (deviceHandle != nullptr) {
+        // delete on a null pointer is a no-op, so an empty handle needs no special case
+        delete reinterpret_cast<XLinkDevice*>(deviceHandle->m_device);
+        free(deviceHandle);
+    }
+}
initialize_usb_boot();
ASSERT_NO_ERROR(setLogLevel(ncLogLevel));
availableDevices_ = getAmountOfDevices();
+
+ ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl));
+
+ m_ncDeviceOpenParams.watchdogInterval = watchdogInterval;
+ m_ncDeviceOpenParams.customFirmwareDirectory = firmwarePath;
+ m_ncDeviceOpenParams.watchdogHndl = m_watchdogHndl;
}
void MvncTestsCommon::TearDown() {
ncDeviceResetAll();
+ watchdog_destroy(m_watchdogHndl);
}
int MvncTestsCommon::setLogLevel(const mvLog_t logLevel) {
ncDeviceDesc.platform = NC_ANY_PLATFORM;
for (int index = 0; index < devicesToBoot; ++index) {
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, m_ncDeviceOpenParams));
ASSERT_TRUE(deviceHandlers[index] != nullptr);
++amountOfBooted;
}
_deviceDesc.platform = NC_ANY_PLATFORM;
for (int index = 0; index < availableDevices_; ++index) {
- ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, m_ncDeviceOpenParams));
}
setbuf(stdout, buff);
void MvncLoggingTests::TearDown() {
setbuf(stdout, NULL);
for (int index = 0; index < availableDevices_; ++index) {
- ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index]));
+ ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index], m_watchdogHndl));
}
}
void MvncGraphAllocations::TearDown() {
for (int index = 0; index < _bootedDevices; ++index) {
- ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index]));
+ ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index], m_watchdogHndl));
}
_bootedDevices = 0;
}
mvLog_t ncLogLevel = MVLOG_INFO;
int watchdogInterval = 1000;
int availableDevices_ = 0;
+ WatchdogHndl_t* m_watchdogHndl = nullptr;
+ ncDeviceOpenParams_t m_ncDeviceOpenParams = {};
~MvncTestsCommon() override = default;
MvncTestsCommon();
for (int i = 0; i < iterations; ++i) {
printf("Iteration %d of %d\n", i, iterations);
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
deviceHandle = nullptr;
}
}
// Open device
ncDeviceHandle_t *deviceHandle = nullptr;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
for (int i = 0; i < iterations; ++i) {
printf("Iteration %d of %d\n", i, iterations);
// Destroy graph
ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
}
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
for (int i = 0; i < iterations; i++) {
ncDeviceHandle_t *deviceHandle = nullptr;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
ncGraphHandle_t* graphHandle = nullptr;
std::string graphName = "graph";
ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
}
deviceDesc.protocol = NC_USB;
deviceDesc.platform = NC_ANY_PLATFORM;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle_USB->private_data->dev_addr;
ASSERT_TRUE(actDeviceName.size());
// Open PCIe device
deviceDesc.protocol = NC_PCIE;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle_PCIe->private_data->dev_addr;
ASSERT_TRUE(actDeviceName.size());
ASSERT_TRUE(isMyriadPCIeDevice(actDeviceName));
// Close all
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe, m_watchdogHndl));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB, m_watchdogHndl));
}
/**
deviceDesc.platform = NC_ANY_PLATFORM;
// Open PCIe device
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle_PCIe->private_data->dev_addr;
ASSERT_TRUE(actDeviceName.size());
// Open USB device
deviceDesc.protocol = NC_USB;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle_USB->private_data->dev_addr;
ASSERT_TRUE(actDeviceName.size());
// Close all
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe, m_watchdogHndl));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB, m_watchdogHndl));
}
//------------------------------------------------------------------------------
deviceDesc.protocol = _deviceProtocol;
deviceDesc.platform = NC_ANY_PLATFORM;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
ASSERT_TRUE(deviceHandle != nullptr);
ASSERT_TRUE(deviceHandle->private_data != nullptr);
ASSERT_TRUE(isSameProtocolDevice(deviceName, _deviceProtocol));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
/**
deviceDesc.protocol = _deviceProtocol;
deviceDesc.platform = NC_ANY_PLATFORM;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
ASSERT_TRUE(deviceHandle != nullptr);
ASSERT_TRUE(device->dev_addr_booted != nullptr);
ASSERT_TRUE(device->xlink != nullptr);
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
/**
unsigned int data_lenght_second = MAX_DEV_NAME;
// First open, get device name
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle, NC_RO_DEVICE_NAME,
dev_addr_first_open, &data_lenght_first));
// Second open, get device name
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle, NC_RO_DEVICE_NAME,
dev_addr_second_open, &data_lenght_second));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
// Should be the same device
ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open);
}
deviceDesc.protocol = _deviceProtocol;
deviceDesc.platform = NC_ANY_PLATFORM;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle1, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle1, deviceDesc, m_ncDeviceOpenParams));
// Till we don't have multiple device support, this function would try to open same device
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle2, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle2, deviceDesc, m_ncDeviceOpenParams));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle1));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle1, m_watchdogHndl));
}
deviceDesc.protocol = _deviceProtocol;
deviceDesc.platform = NC_ANY_PLATFORM;
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle->private_data->dev_addr;
ASSERT_TRUE(isSameProtocolDevice(actDeviceName, _deviceProtocol));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
// Second open
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc,
- watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
actDeviceName = deviceHandle->private_data->dev_addr;
ASSERT_TRUE(isSameProtocolDevice(actDeviceName, _deviceProtocol));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
//------------------------------------------------------------------------------
setLogLevel(MVLOG_INFO);
ncDeviceHandle_t * deviceHandle = nullptr;
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, m_ncDeviceOpenParams));
std::string content(buff);
for (int i = MVLOG_WARN; i < MVLOG_LAST; i++) {
*/
TEST_F(MvncCloseDevice, EmptyDeviceHandler) {
ncDeviceHandle_t *deviceHandle = nullptr;
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
}
/**
deviceHandlePtr = dH.get();
}
- ASSERT_EQ(ncDeviceClose(&deviceHandlePtr), NC_INVALID_PARAMETERS);
+ ASSERT_EQ(ncDeviceClose(&deviceHandlePtr, m_watchdogHndl), NC_INVALID_PARAMETERS);
}
//------------------------------------------------------------------------------
ASSERT_NO_ERROR(ncGraphDestroy(&_graphHandle[0]));
- ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0]));
+ ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0], m_watchdogHndl));
}
deviceDesc.platform = NC_ANY_PLATFORM;
ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(0));
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
std::this_thread::sleep_for(3_sec);
ASSERT_NO_ERROR(ncDeviceResetAll());
ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(30));
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl));
ASSERT_NO_ERROR(ncDeviceResetAll());
}
GTEST_SKIP();
// Use custom firmware dir path as parameter for ncDeviceOpen
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl));
}
/**
ncDeviceHandle_t * deviceHandles[MAX_DEVICES] = {nullptr};
for (int index = 0; index < availableDevices_; ++index) {
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandles[index], deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandles[index], deviceDesc_, m_ncDeviceOpenParams));
}
for (int index = 0; index < availableDevices_; ++index) {
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandles[index]));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandles[index], m_watchdogHndl));
}
}
for (int i = 0; i < availableDevices_; ++i) {
requests[i] = std::thread([i, &rc, &deviceHandle, this]() {
- rc[i] = ncDeviceOpen(&deviceHandle[i], deviceDesc_, watchdogInterval, firmwarePath);
+ rc[i] = ncDeviceOpen(&deviceHandle[i], deviceDesc_, m_ncDeviceOpenParams);
});
}
}
for (int i = 0; i < availableDevices_; ++i) {
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle[i]));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle[i], m_watchdogHndl));
}
}
const char invalidPath[MAX_PATH] = "./InvalidPath/";
// Use custom firmware dir path as parameter for ncDeviceOpen
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, invalidPath));
+ m_ncDeviceOpenParams.customFirmwareDirectory = invalidPath;
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
ASSERT_EQ(deviceHandle_, nullptr);
}
ASSERT_TRUE(availableDevices.size());
strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE);
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
dev_addr_open, &data_lenght));
ASSERT_TRUE(strncmp(dev_addr_open, deviceDesc_.name, NC_MAX_NAME_SIZE) == 0);
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl));
}
TEST_F(MvncOpenUSBDevice, ErrorWhenWrongDeviceName) {
auto availableDevices = getDevicesList();
ASSERT_TRUE(availableDevices.size());
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
}
TEST_F(MvncOpenUSBDevice, OpenTwiceSameHandlerByName) {
ASSERT_TRUE(availableDevices.size());
strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE);
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
dev_addr_first_open, &data_lenght_first));
// Second open, get device name
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
dev_addr_second_open, &data_lenght_second));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl));
// Should be the same device
ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open);
}
strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE);
deviceDesc_.platform = wrongPlatform;
- ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
}
//------------------------------------------------------------------------------
GTEST_SKIP();
ASSERT_NO_ERROR(ncDeviceOpen(
- &deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ &deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
ASSERT_TRUE(deviceHandle_);
};
strcpy(deviceDesc_.name, deviceHandle_->private_data->dev_addr);
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl));
deviceDesc_t foundDevice = {};
XLinkError_t rc = XLinkFindFirstSuitableDevice(
if (available_myriad2_ == 0 || available_myriadX_ == 0)
GTEST_SKIP();
- ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams));
char deviceName[MAX_DEV_NAME];
unsigned int size = MAX_DEV_NAME;
EXPECT_TRUE(isSamePlatformUSBDevice(deviceName, devicePlatform_));
- ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl));
}
extensions/front/AttributedGatherNormalizer.py
extensions/front/AttributedPadToPad.py
extensions/front/binary_quantize_normalization.py
+extensions/front/broadcast_with_range.py
extensions/front/caffe/__init__.py
extensions/front/caffe/accum_ext.py
extensions/front/caffe/argmax_ext.py
extensions/front/caffe/reorgyolo_ext.py
extensions/front/caffe/resample_ext.py
extensions/front/caffe/reshape.py
-extensions/front/caffe/ShuffleChannel.py
extensions/front/caffe/shufflechannel_ext.py
extensions/front/caffe/sigmoid.py
extensions/front/caffe/simplernms_ext.py
extensions/front/mxnet/conv_ext.py
extensions/front/mxnet/copy_ext.py
extensions/front/mxnet/crop_ext.py
+extensions/front/mxnet/cumsum.py
+extensions/front/mxnet/cumsum_ext.py
extensions/front/mxnet/custom.py
extensions/front/mxnet/custom_rpn_proposal.py
extensions/front/mxnet/deformable_conv_ext.py
extensions/front/onnx/constant_of_shape_to_broadcast.py
extensions/front/onnx/conv_ext.py
extensions/front/onnx/crop_ext.py
+extensions/front/onnx/cumsum_ext.py
extensions/front/onnx/deformable_conv_ext.py
extensions/front/onnx/detection_output.py
extensions/front/onnx/detectionoutput_ext.py
extensions/front/onnx/proposal_ext.py
extensions/front/onnx/quantize_dequantize_linear.py
extensions/front/onnx/quantize_ext.py
+extensions/front/onnx/range_ext.py
extensions/front/onnx/reduce_max_ext.py
extensions/front/onnx/reduce_mean_ext.py
extensions/front/onnx/reduce_min_ext.py
extensions/front/tf/CropAndResizeReplacement.py
extensions/front/tf/CTCGreedyDecoder.py
extensions/front/tf/CTCGreedyDecoder_ext.py
+extensions/front/tf/cumsum_ext.py
extensions/front/tf/deconv_ext.py
extensions/front/tf/depth_to_space.py
extensions/front/tf/elementwise_ext.py
extensions/front/tf/placeholder_with_default_ext.py
extensions/front/tf/pooling_ext.py
extensions/front/tf/prelu.py
+extensions/front/tf/range_ext.py
extensions/front/tf/reduce_ext.py
extensions/front/tf/reshape_related_ext.py
extensions/front/tf/resize_bilinear.py
extensions/front/tf/UnpackPackReverseInputChannels.py
extensions/front/tf/variable_ext.py
extensions/front/tf/variables_values_freezing.py
+extensions/front/tf/WhereDecomposition.py
extensions/front/tf/yolo_v1.json
extensions/front/tf/yolo_v1_tiny.json
extensions/front/tf/yolo_v2.json
extensions/ops/copyop.py
extensions/ops/correlation.py
extensions/ops/ctc_greedy_decoder.py
+extensions/ops/cumsum.py
extensions/ops/data_augmentation.py
extensions/ops/depth_to_space.py
extensions/ops/DetectionOutput.py
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.gather import Gather
+from extensions.ops.range import Range
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input
+from mo.graph.graph import Graph, rename_nodes, Node
+from mo.ops.unsqueeze import Unsqueeze
+
+
+class ExpandRangeConstant(FrontReplacementSubgraph):
+ """
+ Searches for Constant operations filled with range values starting from 0 and replaces them with a Range operation
+ Faced in ONNX BERT -- replacing it makes model reshape-able by sequence length
+
+ The limit of the created Range is not a constant: it is gathered from the Broadcast target shape input
+ at the axis along which the original constant was laid out.
+
+ WARNING: true BIDIRECTIONAL mode of Broadcast could cause issues
+ (the probability is small, so we decided to keep the optimization)
+
+ value_input[1, X] (value=range(0,X)) shape_input[Y, 1]
+ \ /
+ Broadcast(mode='bidirectional') [Y, X]
+ """
+ enabled = True
+
+ def find_and_replace_pattern(self, graph: Graph):
+ """
+ Visits every Broadcast node of the graph and attempts the replacement when its input 0 is produced by a Const.
+ """
+ for node in graph.get_op_nodes(type='Broadcast'):
+ value = node.in_port(0).get_source().node
+ if value.soft_get('type') == 'Const':
+ self.replace(node, value)
+
+ @staticmethod
+ def replace(node: Node, const: Node):
+ """
+ Swaps the constant feeding input 0 of a Broadcast for a Range sub-graph.
+
+ Nothing is changed unless the constant has exactly one non-unit dimension, its element count lies strictly
+ between 5 and 500, and its values equal np.arange over that element count.
+
+ :param node: Broadcast node whose input 0 is fed by `const`
+ :param const: Const node that is a candidate for the replacement
+ """
+ graph = node.graph
+ shape = const.shape
+ const_name = const.soft_get('name', const.id)
+
+ # axes whose size differs from 1 and axes whose size equals 1
+ non_one_dims = np.argwhere(shape != 1).flatten()
+ one_dims = np.argwhere(shape == 1).flatten()
+
+ if not (non_one_dims.size == 1 and 5 < np.prod(shape) < 500):
+ # the (5;500) element-count window was chosen so that fewer models are affected
+ return
+
+ value = const.value
+ # only constants equal to 0, 1, ..., N-1 are replaced
+ if not np.array_equal(np.arange(0, np.prod(shape), 1).reshape(shape), value):
+ return
+
+ # the single non-unit axis is addressed from the end (negative index) when reading the Broadcast target shape
+ positive_idx = non_one_dims.item(0)
+ negative_idx = positive_idx - len(shape)
+ # Gather inputs: 1 - index to pick, 2 - axis 0; input 0 is connected to the target shape below
+ gather = create_op_with_const_inputs(graph, Gather, {1: int64_array(negative_idx), 2: int64_array(0)},
+ {'name': node.soft_get('name', node.id) + '/BroadcastingDim'})
+
+ # Range inputs 0 and 2 are constants 0 and 1 in the dtype of the original value; input 1 comes from the Gather
+ range_node = create_op_with_const_inputs(graph, Range,
+ {0: np.array(0, dtype=value.dtype),
+ 2: np.array(1, dtype=value.dtype)},
+ {'name': const_name + '/Range', 'dtype': value.dtype})
+
+ # the producer of the Broadcast target shape additionally feeds the Gather
+ node.in_port(1).get_connection().add_destination(gather.in_port(0))
+ gather.out_port(0).connect(range_node.in_port(1))
+ # the Broadcast now takes its data input from the Range instead of the constant
+ node.in_port(0).get_connection().set_source(range_node.out_port(0))
+
+ if one_dims.size:
+ # re-insert the size-1 axes of the original constant after the Range
+ unsqueeze = create_op_node_with_second_input(graph, Unsqueeze, one_dims,
+ {'name': const_name + '/KeepShape'})
+ range_node.out_port(0).get_connection().insert_node(unsqueeze)
+ # the last node of the new sub-graph takes over the name of the constant
+ rename_nodes([(const, const_name + '/ToBeDeleted'), (unsqueeze, const_name)])
+ else:
+ rename_nodes([(const, const_name + '/ToBeDeleted'), (range_node, const_name)])
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.front.broadcast_with_range import ExpandRangeConstant
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \
+ regular_op_with_empty_data, connect_data
+
+
+class TestRangeBroadcast(unittest.TestCase):
+ """
+ Unit test for the ExpandRangeConstant transformation.
+ """
+ def test_broadcast_with_range_positive_test(self):
+ """
+ A [1, 384] constant holding 0..383 that feeds a Broadcast must be replaced with
+ Gather -> Range -> Unsqueeze driven by the Broadcast target shape input.
+ """
+ graph = build_graph({
+ **regular_op_with_shaped_data('shape', [2], {'type': 'Parameter'}),
+ **valued_const_with_data('value', np.arange(0, 384).reshape((1, 384))),
+ **regular_op_with_empty_data('bc', {'type': 'Broadcast'}),
+ **result(),
+ }, [
+ *connect('value', '0:bc'),
+ *connect('shape', '1:bc'),
+ *connect('bc', 'output'),
+ ], nodes_with_edges_only=True)
+ ExpandRangeConstant().find_and_replace_pattern(graph)
+
+ graph_ref = build_graph({
+ **regular_op_with_shaped_data('shape', [2], {'type': 'Parameter'}),
+
+ # start
+ **valued_const_with_data('start', np.array(0)),
+ # limit
+ **valued_const_with_data('minus_one', np.array(-1)),
+ **valued_const_with_data('zero', np.array(0)),
+ **regular_op_with_empty_data('range_dim', {'type': 'Gather'}),
+ # delta
+ **valued_const_with_data('delta', np.array(1)),
+ **regular_op_with_empty_data('range', {'type': 'Range'}),
+
+ # keep dims
+ **valued_const_with_data('axes', np.array([0])),
+ **regular_op_with_empty_data('keep_shape', {'type': 'Unsqueeze'}),
+
+ **regular_op_with_empty_data('bc', {'type': 'Broadcast'}),
+ **result(),
+ }, [
+ *connect('start', '0:range'),
+ *connect('shape', '0:range_dim'),
+ *connect('minus_one', '1:range_dim'),
+ *connect('zero', '2:range_dim'),
+ *connect('range_dim', '1:range'),
+ *connect('delta', '2:range'),
+ *connect('range', '0:keep_shape'),
+ *connect('axes', '1:keep_shape'),
+ *connect('keep_shape', '0:bc'),
+ # 'shape' already has its data node from the edge to range_dim, so only the data edge is added here
+ *connect_data('shape', '1:bc'),
+ *connect('bc', 'output'),
+ ], nodes_with_edges_only=True)
+
+ (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+ self.assertTrue(flag, resp)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import numpy as np
-
-from extensions.ops.Cast import Cast
-from extensions.ops.elementwise import Div
-from extensions.ops.transpose import Transpose
-from mo.front.common.partial_infer.utils import int64_array
-from mo.front.common.replacement import FrontReplacementPattern
-from mo.front.tf.graph_utils import create_op_node_with_second_input
-from mo.graph.graph import Graph, Node, rename_node
-from mo.ops.const import Const
-from mo.ops.reshape import Reshape
-from mo.ops.shape import Shape
-from mo.utils.shape import node_to_get_features_dimension_value, node_to_get_batch_value, \
- new_shape_node_from_shape_nodes
-
-
-class ShuffleChannel(FrontReplacementPattern):
- """
- Before:
- ShuffleChannel(group)
-
- After:
- Reshape[input_batch, group, input_channels/group, -1]
- \/
- Transpose[0, 2, 1, 3]
- \/
- Reshape[input_shape]
- """
- enabled = True
- graph_condition = [lambda graph: graph.graph['layout'] == 'NCHW']
-
- @staticmethod
- def decompose_shuffle_channel(node: Node):
- graph = node.graph
- name = node.soft_get('name', node.id)
-
- rename_node(node, name + '/to_be_removed')
-
- shape = Shape(graph, dict(name=name + '/InputShape')).create_node()
- shape.in_port(0).connect(node.in_port(0).get_source())
-
- # Reshape [input_batch, group, input_channels/group, -1]
- batch = node_to_get_batch_value(shape)
- group = Const(graph, dict(name=name + '/Rows', value=int64_array([node.group]))).create_node()
- const = Const(graph, dict(name=name + '/Const', value=int64_array([-1]))).create_node()
-
- input_channels = node_to_get_features_dimension_value(shape)
- output_channels = create_op_node_with_second_input(
- graph, Div, np.int64(node.group), {'name': name + '/Cols'}, input_node=input_channels)
- i_output_channels = Cast(graph, {'name': output_channels.name + '/Convert', 'dst_type': np.int64}).create_node()
- output_channels.out_port(0).connect(i_output_channels.in_port(0))
-
- reshape_split_dim = new_shape_node_from_shape_nodes([batch, group, i_output_channels, const])
- reshape_split_node = Reshape(graph, dict(name=name + '/Reshape_split_')).create_node()
- reshape_split_dim.out_port(0).connect(reshape_split_node.in_port(1))
-
- # Transpose(0, 2, 1, 3)
- transpose_node = create_op_node_with_second_input(
- graph, Transpose, int64_array([0, 2, 1, 3]), {'name': name + '/Transpose_'}, input_node=reshape_split_node)
-
- # Reshape back to input shape
- reshape_concat = Reshape(graph, dict(name=name)).create_node()
- rename_node(reshape_concat, name)
-
- shape.out_port(0).connect(reshape_concat.in_port(1))
- transpose_node.out_port(0).connect(reshape_concat.in_port(0))
-
- # Final connections
- node.in_port(0).get_connection().set_destination(reshape_split_node.in_port(0))
- node.out_port(0).get_connection().set_source(reshape_concat.out_port(0))
-
- def find_and_replace_pattern(self, graph: Graph):
- for shuffle_channel in graph.get_op_nodes(op='ShuffleChannel'):
- self.decompose_shuffle_channel(shuffle_channel)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-import unittest
-
-from extensions.front.caffe.ShuffleChannel import ShuffleChannel
-from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Node
-from mo.utils.ir_engine.compare_graphs import compare_graphs
-from mo.utils.unittest.graph import build_graph
-
-nodes_attributes = {
- 'placeholder': {'kind': 'op', 'op': 'Parameter', 'shape': int64_array([1, 48, 28, 28])},
- 'shuffle_channel': {'kind': 'op', 'op': 'ShuffleChannel', 'group': int64_array(2), 'name': 'scname'},
- 'result': {'kind': 'op', 'op': 'Result'},
-
- 'shape': {'op': 'ShapeOf', 'kind': 'op'},
- 'batch_gather': {'op': 'Gather', 'kind': 'op'},
- 'batch_gather_idx': {'value': int64_array([0]), 'kind': 'op', 'type': 'Const'},
- 'batch_gather_axis': {'value': int64_array(0), 'kind': 'op', 'type': 'Const'},
-
- 'group': {'value': int64_array([2]), 'kind': 'op', 'type': 'Const'},
-
- 'channel_gather': {'op': 'Gather', 'kind': 'op'},
- 'channel_gather_idx': {'value': int64_array([1]), 'kind': 'op', 'type': 'Const'},
- 'channel_gather_axis': {'value': int64_array(0), 'kind': 'op', 'type': 'Const'},
-
- 'output_channels': {'op': 'Div', 'kind': 'op'},
- 'div_group': {'value': int64_array([2]), 'kind': 'op', 'type': 'Const'},
- 'convert': {'op': 'Cast', 'kind': 'op'},
- 'const': {'value': int64_array([-1]), 'kind': 'op', 'type': 'Const'},
- 'concat': {'op': 'Concat', 'kind': 'op'},
- 'reshape_split': {'op': 'Reshape', 'kind': 'op'},
- 'transpose': {'op': 'Transpose', 'kind': 'op'},
- 'transpose_const': {'value': int64_array([0, 2, 1, 3]), 'kind': 'op', 'type': 'Const'},
- 'reshape_concat': {'op': 'Reshape', 'kind': 'op'}
-}
-
-
-class ShuffleChannelTests(unittest.TestCase):
- def test_1(self):
- graph = build_graph(nodes_attributes,
- [
- ('placeholder', 'shuffle_channel'),
- ('shuffle_channel', 'result')
- ],
- nodes_with_edges_only=True)
- graph.graph['layout'] = 'NCHW'
- graph.stage = 'front'
-
- ref_graph = build_graph(nodes_attributes,
- [
- ('placeholder', 'shape', {'in': 0, 'out': 0}),
-
- ('shape', 'batch_gather', {'in': 0, 'out': 0}),
- ('batch_gather_idx', 'batch_gather', {'in': 1, 'out': 0}),
- ('batch_gather_axis', 'batch_gather', {'in': 2, 'out': 0}),
-
- ('shape', 'channel_gather', {'in': 0, 'out': 0}),
- ('channel_gather_idx', 'channel_gather', {'in': 1, 'out': 0}),
- ('channel_gather_axis', 'channel_gather', {'in': 2, 'out': 0}),
-
- ('channel_gather', 'output_channels', {'in': 0, 'out': 0}),
- ('div_group', 'output_channels', {'in': 1, 'out': 0}),
- ('output_channels', 'convert', {'in': 0, 'out': 0}),
-
- ('batch_gather', 'concat', {'in': 0, 'out': 0}),
- ('group', 'concat', {'in': 1, 'out': 0}),
- ('convert', 'concat', {'in': 2, 'out': 0}),
- ('const', 'concat', {'in': 3, 'out': 0}),
-
- ('placeholder', 'reshape_split', {'in': 0, 'out': 0}),
- ('concat', 'reshape_split', {'in': 1, 'out': 0}),
-
- ('reshape_split', 'transpose', {'in': 0, 'out': 0}),
- ('transpose_const', 'transpose', {'in': 1, 'out': 0}),
-
- ('transpose', 'reshape_concat', {'in': 0, 'out': 0}),
- ('shape', 'reshape_concat', {'in': 1, 'out': 0}),
-
- ('reshape_concat', 'result')
- ],
- nodes_with_edges_only=True)
-
- ShuffleChannel().find_and_replace_pattern(graph)
- (flag, resp) = compare_graphs(graph, ref_graph, 'result', check_op_attrs=True)
- self.assertTrue(flag, resp)
- self.assertTrue(Node(graph, 'result').in_port(0).get_source().node.name == 'scname')
See the License for the specific language governing permissions and
limitations under the License.
"""
-from extensions.ops.shufflechannel import ShuffleChannelOp
+from extensions.ops.shufflechannel import ShuffleChannels
from mo.front.caffe.collect_attributes import collect_attributes
from mo.front.common.extractors.utils import layout_attrs
from mo.front.extractor import FrontExtractorOp
mapping_rule.update(layout_attrs())
# update the attributes of the node
- ShuffleChannelOp.update_node_stat(node, mapping_rule)
+ ShuffleChannels.update_node_stat(node, mapping_rule)
return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.Cast import Cast
+from extensions.ops.cumsum import CumSum
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs, mxnet_str_dtype_to_np
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Graph, rename_node, Node
+from mo.ops.const import Const
+
+
+class CumSumFrontReplacer(FrontReplacementOp):
+ """
+ Replaces the MXNetCumSum operation with CumSum (reverse=False, exclusive=False) whose second input is the axis.
+ When the node carries a non-None 'mx_out_type' attribute, a Cast to that type is appended after the CumSum.
+ """
+ op = 'MXNetCumSum'
+ enabled = True
+
+ def replace_op(self, graph: Graph, node: Node):
+ """
+ Builds the replacement sub-graph for a single MXNetCumSum node.
+
+ :param graph: graph being transformed
+ :param node: MXNetCumSum node to replace
+ :return: list with the id of the node that produces the result (the Cast when it is created, else the CumSum)
+ """
+ name = node.soft_get('name', node.id)
+ axis = node.soft_get('axis', 0)
+
+ # the original node gives up its name so that the replacement can take it
+ rename_node(node=node, name=name + '/to_be_removed')
+ cumsum_node = create_op_node_with_second_input(graph, CumSum, int64_array(axis),
+ {'name': name, 'reverse': False, 'exclusive': False})
+ rename_node(cumsum_node, name)
+
+ # move the data input of the original node to the CumSum
+ node.in_port(0).get_connection().set_destination(cumsum_node.in_port(0))
+ if node.has_valid('mx_out_type') and node['mx_out_type'] is not None:
+ # the Cast becomes the final node and takes the original name, so the CumSum is renamed out of the way
+ rename_node(node=cumsum_node, name=name + '/Clamp')
+ convert = Cast(graph, {'name': name, 'dst_type': node['mx_out_type']}).create_node()
+ rename_node(convert, name)
+ cumsum_node.out_port(0).connect(convert.in_port(0))
+ return [convert.id]
+ else:
+ return [cumsum_node.id]
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+from extensions.ops.cumsum import MXNetCumSum
+from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs, mxnet_str_dtype_to_np
+
+
+class CumSumExtractor(FrontExtractorOp):
+ """
+ Extracts the MXNet '_np_cumsum' layer into the MXNetCumSum operation.
+ """
+ op = '_np_cumsum'
+ enabled = True
+
+ @classmethod
+ def extract(cls, node):
+ """
+ Reads 'axis' (default 0) and 'dtype' (default None) from the layer attributes and stores them on the node
+ as 'axis' and 'mx_out_type'.
+
+ :param node: node with the MXNet layer description in `symbol_dict`
+ :return: the `enabled` flag of the extractor
+ """
+ attrs = get_mxnet_layer_attrs(node.symbol_dict)
+
+ update_attrs = {
+ 'axis': attrs.int('axis', 0),
+ 'mx_out_type': attrs.dtype('dtype', None)
+ }
+
+ MXNetCumSum.update_node_stat(node, update_attrs)
+ return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.cumsum import CumSum
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class CumSumFrontExtractor(FrontExtractorOp):
+ """
+ Extracts the ONNX CumSum operation.
+ """
+ op = 'CumSum'
+ enabled = True
+
+ @classmethod
+ def extract(cls, node):
+ """
+ Reads the integer attributes 'exclusive' and 'reverse' (both default to 0) and stores them on the node.
+
+ :param node: ONNX CumSum node
+ :return: the `enabled` flag of the extractor
+ """
+ exclusive = onnx_attr(node, 'exclusive', 'i', 0)
+ reverse = onnx_attr(node, 'reverse', 'i', 0)
+ CumSum.update_node_stat(node, {'exclusive': exclusive, 'reverse': reverse})
+ return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.range import Range
+from mo.front.extractor import FrontExtractorOp
+from mo.graph.graph import Node
+
+
+class RangeFrontExtractor(FrontExtractorOp):
+ """
+ Extracts the ONNX Range operation; no operation-specific attributes are read from the ONNX node.
+ """
+ op = 'Range'
+ enabled = True
+
+ @classmethod
+ def extract(cls, node: Node):
+ """
+ Marks the node as a Range operation.
+
+ :param node: ONNX Range node
+ :return: the `enabled` flag of the extractor
+ """
+ # an empty attribute dict is passed: only what Range.update_node_stat itself sets is applied
+ Range.update_node_stat(node, {})
+ return cls.enabled
+
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.non_zero import NonZero
+from extensions.ops.transpose import Transpose
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementOp
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Node, Graph, rename_nodes
+
+
+class WhereDecomposition(FrontReplacementOp):
+ """
+ This transformation decomposes the TF layer Where (when x = None, y = None) using the formula
+ Where(condition) = Transpose(NonZero(condition), [1, 0])
+ """
+ op = 'Where'
+ enabled = True
+
+ def run_after(self):
+ """
+ :return: the transformations this one declares itself to run after
+ """
+ # imported inside the method rather than at module level
+ from extensions.front.tf.sparse_weighted_sum import ExperimentalSparseWeightedSumFrontReplacer
+ from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer
+ return [ExperimentalSparseWeightedSumFrontReplacer, TransposeOrderNormalizer]
+
+ def replace_op(self, graph: Graph, node: Node):
+ """
+ Builds NonZero (int64 output type) followed by Transpose with order [1, 0] for a single Where node.
+
+ :param graph: graph being transformed
+ :param node: Where node to replace
+ :return: list with the id of the Transpose node that produces the result
+ """
+ node_name = node.soft_get('name', node.id)
+ non_zero_node = NonZero(graph, {'name': node_name + '/NonZero_', 'output_type': np.int64}).create_node()
+ # the Transpose is created without a name; it receives the original node name in rename_nodes below
+ transpose_node = create_op_node_with_second_input(graph, Transpose, int64_array([1, 0]), op_attrs={})
+ non_zero_node.out_port(0).connect(transpose_node.in_port(0))
+ rename_nodes([(node, node_name + '/delete'), (transpose_node, node_name)])
+
+ # NonZero reads from the same source as input 0 of the original Where node
+ non_zero_node.in_port(0).connect(node.in_port(0).get_source())
+ return [transpose_node.id]
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+
+import unittest
+
+import numpy as np
+
+from generator import generator, generate
+
+from extensions.front.tf.WhereDecomposition import WhereDecomposition
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+
+# nodes of the input graph: Parameter -> Where -> Result
+graph_node_attrs = {
+ 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
+ 'placeholder_data': {
+ 'value': None,
+ 'shape': None,
+ 'kind': 'data',
+ 'data_type': None
+ },
+ 'tf_where': {'op': 'Where', 'kind': 'op'},
+ 'tf_where_data': {'kind': 'data'},
+ 'output': {'kind': 'op', 'op': 'Result'},
+}
+
+
+# edges of the input graph
+graph_edges = [
+ ('placeholder', 'placeholder_data'),
+ ('placeholder_data', 'tf_where'),
+ ('tf_where', 'tf_where_data'),
+ ('tf_where_data', 'output'),
+]
+
+
+# nodes of the reference graph: Parameter -> NonZero -> Transpose (order [1, 0]) -> Result
+ref_graph_node_attrs = {
+ 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
+ 'placeholder_data': {
+ 'value': None,
+ 'shape': None,
+ 'kind': 'data',
+ 'data_type': None
+ },
+ 'non_zero': {'kind': 'op', 'op': 'NonZero', 'output_type': np.int64},
+ 'non_zero_data': {'kind': 'data'},
+ 'transpose': {'kind': 'op', 'op': 'Transpose'},
+ 'transpose_data': {'kind': 'data'},
+ 'perm_const': {'kind': 'op', 'op': 'Const', 'shape': [2], 'value': int64_array([1, 0])},
+ 'perm_const_data': {'kind': 'data', 'shape': [2], 'value': int64_array([1, 0])},
+ 'output': {'kind': 'op', 'op': 'Result'},
+}
+
+# edges of the reference graph; Transpose takes the NonZero result on input 0 and the order on input 1
+ref_graph_edges = [
+ ('placeholder', 'placeholder_data'),
+ ('placeholder_data', 'non_zero'),
+ ('non_zero', 'non_zero_data'),
+ ('non_zero_data', 'transpose', {'in': 0}),
+ ('perm_const', 'perm_const_data'),
+ ('perm_const_data', 'transpose', {'in': 1}),
+ ('transpose', 'transpose_data'),
+ ('transpose_data', 'output'),
+]
+
+
+@generator
+class TFWhereDecompositionTest(unittest.TestCase):
+ """
+ Checks that WhereDecomposition turns Where into NonZero followed by Transpose.
+ """
+ # the test is generated once per input shape listed below
+ @generate(*[[1, 100, 120, 150], [16, 125, 14]])
+ def test_1(self, input_shape):
+ """
+ Applies the transformation to the Where graph and compares the result with the reference graph.
+
+ :param input_shape: shape assigned to the placeholder data node of both graphs
+ """
+ in_shape = int64_array(input_shape)
+ graph = build_graph(graph_node_attrs,
+ graph_edges,
+ update_attributes={
+ 'placeholder_data': {'shape': in_shape}
+ })
+ WhereDecomposition().find_and_replace_pattern(graph)
+ ref_graph = build_graph(ref_graph_node_attrs,
+ ref_graph_edges,
+ update_attributes={
+ 'placeholder_data': {'shape': in_shape}
+ })
+ (flag, resp) = compare_graphs(graph, ref_graph, 'output')
+ self.assertTrue(flag, resp)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.cumsum import CumSum
+from mo.front.extractor import FrontExtractorOp
+
+
+class CumSumExtractor(FrontExtractorOp):
+ """
+ Extracts the TensorFlow 'Cumsum' operation.
+ """
+ op = 'Cumsum'
+ enabled = True
+
+ @classmethod
+ def extract(cls, node):
+ """
+ Reads the boolean attributes 'exclusive' and 'reverse' from the node protobuf and stores them on the node.
+
+ :param node: TensorFlow Cumsum node
+ :return: the `enabled` flag of the extractor
+ """
+ exclusive = node.pb.attr['exclusive'].b
+ reverse = node.pb.attr['reverse'].b
+ CumSum.update_node_stat(node, {'exclusive': exclusive, 'reverse': reverse})
+ return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.range import Range
+from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.extractors.utils import tf_dtype_extractor
+from mo.graph.graph import Node
+
+
+class RangeFrontExtractor(FrontExtractorOp):
+ op = 'Range'
+ enabled = True
+
+ @classmethod
+ def extract(cls, node: Node):
+ Range.update_node_stat(node, {'dtype': tf_dtype_extractor(node.pb.attr['type'].type)})
+ return cls.enabled
+
limitations under the License.
"""
-import logging as log
-
import numpy as np
from mo.graph.graph import Node, Graph
for flag in flags:
node[flag] = False
- def find_and_replace_pattern(self, graph: Graph):
+ def mark_eltwise_node(self, node, feature_channel=None):
+ """
+ Checks a single elementwise node and sets its 'can_be_fused' and/or 'can_be_scaleshift' flags to False
+ when one of the checks below fails. Flags of a node that passes every check are left untouched.
+
+ :param node: elementwise operation node to check
+ :param feature_channel: index of the feature axis; when None the axis is derived from the graph layout
+ """
+ tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(node)
+ if tensor_port is None or value_port is None:
+ self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
+ return
+
+ # nodes that do not have exactly two connected inputs are skipped with their flags unchanged
+ connected_in_ports = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()}
+ if len(connected_in_ports) != 2:
+ return
+
+ tensor_shape = tensor_port.data.get_shape()
+ out_shape = node.out_port(0).data.get_shape()
+ assert tensor_shape is not None and out_shape is not None
+ if not np.array_equal(tensor_shape, out_shape):
+ # ScaleShift operation doesn't support broadcasting
+ self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
+ return
+
+ value_shape = value_port.data.get_shape()
+ assert value_shape is not None
+ assert len(value_shape) <= len(tensor_shape), \
+ "No broadcasting was done for elementwise node {} due to previous checks in EltwiseChecker class. " \
+ "But constant input rank is larger than tensor input rank, that is inconsistent".format(node.name)
+
+ # if both tensors are 0D they cannot be converted to scaleshift
+ if len(tensor_shape) == 0 and len(value_shape) == 0:
+ self.set_flags_to_false(node, ['can_be_scaleshift'])
+ return
+
+ # left-pad the value shape with ones so that it has the same rank as the tensor shape
+ broadcasted_value_shape = np.insert(value_shape, 0, [1] * (len(tensor_shape) - len(value_shape)))
+
+ # default feature axis: 1 (clamped to the last axis) for the 'NCHW' layout, the last axis otherwise
+ feature_dim = min(1, tensor_shape.size - 1) if node.graph.graph['layout'] == 'NCHW' else -1
+ # an explicitly passed feature axis overrides the layout-based default
+ if feature_channel is not None:
+ feature_dim = feature_channel
+ ones = np.ones(len(tensor_shape))
+ possible_shape = ones.copy()
+ np.put(possible_shape, feature_dim, tensor_shape.item(feature_dim))
+
+ if not np.array_equal(broadcasted_value_shape, ones) and \
+ not np.array_equal(broadcasted_value_shape, possible_shape):
+ # ScaleShift weights should have [1,C,1,1]-like or [1,1,1,1]-like shape
+ self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
+ return
+
+ if len(tensor_shape) not in [2, 4, 5]:
+ # ScaleShift operation is supported for 2D, 4D or 5D tensor inputs
+ self.set_flags_to_false(node, ['can_be_scaleshift'])
+ return
+
+ def find_and_replace_pattern(self, graph: Graph, feature_channel=None):
+     """
+     Runs mark_eltwise_node on every operation node of the graph that has is_eltwise=True.
+
+     :param graph: graph whose elementwise nodes are checked
+     :param feature_channel: optional index of the feature axis forwarded to mark_eltwise_node; with the
+            default None the feature axis is derived from the graph layout
+     """
for node in graph.get_op_nodes(is_eltwise=True):
- log.debug('Checking eltwise op {}'.format(node.soft_get('name', node.id)))
- tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(node)
- if tensor_port is None or value_port is None:
- self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
- continue
-
- connected_in_ports = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()}
- if len(connected_in_ports) != 2:
- continue
-
- tensor_shape = tensor_port.data.get_shape()
- out_shape = node.out_port(0).data.get_shape()
- assert tensor_shape is not None and out_shape is not None
- if not np.array_equal(tensor_shape, out_shape):
- # ScaleShift operation doesn't support broadcasting
- self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
- continue
-
- value_shape = value_port.data.get_shape()
- assert value_shape is not None
- assert len(value_shape) <= len(tensor_shape), \
- "No broadcasting was done for elementwise node {} due to previous checks in EltwiseChecker class. " \
- "But constant input rank is larger than tensor input rank, that is inconsistent".format(node.name)
-
- # if both tensors are 0D they cannot be converted to scaleshift
- if len(tensor_shape) == 0 and len(value_shape) == 0:
- self.set_flags_to_false(node, ['can_be_scaleshift'])
- continue
-
- broadcasted_value_shape = np.insert(value_shape, 0, [1] * (len(tensor_shape) - len(value_shape)))
-
- feature_dim = min(1, tensor_shape.size - 1) if node.graph.graph['layout'] == 'NCHW' else -1
- ones = np.ones(len(tensor_shape))
- possible_shape = ones.copy()
- np.put(possible_shape, feature_dim, tensor_shape.item(feature_dim))
-
- if not np.array_equal(broadcasted_value_shape, ones) and \
- not np.array_equal(broadcasted_value_shape, possible_shape):
- # ScaleShift weights should have [1,C,1,1]-like or [1,1,1,1]-like shape
- self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift'])
- continue
-
- if len(tensor_shape) not in [2, 4, 5]:
- # ScaleShift operation is supported for 2D, 4D or 5D tensor inputs
- self.set_flags_to_false(node, ['can_be_scaleshift'])
- continue
+         # forward the caller's feature axis; previously the argument was accepted but silently ignored
+         self.mark_eltwise_node(node, feature_channel)
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
+
from extensions.middle.BinarizeWeightsM1P1 import BinarizeWeightsM1P1
from extensions.middle.DeleteControlFlowEdges import DeleteControlFlowEdges
from extensions.middle.EltwiseChecker import EltwiseChecker
from mo.graph.graph import Graph
+from mo.middle.passes.fusing.helpers import get_value_in_port
from mo.middle.replacement import MiddleReplacementPattern
def run_before(self):
return []
+ @staticmethod
+ def mark_fusable_muls_on_weights(graph):
+     """
+     Marks Mul nodes as fusable ('can_be_fused' = True) when their only consumer has type Convolution,
+     Deconvolution or MatMul and their value input has exactly one dimension that is not equal to 1.
+     The marked node is then re-checked by EltwiseChecker.mark_eltwise_node with that dimension passed
+     as the feature channel, so the checker may still reset the flag.
+
+     :param graph: graph to process
+     """
+     for node in graph.get_op_nodes(op='Mul'):
+         children = node.out_port(0).get_destinations()
+         # exactly one consumer is required; a Mul without consumers must be skipped as well, otherwise
+         # indexing children[0] below would fail
+         if len(children) != 1 or \
+                 children[0].node.soft_get('type') not in ['Convolution', 'Deconvolution', 'MatMul']:
+             continue
+         value_in_port = get_value_in_port(node)
+         if value_in_port is None:
+             continue
+         value_shape = value_in_port.data.get_shape()
+         if value_shape is None:
+             continue
+         # indices of the dimensions of the value input that are different from 1
+         non_one_axis = np.argwhere(value_shape != 1)
+         if non_one_axis.size != 1:
+             continue
+         non_one_axis = non_one_axis.item(0)
+         node['can_be_fused'] = True
+         EltwiseChecker().mark_eltwise_node(node, non_one_axis)
+
def find_and_replace_pattern(self, graph: Graph):
# to prevent fusing of non per channel lin ops, we run EltwiseChecker to mark nodes with can_be_fused attribute
EltwiseChecker().find_and_replace_pattern(graph)
+ self.mark_fusable_muls_on_weights(graph)
eltwise_nodes = graph.get_op_nodes(op='Mul', can_be_fused=True) + \
graph.get_op_nodes(op='Sub', can_be_fused=True) + \
graph.get_op_nodes(op='Add', can_be_fused=True)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+def cumsum(a, axis=None, exclusive=False, reverse=False):
+    """
+    Cumulative sum of the elements of `a` with optional exclusive and reverse modes.
+
+    :param a: input array
+    :param axis: axis along which the sum is accumulated; None means the sum is computed over the
+           flattened input (same convention as np.cumsum)
+    :param exclusive: if True the i-th output element does not include the i-th input element
+    :param reverse: if True the sum is accumulated starting from the end of the axis
+    :return: array with the cumulative sums; 1D when axis is None, otherwise of the same shape as `a`
+    """
+    if axis is None:
+        # np.cumsum flattens its input when axis is None. Flatten up front so that the flips and the
+        # exclusive subtraction below operate on arrays of the same 1D shape; without this the
+        # subtraction fails for inputs of rank higher than 1
+        a = np.ravel(a)
+        axis = 0
+    if reverse:
+        a = np.flip(a, axis)
+    res = np.cumsum(a, axis=axis)
+    if exclusive:
+        # inclusive sum minus the element itself gives the exclusive sum
+        res -= a
+    if reverse:
+        res = np.flip(res, axis)
+    return res
+
+
+class CumSum(Op):
+    """
+    CumSum operation (version 'opset3') with the data input on port 0 and the axis input on port 1.
+
+    The boolean attributes 'exclusive' and 'reverse' select the summation mode; both are treated as False
+    by the shape inference function when they are not set on the node.
+    """
+    enabled = False
+    op = 'CumSum'
+    version = 'opset3'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        super().__init__(graph, {
+            'op': self.op,
+            'type': self.op,
+            'version': self.version,
+
+            'infer': self.infer,
+
+            'in_ports_count': 2,
+            'out_ports_count': 1,
+        }, attrs)
+
+    def supported_attrs(self):
+        """Returns the names of the operation attributes: 'exclusive' and 'reverse'."""
+        return ["exclusive", "reverse"]
+
+    @staticmethod
+    def infer(node: Node):
+        """
+        Sets the output shape equal to the shape of input 0 and, when the input value is known,
+        propagates the output value.
+
+        :param node: CumSum node to infer
+        :raises AssertionError: if the input shape is unknown or the connected axis input is not a scalar
+        """
+        node_name = node.soft_get('name', node.id)
+
+        input_shape = node.in_port(0).data.get_shape()
+        assert input_shape is not None, 'Input shape is None for node "{}"'.format(node_name)
+        axis_connected = not node.in_port(1).disconnected()
+        if axis_connected:
+            assert len(node.in_port(1).data.get_shape()) == 0, 'Axis is not scalar for node: {}'.format(node_name)
+
+        node.out_port(0).data.set_shape(input_shape.copy())
+
+        input_value = node.in_port(0).data.get_value()
+        if input_value is None:
+            return
+
+        axis = None
+        if axis_connected:
+            axis = node.in_port(1).data.get_value()
+            if axis is None:
+                # the axis input is connected but its value is not known: passing None to cumsum would be
+                # interpreted as "flatten the input" and would produce a value that contradicts the
+                # output shape set above, so the value is not propagated in this case
+                return
+        # NOTE(review): with a disconnected axis input the sum is computed over the flattened input;
+        # confirm that this matches the default axis expected for this operation
+        reverse = node.reverse if node.has_valid('reverse') else False
+        exclusive = node.exclusive if node.has_valid('exclusive') else False
+        node.out_port(0).data.set_value(cumsum(input_value, axis=axis, reverse=reverse, exclusive=exclusive))
+
+
+class MXNetCumSum(Op):
+ """
+ Operation with op name 'MXNetCumSum', one input port and one output port. It is created with
+ 'type' and 'infer' set to None.
+ """
+ # NOTE(review): 'type' and 'infer' are None, so the node is presumably replaced by another operation
+ # before shape inference - confirm against the transformation that consumes 'MXNetCumSum'
+ enabled = False
+ op = 'MXNetCumSum'
+
+ def __init__(self, graph: Graph, attrs: dict):
+ super().__init__(graph, {
+ 'op': self.op,
+ 'type': None,
+
+ 'infer': None,
+
+ 'in_ports_count': 1,
+ 'out_ports_count': 1,
+ }, attrs)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.cumsum import CumSum
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph, valued_const_with_data, regular_op_with_shaped_data, result, connect
+
+# Node attributes shared by all the tests below: a [1, 3, 224, 224] Parameter without a value, a scalar
+# constant axis equal to 0, the CumSum node under test, an Identity node and a result node.
+nodes_attributes = {
+ **regular_op_with_shaped_data('data', [1, 3, 224, 224], {'type': 'Parameter', 'value': None,
+ '_out_port_data_type': {0: np.float32}}),
+ **valued_const_with_data('axis', int64_array(0)),
+ **regular_op_with_shaped_data('cumsum', None, {'op': 'CumSum', 'type': 'CumSum', 'name': 'cumsum'}),
+ **regular_op_with_shaped_data('identity', None, {'op': 'Identity', 'name': 'identity'}),
+ **result('output'),
+}
+
+
+class TestCumSum(unittest.TestCase):
+ """Unit tests for CumSum.infer: shape inference and value propagation in every exclusive/reverse mode."""
+
+ def test_cumsum_axis(self):
+ """The output shape equals the input shape when the input value is unknown."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ *connect('cumsum', '0:identity'),
+ ('identity', 'identity_d', {'out': 0}),
+ ('identity_d', 'output'),
+ ],
+ {'cumsum': {'reverse': False, 'exclusive': False}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_shape(), int64_array([1, 3, 224, 224])))
+
+ def test_cumsum_value_prop(self):
+ """Plain (inclusive, forward) cumulative sum of a constant 1D input."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ ('cumsum', 'cumsum_d', {'out': 0}),
+ ('cumsum_d', 'output'),
+ ],
+ {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]},
+ 'cumsum': {'reverse': False, 'exclusive': False}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(),
+ np.array([1., 3., 6., 10., 15.]).astype(np.float32)))
+
+ def test_cumsum_value_prop_exclusive(self):
+ """Exclusive mode: every output element excludes the corresponding input element."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ ('cumsum', 'cumsum_d', {'out': 0}),
+ ('cumsum_d', 'output'),
+ ],
+ {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]},
+ 'cumsum': {'reverse': False, 'exclusive': True}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(),
+ np.array([0., 1., 3., 6., 10.]).astype(np.float32)))
+
+ def test_cumsum_value_prop_reverse(self):
+ """Reverse mode: the sum is accumulated from the end of the axis."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ ('cumsum', 'cumsum_d', {'out': 0}),
+ ('cumsum_d', 'output'),
+ ],
+ {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]},
+ 'cumsum': {'reverse': True, 'exclusive': False}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(),
+ np.array([15., 14., 12., 9., 5.]).astype(np.float32)))
+
+ def test_cumsum_value_prop_exclusive_reverse(self):
+ """Exclusive and reverse modes combined."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ ('cumsum', 'cumsum_d', {'out': 0}),
+ ('cumsum_d', 'output'),
+ ],
+ {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]},
+ 'cumsum': {'reverse': True, 'exclusive': True}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(),
+ np.array([14., 12., 9., 5., 0.]).astype(np.float32)))
+
+ def test_cumsum_value_prop_axis_1(self):
+ """Cumulative sum of a constant 2D input along axis 1."""
+ graph = build_graph(nodes_attributes,
+ [*connect('data', '0:cumsum'),
+ *connect('axis', '1:cumsum'),
+ ('cumsum', 'cumsum_d', {'out': 0}),
+ ('cumsum_d', 'output'),
+ ],
+ {'data_d': {'value': np.array([[1., 2., 3.], [4., 5., 6.]]).astype(np.float32),
+ 'shape': [2, 3]},
+ 'axis_d': {'value': int64_array(1),
+ 'shape': []},
+ 'cumsum': {'reverse': False, 'exclusive': False}
+ }, nodes_with_edges_only=True)
+
+ cumsum_node = Node(graph, 'cumsum')
+ CumSum.infer(cumsum_node)
+ self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(),
+ np.array([[1., 3., 6.], [4., 9., 15.]]).astype(np.float32)))
class Range(Op):
+ """
+ Range operation with three inputs (start, limit, delta) and one output.
+
+ Some notes on the automatic inference of the result data type. tf.range does this differently from
+ np.arange: NumPy by default creates arrays with elements of type int64 and float64, whereas TF does not
+ widen data types and keeps them int32 and float32.
+ Compare:
+
+ >>> tf.range(1, 5, 0.5)
+ <tf.Tensor 'range_1:0' shape = (8,) dtype = float32>
+ >>> tf.range(1, 5, 2)
+ <tf.Tensor 'range_2:0' shape = (2,) dtype = int32>
+
+ >>> np.array([0.5], dtype=np.float32)
+ array([0.5], dtype=float32)
+ >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([2], dtype=np.int32)).dtype
+ dtype('int64')
+ >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([0.5], dtype=np.float32)).dtype
+ dtype('float64')
+ """
op = 'Range'
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
- 'op': __class__.op,
+ 'type': self.op,
+ 'op': self.op,
+
'version': 'opset1',
+ 'infer': self.infer,
+
'in_ports_count': 3,
'out_ports_count': 1,
- 'infer': __class__.infer,
}
super().__init__(graph, mandatory_props, attrs)
@staticmethod
def infer(node: Node):
- start = node.in_node(0)
- limit = node.in_node(1)
- delta = node.in_node(2)
- output = node.out_node()
-
- if not start.has_valid('value') or not limit.has_valid('value') or not delta.has_valid('value'):
- log.error("Range operation is supported with constant inputs only")
- return
- if node.has_valid('pb') and 'type' in node.pb.attr:
- from mo.front.tf.extractors.utils import tf_dtype_extractor
- result_data_type = tf_dtype_extractor(node.pb.attr["type"].type)
- elif node.has_valid('dtype'):
- result_data_type = node.dtype
- else:
- result_data_type = start.value.dtype
- output.value = np.arange(start.value, limit.value, delta.value, dtype=result_data_type)
- output.shape = np.array(output.value.shape, dtype=np.int64)
-
- # Some notes on the automatic result data type infer. The tf.range does is differently than np.arange. Numpy
- # by default creates array with elements of type int64 and float64, but TF does not widen data types and keep them
- # int32 and float32.
- # Compare:
-
- # >>> tf.range(1, 5, 0.5)
- # <tf.Tensor 'range_1:0' shape = (8,) dtype = float32>
- # >>> tf.range(1, 5, 2)
- # <tf.Tensor 'range_2:0' shape = (2,) dtype = int32>
-
- # >>> np.array([0.5], dtype=np.float32)
- # array([0.5], dtype=float32)
- # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([2], dtype=np.int32)).dtype
- # dtype('int64')
- # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([0.5], dtype=np.float32)).dtype
- # dtype('float64')
+ """
+ Computes the output value with np.arange from the constant start (port 0), limit (port 1) and
+ delta (port 2) inputs. The element type is the 'dtype' attribute of the node when it is set,
+ otherwise the data type of the start value.
+
+ :param node: Range node to infer
+ :raises AssertionError: if the node does not have exactly the inputs 0, 1 and 2 connected or if any
+ of the input values is not known
+ """
+ name = node.soft_get('name', node.id)
+ connected_input_ports = [in_port.idx for in_port in node.in_ports().values() if not in_port.disconnected()]
+ assert len(connected_input_ports) == 3 and [0, 1, 2] == sorted(connected_input_ports), \
+ 'Range operation should have 3 inputs, {} found for {}'.format(len(connected_input_ports), name)
+
+ start = node.in_port(0).data.get_value()
+ limit = node.in_port(1).data.get_value()
+ delta = node.in_port(2).data.get_value()
+
+ assert start is not None and limit is not None and delta is not None, \
+ 'Range operation {} with dynamic inputs is not supported'.format(name)
+ node.out_port(0).data.set_value(np.arange(start, limit, delta, dtype=node.soft_get('dtype', start.dtype)))
See the License for the specific language governing permissions and
limitations under the License.
"""
-
-from mo.graph.graph import Graph
+from mo.graph.graph import Graph, Node
from mo.ops.op import Op
-class ShuffleChannelOp(Op):
- op = 'ShuffleChannel'
+class ShuffleChannels(Op):
+ """
+ ShuffleChannels operation (version 'opset3') with one input and one output. The 'axis' attribute
+ defaults to 1; the 'group' attribute has no default and must be set before shape inference.
+ """
+ op = 'ShuffleChannels'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
'op': self.op,
- 'type': None,
+ 'type': self.op,
+ 'version': 'opset3',
+
+ 'infer': self.infer,
- # operation should be resolved on the front phase, partial inference is not needed
- 'infer': None,
+ 'axis': 1,
+ 'group': None,
'in_ports_count': 1,
'out_ports_count': 1,
}, attrs)
+
+ def backend_attrs(self):
+ """Returns the names of the attributes listed as backend attributes: 'group' and 'axis'."""
+ return ['group', 'axis']
+
+ @staticmethod
+ def infer(node: Node):
+ """
+ Sets the output shape to the shape of input 0.
+
+ :param node: ShuffleChannels node to infer
+ :raises AssertionError: if the 'group' attribute of the node is None
+ """
+ node_name = node.soft_get('name', node.id)
+ assert node.soft_get('group') is not None, 'The attribute "group" must be set for node {}'.format(node_name)
+ node.out_port(0).data.set_shape(node.in_port(0).data.get_shape())
"""
import mxnet as mx
+import numpy as np
from extensions.ops.elementwise import Elementwise
from mo.graph.graph import Node, Graph
return self._dict[key]
return default
+ def dtype(self, key, default=None):
+ """
+ Returns the value stored under `key` converted with mxnet_str_dtype_to_np, or `default` when the
+ object is not valid or does not contain the key.
+ """
+ if self.is_valid and key in self._dict:
+ return mxnet_str_dtype_to_np(self._dict[key])
+ return default
+
def bool(self, key, default=None):
attr = self.str(key, default)
if isinstance(attr, str):
return json_dic[attr]
-def load_params(input_model, data_names = ('data',)):
+def load_params(input_model, data_names=('data',)):
arg_params = {}
aux_params = {}
arg_keys = []
if file_format == 'params':
for key in loaded_weight:
keys = key.split(':')
- if len(keys)>1 and 'aux' == keys[0]:
+ if len(keys) > 1 and 'aux' == keys[0]:
aux_keys.append(keys[1])
aux_params[keys[1]] = loaded_weight[key]
- elif len(keys)>1 and 'arg' == keys[0]:
+ elif len(keys) > 1 and 'arg' == keys[0]:
arg_keys.append(keys[1])
arg_params[keys[1]] = loaded_weight[key]
else:
lin_node.in_port(1).get_connection().set_source(scalar_value.out_port(0))
node.out_port(0).get_connection().set_source(lin_node.out_port(0))
return lin_node
+
+
+# Mapping of data type names, as they are spelled in MXNet attributes, to the corresponding NumPy types
+MXNET_DATA_TYPES = {
+    'float16': np.float16,
+    'float32': np.float32,
+    'float64': np.float64,
+    'int8': np.int8,
+    'uint8': np.uint8,
+    'int32': np.int32,
+    'int64': np.int64,
+}
+
+
+def mxnet_str_dtype_to_np(dtype: str):
+    """
+    Converts a data type name to the corresponding NumPy type.
+
+    :param dtype: data type name, one of the keys of MXNET_DATA_TYPES
+    :return: NumPy scalar type
+    :raises KeyError: if the data type name is not present in MXNET_DATA_TYPES
+    """
+    try:
+        return MXNET_DATA_TYPES[dtype]
+    except KeyError:
+        # keep the exception type, but tell the user which names are accepted
+        raise KeyError('Unsupported MXNet data type "{}". Supported data types: {}'.format(
+            dtype, ', '.join(sorted(MXNET_DATA_TYPES)))) from None
@staticmethod
def infer(node: Node):
+ input_shape = node.in_port(0).data.get_shape()
axis = None
steps = None
if len(node.in_nodes()) == 1:
end = start + size
axis = None
+ # Check for situation when size[i] == -1 in TF
+ for i in range(start.size):
+ if end[i] < start[i]:
+ end[i] = input_shape[i]
+
# Delete edges to start, size nodes
node.graph.remove_edge(node.in_node(1).id, node.id)
node.graph.remove_edge(node.in_node(2).id, node.id)
log.warning('Incorrect number of input nodes in slice operation')
return
- input_shape = node.in_node(0).shape
- # Check for situation when size[i] == -1 in TF
- for i in range(start.size):
- if end[i] < start[i]:
- end[i] = input_shape[i]
# Update end param
node.end = end
value = node.in_node(0).value
- # If value is None create dummy vaue for shape propogation
+ # If value is None create dummy value for shape propagation
if value is None:
value = np.zeros(input_shape)
# Ranged for output value for specified axis
slice_idx[axis[id]] = slice(start[id], end[id], steps[id])
- # TODO: check whether this check is really important
for axis, s in enumerate(slice_idx):
if s is None:
slice_idx[axis] = slice(0, input_shape[axis], 1)
'shape': None,
'value': None,
},
+ 'starts': {
+ 'kind': 'data',
+ 'shape': None,
+ 'value': None,
+ },
+ 'ends': {
+ 'kind': 'data',
+ 'shape': None,
+ 'value': None,
+ },
'slice': {
'op': 'Slice',
'axis': None,
self.assertTrue(np.array_equal(slice_node['slices'], np.array([slice(1, 4, 1), slice(2, 3, 1), slice(0, 6, 1)])))
def test_slice_infer_multiply_params(self):
- # Test case when size[i] == -1 (that means all
+ # Test case for TF when size[i] == -1 (that means all
# remaining elements in dimension i are included in the slice)
graph = build_graph(nodes_attributes,
[('data_1', 'slice'),
self.assertTrue(np.array_equal(slice_node.out_node().value, None))
self.assertTrue(np.array_equal(slice_node.out_node().shape, np.array([3, 3, 6])))
self.assertTrue(np.array_equal(slice_node['slices'], np.array([slice(1, 4, 1), slice(2, 5, 1), slice(0, 6, 1)])))
+
+ def test_slice_onnx_10_opset_case(self):
+ """
+ Slice.infer with 'starts' and 'ends' passed as inputs and 'format' set to 'onnx': a negative end
+ value (-2) must be handled, so the 4x4 input sliced with starts [0, 1] and ends [3, -2] is
+ expected to give the 3x1 result [[5], [3], [6]].
+ """
+ # check for negative end value in the case of ONNX 10 opset
+ input = np.array([[4, 5, 6, 7], [2, 3, 5, 6], [5, 6, 8, 9], [5, 6, 8, 9]])
+ starts = np.array([0, 1])
+ ends = np.array([3, -2])
+ expected_values = np.array([[5], [3], [6]])
+
+ graph = build_graph(nodes_attributes,
+ [('data_1', 'slice'),
+ ('starts', 'slice'),
+ ('ends', 'slice'),
+ ('slice', 'data_2')],
+ {'data_1': {'value': input, 'shape': input.shape},
+ 'starts': {'value': starts, 'shape': starts.shape},
+ 'ends': {'value': ends, 'shape': ends.shape},
+ 'slice': {'format': 'onnx'}})
+
+ slice_node = Node(graph, 'slice')
+
+ Slice.infer(slice_node)
+ self.assertTrue(np.array_equal(slice_node.out_node().value, expected_values))
About nGraph Compiler stack
===========================
-nGraph Compiler stack architecture
-----------------------------------
-
-The diagram below represents our current release stack. In the diagram,
-nGraph components are colored in gray. Please note
-that the stack diagram is simplified to show how nGraph executes deep
-learning workloads with two hardware backends; however, many other
-deep learning frameworks and backends currently are functioning.
-
-![](doc/sphinx/source/graphics/ngraph_arch_diag.png)
-
-
## Bridge
Starting from the top of the stack, nGraph receives a computational graph
to MXNet* or ONNX* implementations of ResNet.
-## Hybrid Transformer
-
-Hybrid transformer takes the nGraph IR, and partitions it into
-subgraphs, which can then be assigned to the best-performing backend.
-There are two hardware backends shown in the stack diagram to demonstrate
-this graph partitioning. The Hybrid transformer assigns complex operations
-(subgraphs) to Intel® Nervana™ Neural Network Processor (NNP) to expedite the
-computation, and the remaining operations default to CPU. In the future,
-we will further expand the capabilities of Hybrid transformer
-by enabling more features, such as localized cost modeling and memory
-sharing.
-
-Once the subgraphs are assigned, the corresponding backend will
-execute the IR.
-
Features
--------
available device.
- **Data reuse** -- Save results and reuse for subgraphs with the
same input.
-- **Graph scheduling** -- Run similar subgraphs in parallel via
- multi-threading.
-- **Graph partitioning** -- Partition subgraphs to run on different
- devices to speed up computation; make better use of spare CPU cycles
- with nGraph.
-- **Memory management** -- Prevent peak memory usage by intercepting
- a graph with or by a "saved checkpoint," and to enable data auditing.
-
-Limitations
------------
-
-The Beta release of nGraph only supports Just-In-Time (JiT) compilation;
-Ahead-of Time (AoT) compilation will be supported in the official release.
-nGraph currently has limited support for dynamic shapes.
-
-
-Current nGraph Compiler full stack
-----------------------------------
-
-![](doc/sphinx/source/graphics/ngraph_full_stack_diagrams.png)
-
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TEST_UTIL_ENABLE "Control the building of test utility" TRUE)
-option(NGRAPH_DOC_BUILD_ENABLE "Control the building of documentation" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE)
option(NGRAPH_DEPRECATED_ENABLE "Enable compiler deprecation pragmas for deprecated APIs (recommended only for development use)" FALSE)
NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE)
NORMALIZE_BOOL(NGRAPH_TEST_UTIL_ENABLE)
-NORMALIZE_BOOL(NGRAPH_DOC_BUILD_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE)
NORMALIZE_BOOL(NGRAPH_DEBUG_ENABLE)
NORMALIZE_BOOL(NGRAPH_DEPRECATED_ENABLE)
message(STATUS "NGRAPH_CODE_COVERAGE_ENABLE: ${NGRAPH_CODE_COVERAGE_ENABLE}")
message(STATUS "NGRAPH_DEBUG_ENABLE: ${NGRAPH_DEBUG_ENABLE}")
message(STATUS "NGRAPH_DEPRECATED_ENABLE: ${NGRAPH_DEPRECATED_ENABLE}")
-message(STATUS "NGRAPH_DOC_BUILD_ENABLE: ${NGRAPH_DOC_BUILD_ENABLE}")
message(STATUS "NGRAPH_DYNAMIC_COMPONENTS_ENABLE: ${NGRAPH_DYNAMIC_COMPONENTS_ENABLE}")
message(STATUS "NGRAPH_EXPORT_TARGETS_ENABLE: ${NGRAPH_EXPORT_TARGETS_ENABLE}")
message(STATUS "NGRAPH_IE_ENABLE: ${NGRAPH_IE_ENABLE}")
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${CMAKE_POSITION_INDEPENDENT_CODE}
)
+# When a toolchain file is set, append it to the list of arguments kept in NGRAPH_FORWARD_CMAKE_ARGS.
+if(CMAKE_TOOLCHAIN_FILE)
+    list(APPEND NGRAPH_FORWARD_CMAKE_ARGS
+        -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
+endif()
+
if (CMAKE_OSX_SYSROOT)
set(NGRAPH_FORWARD_CMAKE_ARGS
${NGRAPH_FORWARD_CMAKE_ARGS}
add_subdirectory(test)
-if (NGRAPH_DOC_BUILD_ENABLE)
- add_subdirectory(doc)
-endif()
-
if (NGRAPH_PYTHON_BUILD_ENABLE)
add_subdirectory(python)
endif()
+++ /dev/null
-Contributor Guidelines
-======================
-
-The latest version of this file can be found at:
-
-https://www.ngraph.ai/documentation/contributing/guide
-
-License
--------
-
-All contributed code must be compatible with the [Apache
-2](https://www.apache.org/licenses/LICENSE-2.0) license, preferably by
-being contributed under the Apache 2 license. Code contributed with
-another license will need the license reviewed by Intel before it can be
-accepted.
-
-Code formatting
----------------
-
-All C/C++ source code in the repository, including the test code, must
-adhere to the source-code formatting and style guidelines described
-here. The coding style described here applies to the nGraph repository.
-Related repositories may make adjustements to better match the coding
-styles of libraries they are using.
-
-### Adding ops to nGraph Core
-
-Our design philosophy is that the graph is not a script for running
-optimized kernels; rather, the graph is a specification for a
-computation composed of basic building blocks which we call `ops`.
-Compilation should match groups of `ops` to appropriate optimal and
-semantically-equivalent groups of kernels for the backend(s) in use.
-Thus, we expect that adding of new Core ops should be infrequent and
-that most functionality instead gets added with new functions that build
-sub-graphs from existing core ops.
-
-### Coding style
-
-We have a coding standard to help us to get development done. If part of
-the standard is impeding progress, we either adjust that part or remove
-it. To this end, we employ coding standards that facilitate
-understanding of *what nGraph components are doing*. Programs are
-easiest to understand when they can be understood locally; if most local
-changes have local impact, you do not need to dig through multiple files
-to understand what something does and if it is safe to modify.
-
-#### Names
-
-Names should *briefly* describe the thing being named and follow these
-casing standards:
-
-- Define C++ class or type names with `CamelCase`.
-- Assign template parameters with `UPPER_SNAKE_CASE`.
-- Case variable and function names with `lower_snake_case`.
-
-Method names for basic accessors are prefixed by `get_`, `is_`, or
-`set_` and should have simple $\mathcal{O}(1)$ implementations:
-
-- A `get_` method should be externally idempotent. It may perform some
- simple initialization and cache the result for later use. Trivial
- `get_` methods can be defined in a header file. If a method is
- non-trivial, that is often a sign that it is not a basic accessor.
-- An `is_` may be used instead of `get_` for boolean accessors.
-- A `set_` method should change the value returned by the
- corresponding `get_` method.
- - Use `set_is_` if using `is_` to get a value.
- - Trivial `set_` methods may be defined in a header file.
-- Names of variables should indicate the use of the variable.
- - Member variables should be prefixed with `m_`.
- - Static member variables should be rare and be prefixed with
- `s_`.
-- Do not use `using` to define a type alias at top-level in header
- file. If the abstraction is useful, give it a class.
- - C++ does not enforce the abstraction. For example if `X` and `Y`
- are aliases for the same type, you can pass an `X` to something
- expecting a `Y`.
- - If one of the aliases were later changed, or turned into a real
- type, many callers could require changes.
-
-#### Namespaces
-
-- `ngraph` is for the public API, although this is not
- currently enforced.
- - Use a nested namespace for implementation classes.
- - Use an unnamed namespace or `static` for file-local names. This
- helps prevent unintended name collisions during linking and when
- using shared and dynamically-loaded libraries.
- - Never use `using` at top-level in a header file.
-
- - Doing so leaks the alias into users of the header, including
- headers that follow.
-
- - It is okay to use `using` with local scope, such as inside a class
- definiton.
-
- - Be careful of C++'s implicit namespace inclusions. For example,
- if a parameter's type is from another namespace, that namespace
- can be visible in the body.
- - Only use `using std` and/or `using ngraph` in `.cpp` files.
- `using` a nested namespace has can result in
- unexpected behavior.
-
-#### File Names
-
-- Do not use the same file name in multiple directories. At least one
- IDE/debugger ignores the directory name when setting breakpoints.
-- Use `.hpp` for headers and `.cpp` for implementation.
-- Reflect the namespace nesting in the directory hierarchy.
-- Unit test files are in the `tests` directory.
- - Transformer-dependent tests are tests running on the default
- transformer or specifying a transformer. For these, use the form
-
- ```
- TEST(file_name, test_name)
- ```
-
- - Transformer-independent tests:
- - File name is `file_name.in.cpp`
- - Add `#include "test_control.hpp"` to the file's includes
- - Add the line
- `static std::string s_manifest = "${MANIFEST}";` to the top
- of the file.
- - Use
-
- ```
- NGRAPH_TEST(${BACKEND_NAME}, test_name)
- ```
-
- for each test. Files are generated for each transformer and
- the `${BACKEND_NAME}` is replaced with the transformer name.
-
- Individual unit tests may be disabled by adding the name of
- the test to the `unit_test.manifest` file found in the
- transformer's source file directory.
-
-#### Formatting
-
-Things that look different should look different because they are
-different. We use **clang format** to enforce certain formatting.
-Although not always ideal, it is automatically enforced and reduces
-merge conflicts.
-
-- The .clang-format file located in the root of the project specifies
- our format.
- - The script maint/apply-code-format.sh enforces that formatting
- at the C/C++ syntactic level.
- - The script at maint/check-code-format.sh verifies that the
- formatting rules are met by all C/C++ code (again, at the
- syntax level). The script has an exit code of `0` when code
- meets the standard and non-zero otherwise. This script does
- *not* modify the source code.
-- Formatting with `#include` files:
- - Put headers in groups separated by a blank line. Logically order
- the groups downward from system-level to 3rd-party to `ngraph`.
- - Formatting will keep the files in each group in
- alphabetic order.
- - Use this syntax for files that **do not change during nGraph
- development**; they will not be checked for changes
- during builds. Normally this will be everything but the ngraph
- files:
-
- ```
- #include <file>
- ```
-
- - Use this syntax for files that **are changing during nGraph
- development**; they will be checked for changes during builds.
- Normally this will be ngraph headers:
-
- ```
- #include "file"
- ```
-
- - Use this syntax for system C headers with C++ wrappers:
-
- ```
- #include <c...>
- ```
-
-- To guard against multiple inclusion, use:
-
- ```
- #pragma once
- ```
-
- - The syntax is a compiler extension that has been adopted by all
- supported compilers.
-- The initialization
-
- ```
- Foo x{4, 5};
- ```
-
- is preferred over
-
- ```
- Foo x(4, 5);
- ```
-
-- Indentation should be accompanied by braces; this includes
- single-line bodies for conditionals and loops.
-- Exception checking:
- - Throw an exception to report a problem.
- - Nothing that calls `abort`, `exit` or `terminate` should
- be used. Remember that ngraph is a guest of the framework.
- - Do not use exclamation points in messages!
- - Be as specific as practical. Keep in mind that the person who
- sees the error is likely to be on the other side of the
- framework and the message might be the only information they see
- about the problem.
-- If you use `auto`, know what you are doing. `auto` uses the same
- type-stripping rules as template parameters. If something returns a
- reference, `auto` will strip the reference unless you use `auto&`:
- - Don't do things like
-
- ```
- auto s = Shape{2,3};
- ```
-
- Instead, use
-
- ```
- Shape s{2, 3};
- ```
-
- - Indicate the type in the variable name.
-
-- One variable declaration/definition per line
- - Don't use the C-style
-
- ```
- int x, y, *z;
- ```
-
- Instead, use:
-
- ```
- int x;
- int y;
- int* z;
- ```
+++ /dev/null
-![nGraph Compiler stack](doc/sphinx/source/graphics/ngraph_header.png)
-[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/NervanaSystems/ngraph/blob/master/LICENSE) [![Build Status][build-status-badge]][build-status]
-
-<div align="left">
- <h4>
- <a href="./ABOUT.md">Architecture & features</a> | <a href="./ecosystem-overview.md" >Ecosystem</a> | <a href="https://www.ngraph.ai/documentation/project/release-notes">Release notes</a><span> | </span> <a href="https://www.ngraph.ai/documentation">Documentation</a><span> | </span> <a href="#How-to-contribute" >Contribution guide</a>
- </h4>
-</div>
-
-## Quick start
-
-To begin using nGraph with popular frameworks, please refer to the links below.
-
-| Framework (Version) | Installation guide | Notes
-|----------------------------|----------------------------------------|-----------------------------------
-| TensorFlow* | [Pip install](https://www.ngraph.ai/tutorials/tensorflow-tutorial#use-pre-built-packages) or [Build from source](https://www.ngraph.ai/tutorials/tensorflow-tutorial#build-from-source) | 20 [Validated workloads]
-| ONNX 1.5 | [Pip install](https://www.ngraph.ai/tutorials/onnx-tutorial#use-pre-built-packages) | 17 [Validated workloads]
-
-
-#### Python wheels for nGraph
-
-The Python wheels for nGraph have been tested and are supported on the following
-64-bit systems:
-
-* Ubuntu 16.04 or later
-* CentOS 7.6
-* Debian 10
-* macOS 10.14.3 (Mojave)
-
-To install via pip, run:
-
-```
-pip install --upgrade pip==19.3.1
-pip install ngraph-core
-```
-
-
-Frameworks using nGraph Compiler stack to execute workloads have shown
-[**up to 45X**](https://ai.intel.com/ngraph-compiler-stack-beta-release/)
-performance boost when compared to native framework implementations. We've also
-seen performance boosts running workloads that are not included on the list of
-[Validated workloads], thanks to nGraph's powerful subgraph pattern matching.
-
-Additionally we have integrated nGraph with [PlaidML] to provide deep learning
-performance acceleration on Intel, nVidia, & AMD GPUs. More details on current
-architecture of the nGraph Compiler stack can be found in [Architecture and features],
-and recent changes to the stack are explained in the [Release Notes].
-
-## What is nGraph Compiler?
-
-nGraph Compiler aims to accelerate developing AI workloads using any deep learning
-framework and deploying to a variety of hardware targets. We strongly believe in
-providing freedom, performance, and ease-of-use to AI developers.
-
-The diagram below shows deep learning frameworks and hardware targets
-supported by nGraph. NNP-T and NNP-I in the diagram refer to Intel's next generation
-deep learning accelerators: Intel® Nervana™ Neural Network Processor for Training and
-Inference respectively. Future plans for supporting addtional deep learning frameworks
-and backends are outlined in the [ecosystem] section.
-
-![](doc/sphinx/source/graphics/nGraph_main.png)
-
-
-Our documentation has extensive information about how to use nGraph Compiler
-stack to create an nGraph computational graph, integrate custom frameworks,
-and to interact with supported backends. If you wish to contribute to the
-project, please don't hesitate to ask questions in [GitHub issues] after
-reviewing our contribution guide below.
-
-
-## How to contribute
-
-We welcome community contributions to nGraph. If you have an idea how
-to improve it:
-
-* See the [contrib guide] for code formatting and style guidelines.
-* Share your proposal via [GitHub issues].
-* Ensure you can build the product and run all the examples with your patch.
-* In the case of a larger feature, create a test.
-* Submit a [pull request].
-* Make sure your PR passes all CI tests. Note: You can test locally with `make check`.
-
- We will review your contribution and, if any additional fixes or modifications are
- necessary, may provide feedback to guide you. When accepted, your pull request will
- be merged to the repository.
-
-
-[Ecosystem]: ./ecosystem-overview.md
-[Architecture and features]: ./ABOUT.md
-[Documentation]: https://www.ngraph.ai/documentation
-[build the Library]: https://www.ngraph.ai/documentation/buildlb
-[Getting Started Guides]: Getting-started-guides
-[Validated workloads]: https://www.ngraph.ai/documentation/frameworks/validated/list
-[Functional]: https://github.com/NervanaSystems/ngraph-onnx/
-[How to contribute]: How-to-contribute
-[framework integration guides]: https://ngraph.ai/documentation/frameworks/overview
-[release notes]: https://www.ngraph.ai/documentation/project/release-notes
-[Github issues]: https://github.com/NervanaSystems/ngraph/issues
-[contrib guide]: https://www.ngraph.ai/documentation/contributing/guide
-[pull request]: https://github.com/NervanaSystems/ngraph/pulls
-[how to import]: https://www.ngraph.ai/tutorials/onnx-tutorial#import-a-model-with-onnx-and-ngraph
-[ngraph_wireframes_with_notice]: doc/sphinx/source/graphics/nGraph_main.png "nGraph components"
-[build-status]: https://travis-ci.org/NervanaSystems/ngraph/branches
-[build-status-badge]: https://travis-ci.org/NervanaSystems/ngraph.svg?branch=master
-[PlaidML]: https://github.com/plaidml/plaidml
-[Source compile]: https://github.com/NervanaSystems/ngraph-mxnet/blob/master/README.md
-[nGraph-ONNX]: https://github.com/NervanaSystems/ngraph-onnx/blob/master/README.md
-[nGraph-ONNX adaptable]: https://ai.intel.com/adaptable-deep-learning-solutions-with-ngraph-compiler-and-onnx/
-[nGraph for PyTorch developers]: https://ai.intel.com/investing-in-the-pytorch-developer-community
set(CMAKE_CXX_FLAGS ${CMAKE_ORIGINAL_CXX_FLAGS})
if (WIN32)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4251")
string(REPLACE "/W3" "/W0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
+++ /dev/null
-
-# Framework & runtime support
-
-One of nGraph’s key features is framework neutrality. We currently support
-popular deep learning frameworks such as TensorFlow and MXNet with stable
-bridges to pass computational graphs to nGraph. Additionally nGraph
-Compiler has a functional bridge to PaddlePaddle.
-For these frameworks, we have successfully tested functionality with a few
-deep learning workloads, and we plan to bring stable support for them in the
-upcoming releases.
-
-To further promote framework neutrality, the nGraph team has been actively
-contributing to the ONNX project. Developers who already have a "trained"
-DNN (Deep Neural Network) model can use nGraph to bypass significant
-framework-based complexity and [import it] to test or run on targeted and
-efficient backends with our user-friendly Python-based API.
-
-nGraph is also integrated as an execution provider for [ONNX Runtime],
-which is the first publicly available inference engine for ONNX.
-
-The table below summarizes our current progress on supported frameworks.
-If you are an architect of a framework wishing to take advantage of speed
-and multi-device support of nGraph Compiler, please refer to [Framework integration guide] section.
-
-
-| Framework & Runtime | Supported | Validated
-|----------------------------|--------------------|-------------
-| TensorFlow* 1.12 | :heavy_check_mark: | :heavy_check_mark:
-| MXNet* 1.3 | :heavy_check_mark: | :heavy_check_mark:
-| ONNX 1.3 | :heavy_check_mark: | :heavy_check_mark:
-| ONNX Runtime | Functional | No
-| PaddlePaddle | Functional | No
-
-
-
-
-[Architecture and features]: ./ABOUT.md
-[Upcoming DL accelerators]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/vision-accelerator-design-product-brief.pdf
-[import it]: https://ngraph.nervanasys.com/docs/latest/core/constructing-graphs/import.html
-[ONNX Runtime]: https://azure.microsoft.com/en-us/blog/onnx-runtime-is-now-open-source/
-[WinML]: http://docs.microsoft.com/en-us/windows/ai
-[How to]: https://ngraph.nervanasys.com/docs/latest/howto/index.html
-[Framework integration guide]: https://ngraph.nervanasys.com/docs/latest/frameworks/index.html
except subprocess.CalledProcessError as err:
print("Could not complete the wheel building process")
print("Command that failed: ", err.cmd)
- print("Command std output: ", err.stdout.decode('utf-8'))
- print("Command err output: ", err.stderr.decode('utf-8'))
+ if err.stdout is not None:
+ print("Command std output: ", err.stdout.decode('utf-8'))
+ if err.stderr is not None:
+ print("Command err output: ", err.stderr.decode('utf-8'))
sys.exit(1)
long_description=open(os.path.join(PYNGRAPH_ROOT_DIR, "README.md")).read(),
long_description_content_type="text/markdown",
ext_modules=ext_modules,
- package_dir={"": "src"},
+ package_dir={'': PYNGRAPH_SRC_DIR},
packages=packages,
cmdclass={"build_ext": BuildExt},
data_files=data_files,
from ngraph.ops import roi_align
from ngraph.ops import roi_pooling
from ngraph.ops import scatter_elements_update
-from ngraph.ops import scatter_nd_update
from ngraph.ops import scatter_update
from ngraph.ops import select
from ngraph.ops import selu
@nameable_op
-def scatter_nd_update(
- data: NodeInput, indices: NodeInput, updates: NodeInput, name: str = None
-) -> Node:
- """Return a node which produces a ScatterNDUpdate operation.
-
- ScatterNDUpdate creates a copy of the first input tensor
- with updated elements specified with second and third input tensors.
-
- :param data: The input tensor to be updated.
- :param indices: The tensor with indexes which will be updated.
- :param updates: The tensor with update values.
- :param name: Optional name for output node.
- :return: ScatterNDUpdate node
- """
- node_inputs = as_nodes(data, indices, updates)
- return _get_node_factory().create("ScatterNDUpdate", node_inputs)
-
-
-@nameable_op
def scatter_update(
data: Node, indices: NodeInput, updates: NodeInput, axis: NodeInput, name: Optional[str] = None
) -> Node:
"""Provide a layer of abstraction for the ngraph++ runtime environment."""
import logging
from typing import Dict, List, Union
+from enum import Enum
import numpy as np
from ngraph.exceptions import UserInputError
-from ngraph.impl import Function, Node, Shape, serialize, util
+from ngraph.impl import Function, Node, Shape, PartialShape, serialize, util
from ngraph.impl.runtime import Backend, Executable, Tensor
from ngraph.utils.types import NumericData, get_dtype
log = logging.getLogger(__name__)
-def runtime(backend_name: str = "CPU") -> "Runtime":
+class BackendMode(Enum):
+ """DYNAMIC mode enables backend's wrapper which supports dynamic shapes."""
+
+ STATIC = 0
+ DYNAMIC = 1
+
+
+def runtime(backend_name: str = "CPU", mode: BackendMode = BackendMode.STATIC) -> "Runtime":
"""Create a Runtime object (helper factory).
Use signature to parameterize runtime as needed.
"""
- return Runtime(backend_name)
+ return Runtime(backend_name, mode)
class Runtime:
"""Represents the ngraph++ runtime environment."""
- def __init__(self, backend_name: str) -> None:
+ def __init__(self, backend_name: str, mode: BackendMode = BackendMode.STATIC) -> None:
self.backend_name = backend_name
- self.backend = Backend.create(backend_name)
+ if mode == BackendMode.DYNAMIC:
+ self.backend = Backend.create_dynamic(backend_name)
+ else:
+ self.backend = Backend.create(backend_name)
def set_config(self, config: Dict[str, str]) -> None:
"""Set the backend configuration."""
self.result_views = [] # type: List[Tensor]
for result in self.results:
- shape = result.get_shape()
element_type = result.get_element_type()
- self.result_views.append(runtime.backend.create_tensor(element_type, shape))
+ if self.function.is_dynamic():
+ output_pshape = result.get_output_partial_shape(0)
+ output_tensor = runtime.backend.create_dynamic_tensor(element_type, output_pshape)
+ self.result_views.append(output_tensor)
+ else:
+ output_shape = result.get_shape()
+ output_tensor = runtime.backend.create_tensor(element_type, output_shape)
+ self.result_views.append(output_tensor)
def __repr__(self) -> str:
params_string = ", ".join([param.name for param in self.parameters])
value = np.array(value)
Computation._write_ndarray_to_tensor_view(value, tensor_view)
- self.handle.call(self.result_views, self.tensor_views)
+ if self.function.is_dynamic():
+ self.handle.call_with_validate(self.result_views, self.tensor_views)
+ else:
+ self.handle.call(self.result_views, self.tensor_views)
results = []
for result_view in self.result_views:
function.def("get_output_op", &ngraph::Function::get_output_op);
function.def("get_output_element_type", &ngraph::Function::get_output_element_type);
function.def("get_output_shape", &ngraph::Function::get_output_shape);
+ function.def("get_output_partial_shape", &ngraph::Function::get_output_partial_shape);
function.def("get_parameters", &ngraph::Function::get_parameters);
function.def("get_results", &ngraph::Function::get_results);
function.def("get_result", &ngraph::Function::get_result);
function.def("get_unique_name", &ngraph::Function::get_name);
function.def("get_name", &ngraph::Function::get_friendly_name);
function.def("set_friendly_name", &ngraph::Function::set_friendly_name);
+ function.def("is_dynamic", &ngraph::Function::is_dynamic);
function.def("__repr__", [](const ngraph::Function& self) {
std::string class_name = py::cast(self).get_type().attr("__name__").cast<std::string>();
std::string shape =
node.def("get_output_element_type", &ngraph::Node::get_output_element_type);
node.def("get_element_type", &ngraph::Node::get_element_type);
node.def("get_output_shape", &ngraph::Node::get_output_shape);
+ node.def("get_output_partial_shape", &ngraph::Node::get_output_partial_shape);
node.def("get_shape", &ngraph::Node::get_shape);
node.def("get_output_partial_shape", &ngraph::Node::get_output_partial_shape);
node.def("get_type_name", &ngraph::Node::get_type_name);
{
}
- virtual void on_attribute(const std::string& name, std::string& value) override
- {
- if (m_attributes.contains(name))
- {
- value = m_attributes[name.c_str()].cast<std::string>();
- }
- }
- virtual void on_attribute(const std::string& name, bool& value) override
+ virtual void on_adapter(const std::string& name,
+ ngraph::ValueAccessor<void>& adapter) override
{
if (m_attributes.contains(name))
{
- value = m_attributes[name.c_str()].cast<bool>();
+ NGRAPH_CHECK(
+ false, "No AttributeVisitor support for accessing attribute named: ", name);
}
}
virtual void on_adapter(const std::string& name,
- ngraph::ValueAccessor<void>& adapter) override
+ ngraph::ValueAccessor<bool>& adapter) override
{
if (m_attributes.contains(name))
{
- NGRAPH_CHECK(
- false, "No AttributeVisitor support for accessing attribute named: ", name);
+ adapter.set(m_attributes[name.c_str()].cast<bool>());
}
}
virtual void on_adapter(const std::string& name,
namespace py = pybind11;
-static std::shared_ptr<ngraph::runtime::Executable> compile(ngraph::runtime::Backend* self,
- std::shared_ptr<ngraph::Function> func)
+static std::shared_ptr<ngraph::runtime::Backend> create_static(const std::string& type)
{
- bool enable_performance_data = false;
- return self->compile(func, enable_performance_data);
+ bool must_support_dynamic = false;
+ return ngraph::runtime::Backend::create(type, must_support_dynamic);
}
-static std::shared_ptr<ngraph::runtime::Backend> create(const std::string& type)
+static std::shared_ptr<ngraph::runtime::Backend> create_dynamic(const std::string& type)
{
- bool must_support_dynamic = false;
+ bool must_support_dynamic = true;
return ngraph::runtime::Backend::create(type, must_support_dynamic);
}
+static std::shared_ptr<ngraph::runtime::Executable> compile(ngraph::runtime::Backend* self,
+ std::shared_ptr<ngraph::Function> func)
+{
+ bool enable_performance_data = false;
+ return self->compile(func, enable_performance_data);
+}
+
void regclass_pyngraph_runtime_Backend(py::module m)
{
py::class_<ngraph::runtime::Backend, std::shared_ptr<ngraph::runtime::Backend>> backend(
m, "Backend");
backend.doc() = "ngraph.impl.runtime.Backend wraps ngraph::runtime::Backend";
- backend.def_static("create", &create);
+ backend.def_static("create", &create_static);
+ backend.def_static("create_dynamic", &create_dynamic);
backend.def_static("get_registered_devices", &ngraph::runtime::Backend::get_registered_devices);
backend.def("create_tensor",
(std::shared_ptr<ngraph::runtime::Tensor>(ngraph::runtime::Backend::*)(
const ngraph::element::Type&, const ngraph::Shape&)) &
ngraph::runtime::Backend::create_tensor);
+ backend.def("create_dynamic_tensor",
+ (std::shared_ptr<ngraph::runtime::Tensor>(ngraph::runtime::Backend::*)(
+ const ngraph::element::Type&, const ngraph::PartialShape&)) &
+ ngraph::runtime::Backend::create_dynamic_tensor);
backend.def("compile", &compile);
backend.def("set_config", &ngraph::runtime::Backend::set_config);
}
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
ngraph::runtime::Executable::call);
+ executable.def("call_with_validate",
+ (bool (ngraph::runtime::Executable::*)(
+ const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&,
+ const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>&)) &
+ ngraph::runtime::Executable::call_with_validate);
executable.def(
"get_performance_data",
(std::vector<ngraph::runtime::PerformanceCounter>(ngraph::runtime::Executable::*)()) &
@pytest.mark.parametrize("data_type", [np.int64, np.int32, np.int16, np.int8])
def test_constant_get_data_signed_integer(data_type):
np.random.seed(133391)
- input_data = np.random.randint(
- np.iinfo(data_type).min, np.iinfo(data_type).max, [2, 3, 4]
- ).astype(data_type)
+ input_data = np.random.randint(np.iinfo(data_type).min, np.iinfo(data_type).max,
+ size=[2, 3, 4], dtype=data_type)
node = ng.constant(input_data, dtype=data_type)
retrieved_data = node.get_data()
assert np.allclose(input_data, retrieved_data)
def test_strided_slice():
- input_tensor = np.arange(2 * 3 * 4).reshape((2, 3, 4))
- begin = np.array([1, 0])
- end = np.array([0, 0])
- strides = np.array([1, 1])
- begin_mask = np.array([0, 0, 0])
- end_mask = np.array([0, 0, 0])
- new_axis_mask = np.array([0, 1, 0])
- shrink_axis_mask = np.array([1, 0, 0])
- ellipsis_mask = np.array([0, 0, 0])
+ input_tensor = np.arange(2 * 3 * 4, dtype=np.float32).reshape((2, 3, 4))
+ begin = np.array([1, 0], dtype=np.int64)
+ end = np.array([0, 0], dtype=np.int64)
+ strides = np.array([1, 1], dtype=np.int64)
+ begin_mask = np.array([0, 0, 0], dtype=np.int64)
+ end_mask = np.array([0, 0, 0], dtype=np.int64)
+ new_axis_mask = np.array([0, 1, 0], dtype=np.int64)
+ shrink_axis_mask = np.array([1, 0, 0], dtype=np.int64)
+ ellipsis_mask = np.array([0, 0, 0], dtype=np.int64)
result = run_op_node(
[input_tensor, begin, end, strides],
ellipsis_mask,
)
- expected = np.array([12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]).reshape((1, 3, 4))
+ expected = np.array([12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
+ dtype=np.float32).reshape((1, 3, 4))
assert np.allclose(result, expected)
from ngraph.impl import Type
-def test_scatter_nd_update_props():
- dtype = np.int32
- parameter_r = ng.parameter([1000, 256, 10, 15], dtype=dtype, name="data")
- parameter_i = ng.parameter([25, 125, 3], dtype=dtype, name="indices")
- parameter_u = ng.parameter([25, 125, 15], dtype=dtype, name="updates")
-
- node = ng.scatter_nd_update(parameter_r, parameter_i, parameter_u)
- assert node.get_type_name() == "ScatterNDUpdate"
- assert node.get_output_size() == 1
- assert list(node.get_output_shape(0)) == [1000, 256, 10, 15]
- assert node.get_output_element_type(0) == Type.i32
-
-
def test_scatter_update_props():
dtype = np.int8
parameter_r = ng.parameter([2, 3, 4], dtype=dtype, name="data")
assertion.hpp
attribute_adapter.cpp
attribute_adapter.hpp
+ attribute_visitor.cpp
attribute_visitor.hpp
autodiff/adjoints.cpp
autodiff/adjoints.hpp
except.hpp
factory.cpp
factory.hpp
+ factory_adapter.hpp
file_util.cpp
file_util.hpp
function.cpp
namespace ngraph
{
- // float
constexpr DiscreteTypeInfo AttributeAdapter<float>::type_info;
- const double& AttributeAdapter<float>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<float>::set(const double& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
- // double
constexpr DiscreteTypeInfo AttributeAdapter<double>::type_info;
- const double& AttributeAdapter<double>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<double>::set(const double& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
- // bool
+ constexpr DiscreteTypeInfo AttributeAdapter<string>::type_info;
constexpr DiscreteTypeInfo AttributeAdapter<bool>::type_info;
- const bool& AttributeAdapter<bool>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<bool>::set(const bool& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<int8_t>::type_info;
- const int64_t& AttributeAdapter<int8_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<int8_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<int16_t>::type_info;
- const int64_t& AttributeAdapter<int16_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<int16_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<int32_t>::type_info;
- const int64_t& AttributeAdapter<int32_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<int32_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<int64_t>::type_info;
- const int64_t& AttributeAdapter<int64_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<int64_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<uint8_t>::type_info;
- const int64_t& AttributeAdapter<uint8_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<uint8_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<uint16_t>::type_info;
- const int64_t& AttributeAdapter<uint16_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<uint16_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<uint32_t>::type_info;
- const int64_t& AttributeAdapter<uint32_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<uint32_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
constexpr DiscreteTypeInfo AttributeAdapter<uint64_t>::type_info;
- const int64_t& AttributeAdapter<uint64_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<uint64_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
-
#ifdef __APPLE__
// size_t is not uint64_t on OSX
constexpr DiscreteTypeInfo AttributeAdapter<size_t>::type_info;
- const int64_t& AttributeAdapter<size_t>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = m_value;
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<size_t>::set(const int64_t& value)
- {
- m_value = value;
- m_buffer_valid = false;
- }
+ constexpr DiscreteTypeInfo AttributeAdapter<vector<size_t>>::type_info;
#endif
-
- // vector<int8_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<int8_t>>::type_info;
-
- const vector<int8_t>& AttributeAdapter<vector<int8_t>>::get() { return m_value; }
- void AttributeAdapter<vector<int8_t>>::set(const vector<int8_t>& value) { m_value = value; }
- // vector<int16_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<int16_t>>::type_info;
-
- const vector<int16_t>& AttributeAdapter<vector<int16_t>>::get() { return m_value; }
- void AttributeAdapter<vector<int16_t>>::set(const vector<int16_t>& value) { m_value = value; }
- // vector<int32_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<int32_t>>::type_info;
-
- const vector<int32_t>& AttributeAdapter<vector<int32_t>>::get() { return m_value; }
- void AttributeAdapter<vector<int32_t>>::set(const vector<int32_t>& value) { m_value = value; }
- // vector<int64_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<int64_t>>::type_info;
-
- const vector<int64_t>& AttributeAdapter<vector<int64_t>>::get() { return m_value; }
- void AttributeAdapter<vector<int64_t>>::set(const vector<int64_t>& value) { m_value = value; }
- // vector<uint8_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<uint8_t>>::type_info;
-
- const vector<int8_t>& AttributeAdapter<vector<uint8_t>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int8_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<uint8_t>>::set(const vector<int8_t>& value)
- {
- m_value = copy_from<vector<uint8_t>>(value);
- m_buffer_valid = false;
- }
-
- // vector<uint16_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<uint16_t>>::type_info;
-
- const vector<int16_t>& AttributeAdapter<vector<uint16_t>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int16_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<uint16_t>>::set(const vector<int16_t>& value)
- {
- m_value = copy_from<vector<uint16_t>>(value);
- m_buffer_valid = false;
- }
-
- // vector<uint32_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<uint32_t>>::type_info;
-
- const vector<int32_t>& AttributeAdapter<vector<uint32_t>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int32_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<uint32_t>>::set(const vector<int32_t>& value)
- {
- m_value = copy_from<vector<uint32_t>>(value);
- m_buffer_valid = false;
- }
-
- // vector<uint64_t>
constexpr DiscreteTypeInfo AttributeAdapter<vector<uint64_t>>::type_info;
-
- const vector<int64_t>& AttributeAdapter<vector<uint64_t>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<uint64_t>>::set(const vector<int64_t>& value)
- {
- m_value = copy_from<vector<uint64_t>>(value);
- m_buffer_valid = false;
- }
-
-#ifdef __APPLE__
- // size_t is not uint64_t on OSX
- // vector<size_t>
- constexpr DiscreteTypeInfo AttributeAdapter<vector<size_t>>::type_info;
-
- const vector<int64_t>& AttributeAdapter<vector<size_t>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<size_t>>::set(const vector<int64_t>& value)
- {
- m_value = copy_from<vector<size_t>>(value);
- m_buffer_valid = false;
- }
-#endif
-
- /// vector<float>
constexpr DiscreteTypeInfo AttributeAdapter<vector<float>>::type_info;
-
- const vector<float>& AttributeAdapter<vector<float>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<float>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<float>>::set(const vector<float>& value)
- {
- m_value = copy_from<vector<float>>(value);
- m_buffer_valid = false;
- }
-
- /// vector<double>
constexpr DiscreteTypeInfo AttributeAdapter<vector<double>>::type_info;
-
- const vector<double>& AttributeAdapter<vector<double>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<double>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<double>>::set(const vector<double>& value)
- {
- m_value = copy_from<vector<double>>(value);
- m_buffer_valid = false;
- }
-
- /// vector<string>
constexpr DiscreteTypeInfo AttributeAdapter<vector<string>>::type_info;
-
- const vector<string>& AttributeAdapter<vector<string>>::get()
- {
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<string>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
- }
-
- void AttributeAdapter<vector<string>>::set(const vector<string>& value)
- {
- m_value = copy_from<vector<string>>(value);
- m_buffer_valid = false;
- }
}
#include "ngraph/enum_names.hpp"
#include "ngraph/type.hpp"
+///
namespace ngraph
{
- template <typename Type>
+ class AttributeVisitor;
+
+ /// \brief Provides access to an attribute of type AT as a value accessor type VAT
+ template <typename VAT>
class ValueAccessor;
/// \brief ValueAccessor<void> provides an accessor for values that do not have get/set methods
+ /// via AttributeVisitor.on_adapter.
+ ///
+ /// All ValueAccessors must be derived from ValueAccessor<void> so that an AttributeVisitor
+ /// only needs to implement a subset of the on_adapter methods.
template <>
class NGRAPH_API ValueAccessor<void>
{
public:
+ /// \brief type info enables identification of the value accessor, as well as is_type and
+ /// as_type.
virtual const DiscreteTypeInfo& get_type_info() const = 0;
virtual ~ValueAccessor() {}
};
- /// \brief Provides access to values via get/set methods
- /// \tparam T The type of the value; may be wider than the value being accessed.
- template <typename T>
+ /// \brief Provides access to values via get/set methods from an m_value, typically from
+ /// ValueReference
+ ///
+ /// The m_buffer holds a VAT, which may be wider than the attribute AT. For example, serializers
+ /// that only
+ /// support int64_t integers would use a ValueAccessor<vector<int64_t>> to reference a
+ /// vector<int8_t> attribute. Destruction moves the value back to the attribute if it was
+ /// changed.
+ /// \tparam VAT The adapter value type; may be wider than the value being accessed.
+ template <typename VAT>
class ValueAccessor : public ValueAccessor<void>
{
public:
- virtual const DiscreteTypeInfo& get_type_info() const = 0;
/// Returns the value
- virtual const T& get() = 0;
+ virtual const VAT& get() = 0;
/// Sets the value
- virtual void set(const T& value) = 0;
+ virtual void set(const VAT& value) = 0;
+ };
+ /// \brief Accessor for an attribute that is exposed as an untyped pointer plus a size instead
+ /// of through typed get/set methods.
+ template <>
+ class ValueAccessor<void*> : public ValueAccessor<void>
+ {
+ public:
+ /// \brief Returns an untyped pointer to the accessed data.
+ virtual void* get_ptr() = 0;
+ /// \brief Returns the size of the accessed data.
+ /// NOTE(review): presumably a byte count -- confirm against the implementers.
+ virtual size_t size() = 0;
+ };
+
+ /// \brief ValueAccessor that reads and writes the referenced attribute directly; used when
+ /// the attribute type is also the accessor value type, so no conversion is needed.
+ /// \tparam AT The attribute type, which is also the accessor value type.
+ template <typename AT>
+ class DirectValueAccessor : public ValueAccessor<AT>
+ {
+ public:
+ /// \brief Captures a reference to the attribute; the attribute itself is not copied.
+ DirectValueAccessor(AT& ref)
+ : m_ref(ref)
+ {
+ }
+ /// Returns a reference to the attribute itself.
+ const AT& get() override { return m_ref; }
+ /// Assigns value to the referenced attribute.
+ void set(const AT& value) override { m_ref = value; }
protected:
- T m_buffer;
- bool m_buffer_valid{false};
+ AT& m_ref;
};
- /// \brief holds a reference to a value
- /// \tparam Type the type of the referenced value
- template <typename Type>
- class ValueReference
+ /// \brief ValueAccessor that exposes a scalar attribute of type AT as a value of type VAT,
+ /// converting with static_cast in both directions.
+ ///
+ /// get() converts lazily into m_buffer and keeps returning that buffer until set() is called;
+ /// a change made to the attribute by other means after the first get() is therefore not
+ /// reflected by later get() calls.
+ /// \tparam AT The attribute type.
+ /// \tparam VAT The accessor value type.
+ template <typename AT, typename VAT>
+ class IndirectScalarValueAccessor : public ValueAccessor<VAT>
{
public:
- operator Type&() const { return m_value; }
+ /// \brief Captures a reference to the attribute; the buffer starts out invalid.
+ IndirectScalarValueAccessor(AT& ref)
+ : m_ref(ref)
+ {
+ }
+
+ /// Returns the attribute converted to VAT, converting only if the buffer is not valid.
+ const VAT& get() override
+ {
+ if (!m_buffer_valid)
+ {
+ m_buffer = static_cast<VAT>(m_ref);
+ m_buffer_valid = true;
+ }
+ return m_buffer;
+ }
+
+ /// Writes value to the attribute, converted with static_cast<AT>, and invalidates the
+ /// buffer so that the next get() re-reads the attribute.
+ void set(const VAT& value) override
+ {
+ m_ref = static_cast<AT>(value);
+ m_buffer_valid = false;
+ }
+
protected:
- ValueReference(Type& value)
- : m_value(value)
+ AT& m_ref;
+ VAT m_buffer;
+ bool m_buffer_valid{false};
+ };
+
+    /// \brief Builds a container of type A with the same number of elements as b, where each
+    /// element is the corresponding element of b converted with static_cast.
+    /// \tparam A Result container type; constructed from a size and written through operator[].
+    /// \tparam B Source container type; read through size() and operator[].
+    template <typename A, typename B>
+    A copy_from(B& b)
+    {
+        const size_t count = b.size();
+        A converted(count);
+        // Element type of the result, as seen through its operator[].
+        using element_t = typename std::remove_reference<decltype(converted[0])>::type;
+        for (size_t idx = 0; idx != count; ++idx)
+        {
+            converted[idx] = static_cast<element_t>(b[idx]);
+        }
+        return converted;
+    }
+
+ /// \brief ValueAccessor that exposes a container attribute of type AT as a container of type
+ /// VAT, converting element by element with copy_from in both directions.
+ ///
+ /// get() converts lazily into m_buffer and keeps returning that buffer until set() is called;
+ /// a change made to the attribute by other means after the first get() is therefore not
+ /// reflected by later get() calls.
+ /// \tparam AT The attribute container type.
+ /// \tparam VAT The accessor container type.
+ template <typename AT, typename VAT>
+ class IndirectVectorValueAccessor : public ValueAccessor<VAT>
+ {
+ public:
+ /// \brief Captures a reference to the attribute; the buffer starts out invalid.
+ IndirectVectorValueAccessor(AT& ref)
+ : m_ref(ref)
{
}
- Type& m_value;
+
+ /// Returns the attribute converted to VAT, converting only if the buffer is not valid.
+ const VAT& get() override
+ {
+ if (!m_buffer_valid)
+ {
+ m_buffer = copy_from<typename std::remove_cv<VAT>::type>(m_ref);
+ m_buffer_valid = true;
+ }
+ return m_buffer;
+ }
+
+ /// Replaces the attribute with value converted to AT and invalidates the buffer so that
+ /// the next get() re-reads the attribute.
+ void set(const VAT& value) override
+ {
+ m_ref = copy_from<AT>(value);
+ m_buffer_valid = false;
+ }
+
+ /// Gives direct access to the referenced attribute, bypassing the buffer.
+ operator AT&() { return m_ref; }
+ protected:
+ AT& m_ref;
+ VAT m_buffer;
+ bool m_buffer_valid{false};
};
- template <typename Type>
+ /// \brief An AttributeAdapter "captures" an attribute as an AT& and makes it available as a
+ /// ValueAccessor<VAT>.
+ template <typename AT>
class AttributeAdapter
{
};
/// \brief Access an enum via a string
- /// \tparam Type The enum class
- template <typename Type>
- class EnumAttributeAdapterBase : public ValueReference<Type>, public ValueAccessor<std::string>
+ /// \tparam AT The attribute type enum class
+ template <typename AT>
+ class EnumAttributeAdapterBase : public ValueAccessor<std::string>
{
public:
- EnumAttributeAdapterBase(Type& value)
- : ValueReference<Type>(value)
+ EnumAttributeAdapterBase(AT& value)
+ : m_ref(value)
{
}
- const std::string& get() override { return as_string(ValueReference<Type>::m_value); }
- void set(const std::string& value) override
- {
- ValueReference<Type>::m_value = as_enum<Type>(value);
- }
+ const std::string& get() override { return as_string(m_ref); }
+ void set(const std::string& value) override { m_ref = as_enum<AT>(value); }
+ operator AT&() { return m_ref; }
+ protected:
+ AT& m_ref;
+ };
+
+ /// \brief Base class for adapters that are handed the visitor itself, rather than exposing a
+ /// single value through get/set.
+ class VisitorAdapter : public ValueAccessor<void>
+ {
+ public:
+ /// \brief Called with the visitor so that the adapter can visit its own attributes.
+ /// NOTE(review): the meaning of the bool result is not visible here -- confirm.
+ virtual bool visit_attributes(AttributeVisitor& visitor) = 0;
};
- /// \brief Access a float as a double
template <>
- class NGRAPH_API AttributeAdapter<float> : public ValueReference<float>,
- public ValueAccessor<double>
+ class NGRAPH_API AttributeAdapter<float> : public IndirectScalarValueAccessor<float, double>
{
public:
AttributeAdapter(float& value)
- : ValueReference<float>(value)
+ : IndirectScalarValueAccessor<float, double>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<float>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const double& get() override;
- void set(const double& value) override;
};
/// \brief Access a double as a double
template <>
- class NGRAPH_API AttributeAdapter<double> : public ValueReference<double>,
- public ValueAccessor<double>
+ class NGRAPH_API AttributeAdapter<double> : public DirectValueAccessor<double>
{
public:
AttributeAdapter(double& value)
- : ValueReference<double>(value)
+ : DirectValueAccessor<double>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<double>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const double& get() override;
- void set(const double& value) override;
+ };
+
+ /// \brief Access a string as a string
+ template <>
+ class NGRAPH_API AttributeAdapter<std::string> : public DirectValueAccessor<std::string>
+ {
+ public:
+ AttributeAdapter(std::string& value)
+ : DirectValueAccessor<std::string>(value)
+ {
+ }
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<string>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
};
/// \brief Access a bool as a bool
template <>
- class NGRAPH_API AttributeAdapter<bool> : public ValueReference<bool>,
- public ValueAccessor<bool>
+ class NGRAPH_API AttributeAdapter<bool> : public DirectValueAccessor<bool>
{
public:
AttributeAdapter(bool& value)
- : ValueReference<bool>(value)
+ : DirectValueAccessor<bool>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<bool>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const bool& get() override;
- void set(const bool& value) override;
};
- /// \brief Access an int8_t and an int16_t
+ /// \brief Access an int8_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<int8_t> : public ValueReference<int8_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<int8_t> : public IndirectScalarValueAccessor<int8_t, int64_t>
{
public:
AttributeAdapter(int8_t& value)
- : ValueReference<int8_t>(value)
+ : IndirectScalarValueAccessor<int8_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<int8_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access an int16_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<int16_t> : public ValueReference<int16_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<int16_t>
+ : public IndirectScalarValueAccessor<int16_t, int64_t>
{
public:
AttributeAdapter(int16_t& value)
- : ValueReference<int16_t>(value)
+ : IndirectScalarValueAccessor<int16_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<int16_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access an int32_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<int32_t> : public ValueReference<int32_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<int32_t>
+ : public IndirectScalarValueAccessor<int32_t, int64_t>
{
public:
AttributeAdapter(int32_t& value)
- : ValueReference<int32_t>(value)
+ : IndirectScalarValueAccessor<int32_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<int32_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access an int64_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<int64_t> : public ValueReference<int64_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<int64_t> : public DirectValueAccessor<int64_t>
{
public:
AttributeAdapter(int64_t& value)
- : ValueReference<int64_t>(value)
+ : DirectValueAccessor<int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<int64_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access a uint8_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<uint8_t> : public ValueReference<uint8_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<uint8_t>
+ : public IndirectScalarValueAccessor<uint8_t, int64_t>
{
public:
AttributeAdapter(uint8_t& value)
- : ValueReference<uint8_t>(value)
+ : IndirectScalarValueAccessor<uint8_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<uint8_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access a uint16_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<uint16_t> : public ValueReference<uint16_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<uint16_t>
+ : public IndirectScalarValueAccessor<uint16_t, int64_t>
{
public:
AttributeAdapter(uint16_t& value)
- : ValueReference<uint16_t>(value)
+ : IndirectScalarValueAccessor<uint16_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<uint16_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access a uint32_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<uint32_t> : public ValueReference<uint32_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<uint32_t>
+ : public IndirectScalarValueAccessor<uint32_t, int64_t>
{
public:
AttributeAdapter(uint32_t& value)
- : ValueReference<uint32_t>(value)
+ : IndirectScalarValueAccessor<uint32_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<uint32_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
/// \brief Access a uint64_t as an int64_t
template <>
- class NGRAPH_API AttributeAdapter<uint64_t> : public ValueReference<uint64_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<uint64_t>
+ : public IndirectScalarValueAccessor<uint64_t, int64_t>
{
public:
AttributeAdapter(uint64_t& value)
- : ValueReference<uint64_t>(value)
+ : IndirectScalarValueAccessor<uint64_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<uint64_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
};
#ifdef __APPLE__
// size_t is one of the uint types on _WIN32
template <>
- class NGRAPH_API AttributeAdapter<size_t> : public ValueReference<size_t>,
- public ValueAccessor<int64_t>
+ class NGRAPH_API AttributeAdapter<size_t> : public IndirectScalarValueAccessor<size_t, int64_t>
{
public:
AttributeAdapter(size_t& value)
- : ValueReference<size_t>(value)
+ : IndirectScalarValueAccessor<size_t, int64_t>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<size_t>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const int64_t& get() override;
- void set(const int64_t& value) override;
+ };
+
+ template <>
+ class NGRAPH_API AttributeAdapter<std::vector<size_t>>
+ : public IndirectVectorValueAccessor<std::vector<size_t>, std::vector<int64_t>>
+ {
+ public:
+ AttributeAdapter(std::vector<size_t>& value)
+ : IndirectVectorValueAccessor<std::vector<size_t>, std::vector<int64_t>>(value)
+ {
+ }
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<size_t>>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
};
#endif
/// \brief Access a vector<int8_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<int8_t>>
- : public ValueReference<std::vector<int8_t>>, public ValueAccessor<std::vector<int8_t>>
+ : public DirectValueAccessor<std::vector<int8_t>>
{
public:
AttributeAdapter(std::vector<int8_t>& value)
- : ValueReference<std::vector<int8_t>>(value)
+ : DirectValueAccessor<std::vector<int8_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<int8_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int8_t>& get() override;
- void set(const std::vector<int8_t>& value) override;
};
/// \brief Access a vector<int16_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<int16_t>>
- : public ValueReference<std::vector<int16_t>>, public ValueAccessor<std::vector<int16_t>>
+ : public DirectValueAccessor<std::vector<int16_t>>
{
public:
AttributeAdapter(std::vector<int16_t>& value)
- : ValueReference<std::vector<int16_t>>(value)
+ : DirectValueAccessor<std::vector<int16_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<int16_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int16_t>& get() override;
- void set(const std::vector<int16_t>& value) override;
};
/// \brief Access a vector<int32_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<int32_t>>
- : public ValueReference<std::vector<int32_t>>, public ValueAccessor<std::vector<int32_t>>
+ : public DirectValueAccessor<std::vector<int32_t>>
{
public:
AttributeAdapter(std::vector<int32_t>& value)
- : ValueReference<std::vector<int32_t>>(value)
+ : DirectValueAccessor<std::vector<int32_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<int32_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int32_t>& get() override;
- void set(const std::vector<int32_t>& value) override;
};
/// \brief Access a vector<int64_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<int64_t>>
- : public ValueReference<std::vector<int64_t>>, public ValueAccessor<std::vector<int64_t>>
+ : public DirectValueAccessor<std::vector<int64_t>>
{
public:
AttributeAdapter(std::vector<int64_t>& value)
- : ValueReference<std::vector<int64_t>>(value)
+ : DirectValueAccessor<std::vector<int64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<int64_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
};
- /// \brief Access a vector<uint8_t> as a vector<int8_t>
+ /// \brief Access a vector<uint8_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<uint8_t>>
- : public ValueReference<std::vector<uint8_t>>, public ValueAccessor<std::vector<int8_t>>
+ : public DirectValueAccessor<std::vector<uint8_t>>
{
public:
AttributeAdapter(std::vector<uint8_t>& value)
- : ValueReference<std::vector<uint8_t>>(value)
+ : DirectValueAccessor<std::vector<uint8_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<uint8_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int8_t>& get() override;
- void set(const std::vector<int8_t>& value) override;
};
- /// \brief Access a vector<uint16_t> as a vector<int16_t>
+ /// \brief Access a vector<uint16_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<uint16_t>>
- : public ValueReference<std::vector<uint16_t>>, public ValueAccessor<std::vector<int16_t>>
+ : public DirectValueAccessor<std::vector<uint16_t>>
{
public:
AttributeAdapter(std::vector<uint16_t>& value)
- : ValueReference<std::vector<uint16_t>>(value)
+ : DirectValueAccessor<std::vector<uint16_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<uint16_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int16_t>& get() override;
- void set(const std::vector<int16_t>& value) override;
};
- /// \brief Access a vector<uint32_t> as a vector<int32_t>
+ /// \brief Access a vector<uint32_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<uint32_t>>
- : public ValueReference<std::vector<uint32_t>>, public ValueAccessor<std::vector<int32_t>>
+ : public DirectValueAccessor<std::vector<uint32_t>>
{
public:
AttributeAdapter(std::vector<uint32_t>& value)
- : ValueReference<std::vector<uint32_t>>(value)
+ : DirectValueAccessor<std::vector<uint32_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<uint32_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int32_t>& get() override;
- void set(const std::vector<int32_t>& value) override;
};
- /// \brief Access a vector<uint64_t> as a vector<int64_t>
+ /// \brief Access a vector<uint64_t>
template <>
class NGRAPH_API AttributeAdapter<std::vector<uint64_t>>
- : public ValueReference<std::vector<uint64_t>>, public ValueAccessor<std::vector<int64_t>>
+ : public DirectValueAccessor<std::vector<uint64_t>>
{
public:
AttributeAdapter(std::vector<uint64_t>& value)
- : ValueReference<std::vector<uint64_t>>(value)
+ : DirectValueAccessor<std::vector<uint64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<uint64_t>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
};
-#ifdef __APPLE__
- // size_t is not uint64_t on OSX
- template <>
- class NGRAPH_API AttributeAdapter<std::vector<size_t>>
- : public ValueReference<std::vector<size_t>>, public ValueAccessor<std::vector<int64_t>>
- {
- public:
- AttributeAdapter(std::vector<size_t>& value)
- : ValueReference<std::vector<size_t>>(value)
- {
- }
-
- static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<size_t>>", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
- };
-#endif
-
/// \brief Access a vector<float>
template <>
class NGRAPH_API AttributeAdapter<std::vector<float>>
- : public ValueReference<std::vector<float>>, public ValueAccessor<std::vector<float>>
+ : public DirectValueAccessor<std::vector<float>>
{
public:
AttributeAdapter(std::vector<float>& value)
- : ValueReference<std::vector<float>>(value)
+ : DirectValueAccessor<std::vector<float>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<float>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<float>& get() override;
- void set(const std::vector<float>& value) override;
};
/// \brief Access a vector<double>
template <>
class NGRAPH_API AttributeAdapter<std::vector<double>>
- : public ValueReference<std::vector<double>>, public ValueAccessor<std::vector<double>>
+ : public DirectValueAccessor<std::vector<double>>
{
public:
AttributeAdapter(std::vector<double>& value)
- : ValueReference<std::vector<double>>(value)
+ : DirectValueAccessor<std::vector<double>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<double>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<double>& get() override;
- void set(const std::vector<double>& value) override;
};
/// \brief Access a vector<string>
template <>
class NGRAPH_API AttributeAdapter<std::vector<std::string>>
- : public ValueReference<std::vector<std::string>>,
- public ValueAccessor<std::vector<std::string>>
+ : public DirectValueAccessor<std::vector<std::string>>
{
public:
AttributeAdapter(std::vector<std::string>& value)
- : ValueReference<std::vector<std::string>>(value)
+ : DirectValueAccessor<std::vector<std::string>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<vector<string>>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<std::string>& get() override;
- void set(const std::vector<std::string>& value) override;
};
-
- template <typename A, typename B>
- A copy_from(B& b)
- {
- A result(b.size());
- for (int i = 0; i < b.size(); ++i)
- {
- result[i] =
- static_cast<typename std::remove_reference<decltype(result[i])>::type>(b[i]);
- }
- return result;
- }
}
--- /dev/null
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <sstream>
+#include <stdexcept>
+
+#include "ngraph/attribute_visitor.hpp"
+#include "ngraph/attribute_adapter.hpp"
+#include "ngraph/node.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+// Opens a nested structure by appending its name to the context; finish_structure removes
+// the most recently appended name again.
+void AttributeVisitor::start_structure(const string& name)
+{
+ m_context.push_back(name);
+}
+
+// Closes the structure most recently opened with start_structure.
+//
+// Returns the name that was passed to the matching start_structure call.
+// Throws std::logic_error when no structure is open.
+string AttributeVisitor::finish_structure()
+{
+    // back()/pop_back() on an empty container is undefined behaviour, so an unbalanced call
+    // is reported instead of silently corrupting state.
+    if (m_context.empty())
+    {
+        throw std::logic_error(
+            "AttributeVisitor::finish_structure called without a matching start_structure");
+    }
+    string result = m_context.back();
+    m_context.pop_back();
+    return result;
+}
+
+// Joins the names of all currently open structures, outermost first, separated by '.'.
+// Returns an empty string when no structure is open.
+string AttributeVisitor::get_name_with_context()
+{
+    ostringstream result;
+    string sep = "";
+    // Iterate by const reference: the names are only read, so copying each one is wasted work.
+    for (const auto& c : m_context)
+    {
+        result << sep << c;
+        // Every name after the first is preceded by the separator.
+        sep = ".";
+    }
+    return result.str();
+}
+
+// Default handling for a VisitorAdapter: hand this visitor to the adapter so that it can
+// visit its own attributes. The name argument is not used here and the result of
+// visit_attributes is discarded.
+void AttributeVisitor::on_adapter(const std::string& name, VisitorAdapter& adapter)
+{
+ adapter.visit_attributes(*this);
+}
+
+// Default handling for pointer/size accessors: forward to the generic ValueAccessor<void>
+// overload, which every visitor must implement.
+void AttributeVisitor::on_adapter(const std::string& name, ValueAccessor<void*>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+// Default handling for string accessors: forward to the generic ValueAccessor<void>
+// overload, which every visitor must implement.
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<string>& adapter)
+{
+    on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+// Default handling for bool accessors: forward to the generic ValueAccessor<void>
+// overload, which every visitor must implement.
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<bool>& adapter)
+{
+    on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+// Default handling for the remaining typed accessors (integer and floating-point scalars,
+// and vectors of integers, floating-point values and strings). Each overload forwards to the
+// generic ValueAccessor<void> overload, so a visitor only has to override the overloads for
+// the value types it handles specially.
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<int8_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<int16_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<int32_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<int64_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<uint8_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<uint16_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<uint32_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<uint64_t>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<float>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<double>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<int8_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<int16_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<int32_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<int64_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<uint8_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<uint16_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<uint32_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<uint64_t>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<float>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<double>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+void AttributeVisitor::on_adapter(const string& name, ValueAccessor<std::vector<string>>& adapter)
+{
+ on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
+}
+
+// Sentinel id (the empty string). register_node treats it as "no id supplied" and
+// get_registered_node_id returns it for a node that was never registered.
+const AttributeVisitor::node_id_t AttributeVisitor::invalid_node_id = "";
+
+// Records node under id in both lookup tables (id -> node and node -> id).
+// When id equals invalid_node_id, the node's friendly name is used as the id instead.
+void AttributeVisitor::register_node(const std::shared_ptr<Node>& node, node_id_t id)
+{
+    const bool id_missing = (id == invalid_node_id);
+    if (id_missing)
+    {
+        id = node->get_friendly_name();
+    }
+    // Keep both directions of the mapping in step.
+    m_id_node_map[id] = node;
+    m_node_id_map[node] = id;
+}
+
+// Looks up the node registered under id; returns an empty shared_ptr when there is none.
+std::shared_ptr<Node> AttributeVisitor::get_registered_node(node_id_t id)
+{
+    const auto entry = m_id_node_map.find(id);
+    if (entry == m_id_node_map.end())
+    {
+        return shared_ptr<Node>();
+    }
+    return entry->second;
+}
+
+// Looks up the id under which node was registered; returns invalid_node_id when the node
+// has not been registered.
+AttributeVisitor::node_id_t
+    AttributeVisitor::get_registered_node_id(const std::shared_ptr<Node>& node)
+{
+    const auto entry = m_node_id_map.find(node);
+    if (entry != m_node_id_map.end())
+    {
+        return entry->second;
+    }
+    return invalid_node_id;
+}
#pragma once
#include <string>
+#include <unordered_map>
#include <utility>
#include "ngraph/partial_shape.hpp"
{
template <typename T>
class ValueAccessor;
+ class VisitorAdapter;
+ class Node;
- /// \brief Visits the attributes of a node.
+ /// \brief Visits the attributes of a node, primarily for serialization-like tasks.
///
- /// Attributes are the values set when building a graph which are not
- /// computed as the graph executes. Values computed from the graph topology and attributes
- /// during compilation are not attributes.
+ /// Attributes are the node parameters that are always compile-time constants.
+ /// Values computed from the graph topology and attributes during compilation are not
+ /// attributes.
+ ///
+ /// Attributes have a wide variety of types, but serialization formats are more restricted.
+ /// We assume serialization easily supports scalar types of bool, 64-bit signed integer, string,
+ /// and double, and has specialized ways to support numeric arrays and raw data+size. The
+ /// visitor and adapter convert between the limited serialization types and the unlimited
+ /// attribute types.
+ ///
+ /// A visitor is passed to an op's visit_attributes method. The visit_attributes method calls
+ /// the template method visitor.on_attribute<AT>(const std::string& name, AT& value) on each
+ /// attribute. The visitor can read or write the attribute's value. The on_attribute
+ /// method creates an AttributeAdapter<AT> for the value and passes it to one of the visitor's
+ /// on_adapter methods. The on_adapter methods expect a reference to a ValueAccessor<VAT> or a
+ /// VisitorAdapter. A ValueAccessor<VAT> has get/set methods that can be used to read/write the
+ /// attribute value as type VAT. These methods are triggered by deriving AttributeAdapter<AT>
+ /// from ValueAccessor<VAT>. For more complex cases, such as structs, the on_adapter method for
+ /// VisitorAdapter passes the name and visitor to the adapter, so that the adapter can perform
+ /// additional work such as visiting struct members or sequence values.
+ ///
+ /// When a node visits an attribute with structure, the node's on_attribute passes a name for
+ /// the entire attribute, but the struct will have its own methods to be visited. Similarly, a
+ /// vector will have a sequence of members to be visited. The adapter may use the visitor
+ /// methods start_structure/finish_structure and start_vector/next_vector/finish_vector to
+ /// indicate nested members.
+ ///
+ /// The visitor method get_name_with_context creates a generic nested version of the name.
+ /// Visitors can override according to their serialization requirements.
+ ///
+ /// Attributes that are shared_ptr<Node> are special. They must already have been
+ /// registered with the visitor using register_node, which needs a shared pointer to a node and
+ /// a string ID. The ID string will be used to serialize the node or find the node during
+ /// deserialization.
class NGRAPH_API AttributeVisitor
{
public:
virtual ~AttributeVisitor() {}
// Must implement these methods
- virtual void on_attribute(const std::string& name, std::string& value) = 0;
- virtual void on_attribute(const std::string& name, bool& value) = 0;
- virtual void on_attribute(const std::string& name, void* data, size_t size) {}
+ /// \brief handles all specialized on_adapter methods implemented by the visitor.
+ ///
+ /// The adapter implements get_type_info(), which can be used to determine the adapter
+ /// directly
+ /// or via is_type and as_type on any platform
virtual void on_adapter(const std::string& name, ValueAccessor<void>& adapter) = 0;
// The remaining adapter methods fall back on the void adapter if not implemented
- virtual void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- };
- virtual void on_adapter(const std::string& name, ValueAccessor<int8_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<int16_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<int32_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<int64_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<uint8_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<uint16_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<uint32_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<uint64_t>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<float>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<double>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ virtual void on_adapter(const std::string& name, ValueAccessor<void*>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<bool>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<int8_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<int16_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<int32_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<int64_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<uint8_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<uint16_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<uint32_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<uint64_t>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<float>& adapter);
+ virtual void on_adapter(const std::string& name, ValueAccessor<double>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<int8_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<int8_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<int16_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<int16_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<int32_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<int32_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<int64_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<int64_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<uint8_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<uint8_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<uint16_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<uint16_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<uint32_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<uint32_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<uint64_t>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- virtual void on_adapter(const std::string& name, ValueAccessor<std::vector<float>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<uint64_t>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<double>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
+ ValueAccessor<std::vector<float>>& adapter);
virtual void on_adapter(const std::string& name,
- ValueAccessor<std::vector<std::string>>& adapter)
- {
- on_adapter(name, static_cast<ValueAccessor<void>&>(adapter));
- }
- // Use an adapter for non-primitive types
- template <typename T>
- // typename std::enable_if<std::is_class<T>::value, void>::type
- void on_attribute(const std::string& name, T& value)
- {
- AttributeAdapter<T> adapter(value);
- on_adapter(name, adapter);
- }
- void on_attribute(const std::string& name, op::AutoBroadcastSpec& value)
- {
- AttributeAdapter<op::AutoBroadcastType> adapter(value.m_type);
- on_adapter(name, adapter);
- }
- void on_attribute(const std::string& name, op::BroadcastModeSpec& value)
- {
- AttributeAdapter<op::BroadcastType> adapter(value.m_type);
- on_adapter(name, adapter);
- }
+ ValueAccessor<std::vector<double>>& adapter);
+ virtual void on_adapter(const std::string& name,
+ ValueAccessor<std::vector<std::string>>& adapter);
+ /// \brief Hook for adapters that need visitor access
+ virtual void on_adapter(const std::string& name, VisitorAdapter& adapter);
+
+ /// The generic visitor. There must be a definition of AttributeAdapter<T> that can convert
+ /// to a ValueAccessor<U> for one of the on_adapter methods.
+ template <typename AT>
+ void on_attribute(const std::string& name, AT& value)
+ {
+ AttributeAdapter<AT> adapter(value);
+ start_structure(name);
+ on_adapter(get_name_with_context(), adapter);
+ finish_structure();
+ }
+ /// \returns The nested context of visits
+ const std::vector<std::string>& get_context() const { return m_context; }
+ /// \returns context prepended to names
+ virtual std::string get_name_with_context();
+ /// \brief Start visiting a nested structure
+ virtual void start_structure(const std::string& name);
+ /// \brief Finish visiting a nested structure
+ virtual std::string finish_structure();
+ using node_id_t = std::string;
+ static const node_id_t invalid_node_id;
+ /// \brief Associate a node with an id.
+ ///
+ /// No node may be used as an attribute unless it has already been registered with an ID.
+ /// References to nodes are visited with a ValueAccessor of their ID.
+ virtual void register_node(const std::shared_ptr<Node>& node,
+ node_id_t id = invalid_node_id);
+ /// Returns the node with the given id, or nullptr if there is no registered node
+ virtual std::shared_ptr<Node> get_registered_node(node_id_t id);
+ /// Returns the id for the node, or invalid_node_id if the node is not registered
+ virtual node_id_t get_registered_node_id(const std::shared_ptr<Node>& node);
+
+ protected:
+ std::vector<std::string> m_context;
+ std::unordered_map<std::shared_ptr<Node>, node_id_t> m_node_id_map;
+ std::unordered_map<node_id_t, std::shared_ptr<Node>> m_id_node_map;
};
}
{
if (!m_buffer_valid)
{
- for (auto elt : m_value)
+ m_buffer.clear();
+ for (auto elt : m_ref)
{
m_buffer.push_back(elt);
}
+ m_buffer_valid = true;
}
return m_buffer;
}
void ngraph::AttributeAdapter<ngraph::AxisSet>::set(const std::vector<int64_t>& value)
{
- m_value = AxisSet();
+ m_ref = AxisSet();
for (auto elt : value)
{
- m_value.insert(elt);
+ m_ref.insert(elt);
}
m_buffer_valid = false;
}
};
template <>
- class NGRAPH_API AttributeAdapter<AxisSet> : public ValueReference<AxisSet>,
- public ValueAccessor<std::vector<int64_t>>
+ class NGRAPH_API AttributeAdapter<AxisSet> : public ValueAccessor<std::vector<int64_t>>
{
public:
AttributeAdapter(AxisSet& value)
- : ValueReference<AxisSet>(value)
+ : m_ref(value)
{
}
- static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<AxisSet>", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
const std::vector<int64_t>& get() override;
void set(const std::vector<int64_t>& value) override;
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<AxisSet>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ AxisSet& m_ref;
+ std::vector<int64_t> m_buffer;
+ bool m_buffer_valid{false};
};
NGRAPH_API
static_cast<std::vector<size_t>*>(this)->operator=(v);
return *this;
}
+
+constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter<ngraph::AxisVector>::type_info;
#include <ostream>
#include <vector>
+#include "ngraph/attribute_adapter.hpp"
#include "ngraph/ngraph_visibility.hpp"
namespace ngraph
NGRAPH_API AxisVector& operator=(AxisVector&& v) noexcept;
};
+ template <>
+ class NGRAPH_API AttributeAdapter<AxisVector>
+ : public IndirectVectorValueAccessor<AxisVector, std::vector<int64_t>>
+ {
+ public:
+ AttributeAdapter(AxisVector& value)
+ : IndirectVectorValueAccessor<AxisVector, std::vector<int64_t>>(value)
+ {
+ }
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<AxisVector>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ };
+
NGRAPH_API
std::ostream& operator<<(std::ostream& s, const AxisVector& axis_vector);
}
shared_ptr<Node> builder::opset1::flatten(const Output<Node>& value, int axis)
{
- if (value.get_partial_shape().is_static())
+ // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of
+ // input tensor. The last dimension is the product of the rest of input tensor dimensions:
+ // [d_{axis}, ..., d_n]
+ shared_ptr<Node> output_shape;
+ if (axis == 0)
+ {
+ output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {1, -1});
+ }
+ else if (axis == 1)
{
- auto data_shape = value.get_shape();
- // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of
- // input
- // tensor. The last dimension is the product of the rest of input tensor dimensions:
- // [d_{axis}, ..., d_n]
- size_t first_dim_size =
- accumulate(begin(data_shape), next(begin(data_shape), axis), 1UL, multiplies<size_t>());
-
- size_t last_dim_size =
- accumulate(next(begin(data_shape), axis), end(data_shape), 1UL, multiplies<size_t>());
-
- return builder::opset1::reshape(value, Shape{first_dim_size, last_dim_size});
+ output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {0, -1});
}
else
{
- shared_ptr<Node> output_shape;
- if (axis == 0)
- {
- output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {1, -1});
- }
- else if (axis == 1)
- {
- output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {0, -1});
- }
- else
- {
- const auto value_shape = make_shared<ngraph::opset1::ShapeOf>(value);
- const auto value_rank = make_shared<ngraph::opset1::ShapeOf>(value_shape);
- const auto axis_node = get_normalized_axis_node(value_rank, axis);
- const auto remaining_part_length =
- ngraph::opset1::Constant::create(element::i64, Shape{1}, {-1});
- const auto shape_split_lengths = make_shared<ngraph::opset1::Concat>(
- OutputVector{axis_node, remaining_part_length}, 0);
- const auto split_parts = make_shared<ngraph::opset1::VariadicSplit>(
- value_shape,
- ngraph::opset1::Constant::create(element::i64, Shape{}, {0}),
- shape_split_lengths);
- // We're reducing vectors thus, just single zero axis to reduce and keep dims to true.
- const auto first_part_dim = make_shared<ngraph::opset1::ReduceProd>(
- split_parts->get_output_as_single_output_node(0),
- ngraph::opset1::Constant::create(element::i64, Shape{}, {0}),
- true);
- // TODO, handle edge case where first part is empty - then should equal to one
- output_shape = make_shared<ngraph::opset1::Concat>(
- OutputVector{first_part_dim, remaining_part_length}, 0);
- }
-
- return make_shared<ngraph::opset1::Reshape>(value, output_shape, true)
- ->add_provenance_group_members_above({value});
+ const auto value_shape = make_shared<ngraph::opset1::ShapeOf>(value);
+ const auto value_rank = make_shared<ngraph::opset1::ShapeOf>(value_shape);
+ const auto axis_node = get_normalized_axis_node(value_rank, axis);
+
+ const auto first_part_dims = make_shared<ngraph::opset1::StridedSlice>(
+ value_shape,
+ ngraph::opset1::Constant::create(element::i64, {1}, {0}),
+ axis_node,
+ vector<int64_t>{},
+ vector<int64_t>{});
+ const auto first_part_dims_length = make_shared<ngraph::opset1::ReduceProd>(
+ first_part_dims, ngraph::opset1::Constant::create(element::i64, {}, {0}), true);
+
+ const auto remaining_part_length =
+ ngraph::opset1::Constant::create(element::i64, {1}, {-1});
+
+ output_shape = make_shared<ngraph::opset1::Concat>(
+ OutputVector{first_part_dims_length, remaining_part_length}, 0);
}
+ return make_shared<ngraph::opset1::Reshape>(value, output_shape, true)
+ ->add_provenance_group_members_above({value});
}
shared_ptr<Node> builder::opset1::expand_dims(const Output<Node>& value, size_t axis)
return *this;
}
-const vector<uint64_t>& AttributeAdapter<Coordinate>::get()
-{
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<uint64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
-}
-
-void AttributeAdapter<Coordinate>::set(const vector<uint64_t>& value)
-{
- m_value = copy_from<Coordinate>(m_value);
- m_buffer_valid = false;
-}
-
constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter<ngraph::Coordinate>::type_info;
};
template <>
- class NGRAPH_API AttributeAdapter<Coordinate> : public ValueReference<Coordinate>,
- public ValueAccessor<std::vector<uint64_t>>
+ class NGRAPH_API AttributeAdapter<Coordinate>
+ : public IndirectVectorValueAccessor<Coordinate, std::vector<int64_t>>
{
public:
AttributeAdapter(Coordinate& value)
- : ValueReference<Coordinate>(value)
+ : IndirectVectorValueAccessor<Coordinate, std::vector<int64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<Coordinate>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<uint64_t>& get() override;
- void set(const std::vector<uint64_t>& value) override;
};
NGRAPH_API
return *this;
}
-const vector<int64_t>& AttributeAdapter<CoordinateDiff>::get()
-{
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
-}
-
-void AttributeAdapter<CoordinateDiff>::set(const vector<int64_t>& value)
-{
- m_value = copy_from<CoordinateDiff>(value);
- m_buffer_valid = false;
-}
-
constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter<ngraph::CoordinateDiff>::type_info;
};
template <>
- class NGRAPH_API AttributeAdapter<CoordinateDiff> : public ValueReference<CoordinateDiff>,
- public ValueAccessor<std::vector<int64_t>>
+ class NGRAPH_API AttributeAdapter<CoordinateDiff>
+ : public IndirectVectorValueAccessor<CoordinateDiff, std::vector<int64_t>>
+
{
public:
AttributeAdapter(CoordinateDiff& value)
- : ValueReference<CoordinateDiff>(value)
+ : IndirectVectorValueAccessor<CoordinateDiff, std::vector<int64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<CoordinateDiff>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
};
NGRAPH_API
--- /dev/null
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include "ngraph/attribute_adapter.hpp"
+#include "ngraph/attribute_visitor.hpp"
+#include "ngraph/factory.hpp"
+
+namespace ngraph
+{
+    /// \brief Visits an object held through a shared_ptr<BASE_TYPE>, creating it through
+    /// FactoryRegistry<BASE_TYPE> when the reference is empty.
+    ///
+    /// The type name and version are visited as the attributes "name" and "version". If the
+    /// reference is still null after that visit and the name is non-empty, an instance is
+    /// created from the registry. The object's own attributes are then visited inside a
+    /// nested "value" structure.
+    template <typename BASE_TYPE>
+    class FactoryAttributeAdapter : public VisitorAdapter
+    {
+    public:
+        /// \param ref Reference to the pointer being visited; it is assigned when a new
+        ///            object is created during the visit.
+        FactoryAttributeAdapter(std::shared_ptr<BASE_TYPE>& ref)
+            : m_ref(ref)
+        {
+        }
+
+        /// \brief Hook for extra processing before other attributes
+        /// \returns false to skip the remainder of the visit
+        virtual bool on_start(AttributeVisitor& /* visitor */) { return true; }
+        /// \brief Hook for extra processing after other attributes
+        virtual bool on_finish(AttributeVisitor& /* visitor */) { return true; }
+        /// \brief Visits the type identity, then the attributes of the referenced object
+        /// \returns true in all cases; the result of on_finish is not propagated
+        bool visit_attributes(AttributeVisitor& visitor) override
+        {
+            if (on_start(visitor))
+            {
+                std::string type_info_name;
+                // Must be initialized: when m_ref is null nothing below assigns it before
+                // the visitor is handed a reference to it, and a visitor that reads the
+                // value would otherwise observe an indeterminate uint64_t.
+                uint64_t type_info_version = 0;
+                if (m_ref)
+                {
+                    auto& type_info = m_ref->get_type_info();
+                    type_info_name = type_info.name;
+                    type_info_version = type_info.version;
+                }
+                visitor.on_attribute("name", type_info_name);
+                visitor.on_attribute("version", type_info_version);
+                // The visitor supplied a type name but there is no object yet: create one
+                if (!type_info_name.empty() && !m_ref)
+                {
+                    m_ref = std::shared_ptr<BASE_TYPE>(FactoryRegistry<BASE_TYPE>::get().create(
+                        DiscreteTypeInfo{type_info_name.c_str(), type_info_version}));
+                }
+                if (m_ref)
+                {
+                    visitor.start_structure("value");
+                    m_ref->visit_attributes(visitor);
+                    visitor.finish_structure();
+                }
+                on_finish(visitor);
+            }
+            return true;
+        }
+
+    protected:
+        /// The visited pointer; may be null on entry and assigned during the visit
+        std::shared_ptr<BASE_TYPE>& m_ref;
+    };
+}
\ No newline at end of file
set(ONNX_IMPORT_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "")
-target_link_libraries(onnx_importer PRIVATE ngraph onnx onnx_proto)
+target_link_libraries(onnx_importer PRIVATE onnx onnx_proto)
+target_link_libraries(onnx_importer PUBLIC ngraph)
set_target_properties(onnx_importer PROPERTIES
CXX_VISIBILITY_PRESET hidden
C_VISIBILITY_PRESET hidden
VISIBILITY_INLINES_HIDDEN ON
POSITION_INDEPENDENT_CODE ON)
-target_include_directories(onnx_importer SYSTEM PUBLIC ${ONNX_IMPORT_INCLUDE_DIR})
+target_include_directories(onnx_importer SYSTEM PUBLIC $<BUILD_INTERFACE:${ONNX_IMPORT_INCLUDE_DIR}>
+ $<INSTALL_INTERFACE:include/ngraph/frontend/onnx_import>)
target_include_directories(onnx_importer SYSTEM PRIVATE ${NGRAPH_INCLUDE_PATH}
- SYSTEM PRIVATE ${ONNX_INCLUDE_DIR} ${ONNX_PROTO_INCLUDE_DIR} ${Protobuf_INCLUDE_DIR})
+ ${ONNX_INCLUDE_DIR} ${ONNX_PROTO_INCLUDE_DIR} ${Protobuf_INCLUDE_DIR})
target_compile_definitions(onnx_importer PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION})
target_compile_definitions(onnx_importer PRIVATE ONNX_IMPORTER_DLL_EXPORTS)
-install(TARGETS onnx_importer DESTINATION ${NGRAPH_INSTALL_LIB})
+install(TARGETS onnx_importer EXPORT ngraphTargets
+ RUNTIME DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph
+ ARCHIVE DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph
+ LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph)
#include "ngraph/opsets/opset0.hpp"
#include "ngraph/validation_util.hpp"
#include "utils/common.hpp"
+#include "utils/reshape.hpp"
namespace ngraph
{
{
const auto input = node.get_ng_inputs().at(0);
const auto& input_shape = input->get_output_partial_shape(0);
- const auto axis = node.get_attribute_value<std::int64_t>("axis", 1);
- const auto normalized_axis =
- ngraph::normalize_axis(node.get_description(), axis, input_shape.rank());
+ auto axis = node.get_attribute_value<std::int64_t>("axis", 1);
+ if (input_shape.rank().is_static())
+ {
+ axis = ngraph::normalize_axis(
+ node.get_description(), axis, input_shape.rank());
+ }
// reshape to 2D - "batch size" x "input feature dimensions" (NxD)
- const auto coerced_tensor =
- ngraph::builder::opset1::flatten(input, normalized_axis);
- const auto& coerced_shape = coerced_tensor->get_shape();
- const auto row_size = static_cast<int64_t>(coerced_shape.at(1));
+ const auto coerced_tensor = ngraph::builder::opset1::flatten(input, axis);
+
+ const auto coerced_tensor_shape =
+ std::make_shared<default_opset::ShapeOf>(coerced_tensor);
+ std::shared_ptr<ngraph::Node> row_size =
+ std::make_shared<default_opset::Gather>(
+ coerced_tensor_shape,
+ default_opset::Constant::create(element::i64, {1}, {1}),
+ default_opset::Constant::create(element::i64, {}, {0}));
+ row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size);
const auto indices_axis = 1;
const auto max_indices = std::make_shared<opset0::GetOutputElement>(
default_opset::TopK::SortType::NONE),
1);
- const auto depth =
- default_opset::Constant::create(ngraph::element::i64, Shape{}, {row_size});
const auto on_value =
default_opset::Constant::create(ngraph::element::i64, Shape{}, {1});
const auto off_value =
default_opset::Constant::create(ngraph::element::i64, Shape{}, {0});
const auto results = std::make_shared<default_opset::OneHot>(
- max_indices, depth, on_value, off_value, indices_axis);
+ max_indices, row_size, on_value, off_value, indices_axis);
const auto converted_results = std::make_shared<default_opset::Convert>(
results, input->get_element_type());
axes.insert(ax);
}
- auto attrs = default_opset::Interpolate::InterpolateAttrs();
+ auto attrs = ngraph::op::v0::InterpolateAttrs();
attrs.axes = axes;
- attrs.mode = as_enum<default_opset::Interpolate::InterpolateMode>(mode);
+ attrs.mode = mode;
+ attrs.align_corners = false;
if (scales->is_constant() && data_shape.is_static())
{
auto dm = m_map.find(domain);
if (dm == std::end(m_map))
{
- NGRAPH_WARN << "Domain '" << domain << "' not recognized by nGraph";
+ NGRAPH_DEBUG << "Domain '" << domain << "' not recognized by nGraph";
return OperatorSet{};
}
if (domain == "" && version > OperatorsBridge::LATEST_SUPPORTED_ONNX_OPSET_VERSION)
//*****************************************************************************
#include "ngraph/lambda.hpp"
-#include "validation_util.hpp"
+#include "ngraph/factory_adapter.hpp"
+#include "ngraph/graph_util.hpp"
+#include "ngraph/validation_util.hpp"
using namespace std;
using namespace ngraph;
evaluate_nodes(value_map, output_tensor_map, outputs);
return true;
}
+
+bool Lambda::visit_attributes(AttributeVisitor& visitor)
+{
+ visitor.on_attribute("parameters", m_parameters);
+ visitor.on_attribute("results", m_results);
+ return true;
+}
+
+constexpr DiscreteTypeInfo AttributeAdapter<shared_ptr<Lambda>>::type_info;
+
+AttributeAdapter<shared_ptr<Lambda>>::AttributeAdapter(shared_ptr<Lambda>& ref)
+ : m_ref(ref)
+{
+}
+
+class NodeAttributeAdapter : public FactoryAttributeAdapter<Node>
+{
+public:
+ using FactoryAttributeAdapter::FactoryAttributeAdapter;
+ bool on_start(AttributeVisitor& visitor) override
+ {
+ // Indicate that there is a node following
+ m_id = visitor.get_registered_node_id(m_ref);
+ m_set_id = (m_ref == nullptr);
+ visitor.on_attribute("id", m_id);
+ return m_ref == nullptr || m_id != AttributeVisitor::invalid_node_id;
+ }
+ bool on_finish(AttributeVisitor&) override
+ {
+ if (m_set_id && m_ref)
+ {
+ m_ref->set_friendly_name(m_id);
+ }
+ return true;
+ }
+ void visit(AttributeVisitor& visitor, const std::string& id)
+ {
+ visitor.start_structure(id);
+ visitor.on_adapter(id, *this);
+ visitor.finish_structure();
+ }
+ static constexpr DiscreteTypeInfo type_info{"Lambda.NodeAttributeAdapter", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ string m_id;
+ bool m_set_id;
+};
+
+constexpr DiscreteTypeInfo NodeAttributeAdapter::type_info;
+
+bool AttributeAdapter<shared_ptr<Lambda>>::visit_attributes(AttributeVisitor& visitor)
+{
+ if (m_ref->get_results().size() > 0)
+ {
+ NodeVector serialized_nodes;
+ {
+ // Start with all nodes not already serialized
+ visitor.start_structure("nodes");
+ NodeVector results;
+ for (auto result : m_ref->get_results())
+ {
+ results.push_back(result);
+ }
+
+ int64_t i = 0;
+ ostringstream index;
+ traverse_nodes(
+ results, [&i, &index, &visitor, &serialized_nodes](shared_ptr<Node> node) -> void {
+ if (AttributeVisitor::invalid_node_id == visitor.get_registered_node_id(node))
+ {
+ // This node hasn't been seen before
+ visitor.register_node(node);
+ index.str("");
+ index << i++;
+ string id = index.str();
+ NodeAttributeAdapter adapter(node);
+ adapter.visit(visitor, id);
+ serialized_nodes.push_back(node);
+ }
+ });
+ {
+ // Sentinel at end
+ index.str("");
+ index << i++;
+ string id = index.str();
+ shared_ptr<Node> null_node;
+ NodeAttributeAdapter adapter(null_node);
+ adapter.visit(visitor, id);
+ }
+ visitor.finish_structure();
+ }
+ {
+ // Now do all the edges
+ visitor.start_structure("edges");
+ int64_t i = 0;
+ ostringstream index;
+ for (auto node : serialized_nodes)
+ {
+ for (auto input : node->inputs())
+ {
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string input_node_id = visitor.get_registered_node_id(node);
+ uint64_t input_index = input.get_index();
+ visitor.on_attribute("input_node", input_node_id);
+ visitor.on_attribute("input_index", input_index);
+ auto output = input.get_source_output();
+ string output_node_id =
+ visitor.get_registered_node_id(output.get_node_shared_ptr());
+ uint64_t output_index = output.get_index();
+ visitor.on_attribute("output_node", output_node_id);
+ visitor.on_attribute("output_index", output_index);
+ visitor.finish_structure();
+ }
+ }
+ {
+ // Add a sentinel
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string input_node_id = AttributeVisitor::invalid_node_id;
+ visitor.on_attribute("input_node", input_node_id);
+ visitor.finish_structure();
+ }
+ visitor.finish_structure();
+ }
+ {
+ // Control dependencies
+ visitor.start_structure("control");
+ int64_t i = 0;
+ ostringstream index;
+ for (auto node : serialized_nodes)
+ {
+ for (auto control : node->get_control_dependencies())
+ {
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string node_id = visitor.get_registered_node_id(node);
+ string dependency_id = visitor.get_registered_node_id(control);
+ visitor.on_attribute("node", node_id);
+ visitor.on_attribute("dependency", dependency_id);
+ visitor.finish_structure();
+ }
+ }
+ {
+ // Add a sentinel
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string node_id = AttributeVisitor::invalid_node_id;
+ visitor.on_attribute("node", node_id);
+ visitor.finish_structure();
+ }
+ visitor.finish_structure();
+ }
+ }
+ else
+ {
+ NodeVector deserialized_nodes;
+ {
+ // Read the graph
+ visitor.start_structure("nodes");
+ int64_t i = 0;
+ ostringstream index;
+ while (true)
+ {
+ index.str("");
+ index << i++;
+ string id = index.str();
+ shared_ptr<Node> node;
+ NodeAttributeAdapter adapter(node);
+ adapter.visit(visitor, id);
+ if (node)
+ {
+ visitor.register_node(node);
+ deserialized_nodes.push_back(node);
+ }
+ else
+ {
+ break;
+ }
+ }
+ visitor.finish_structure();
+ }
+ {
+ visitor.start_structure("edges");
+ // Connect the nodes
+ int64_t i = 0;
+ ostringstream index;
+ bool more_edges = true;
+ while (more_edges)
+ {
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string input_node_id;
+ visitor.on_attribute("input_node", input_node_id);
+ if (!input_node_id.empty())
+ {
+ shared_ptr<Node> input_node = visitor.get_registered_node(input_node_id);
+ NGRAPH_CHECK(input_node, "input node of edge not known");
+ uint64_t input_index;
+ string output_node_id;
+ uint64_t output_index;
+ visitor.on_attribute("input_index", input_index);
+ visitor.on_attribute("output_node", output_node_id);
+ visitor.on_attribute("output_index", output_index);
+ shared_ptr<Node> output_node = visitor.get_registered_node(output_node_id);
+ NGRAPH_CHECK(output_node, "output_node of edge not known");
+ input_node->set_argument(input_index, output_node->output(output_index));
+ }
+ else
+ {
+ more_edges = false;
+ }
+ visitor.finish_structure();
+ }
+ visitor.finish_structure();
+ }
+ {
+ // Control dependencies
+ visitor.start_structure("control");
+ int64_t i = 0;
+ ostringstream index;
+ bool more_control = true;
+ while (more_control)
+ {
+ index.str("");
+ index << i++;
+ string id = index.str();
+ visitor.start_structure(id);
+ string node_id;
+ visitor.on_attribute("node", node_id);
+ if (!node_id.empty())
+ {
+ shared_ptr<Node> node = visitor.get_registered_node(node_id);
+ NGRAPH_CHECK(node, "node of control edge not known");
+ string dependency_id;
+ visitor.on_attribute("dependency", dependency_id);
+ shared_ptr<Node> dependency = visitor.get_registered_node(dependency_id);
+ NGRAPH_CHECK(dependency, "dependency of control edge not known");
+ node->add_control_dependency(dependency);
+ }
+ else
+ {
+ more_control = false;
+ }
+ visitor.finish_structure();
+ }
+ visitor.finish_structure();
+ }
+ for (auto node : topological_sort(deserialized_nodes))
+ {
+ node->validate_and_infer_types();
+ }
+ }
+
+ {
+ // Finally visit the object attributes
+ visitor.start_structure("value");
+ m_ref->visit_attributes(visitor);
+ visitor.finish_structure();
+ }
+ return true;
+}
class NGRAPH_API Lambda
{
public:
+ virtual ~Lambda() {}
static constexpr DiscreteTypeInfo type_info{"Lamdba", 0};
const DiscreteTypeInfo& get_type_info() const { return type_info; }
/// Return the function parameters
+ virtual bool visit_attributes(AttributeVisitor& visitor);
const ParameterVector& get_parameters() const { return m_parameters; };
/// Index for parameter, or -1
int64_t get_parameter_index(const std::shared_ptr<op::Parameter>& parameter) const;
const HostTensorVector& input_tensors);
protected:
+ Lambda() = default;
Lambda(const ResultVector& results, const ParameterVector& parameters);
Lambda(const OutputVector& results, const ParameterVector& parameters);
ResultVector m_results;
ParameterVector m_parameters;
};
+
+ template <>
+ class NGRAPH_API AttributeAdapter<std::shared_ptr<Lambda>> : public VisitorAdapter
+ {
+ public:
+ AttributeAdapter(std::shared_ptr<Lambda>& ref);
+
+ bool visit_attributes(AttributeVisitor& visitor) override;
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<shared_ptr<Lambda>>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ std::shared_ptr<Lambda>& m_ref;
+ };
}
}
return false;
}
+
+constexpr DiscreteTypeInfo AttributeAdapter<shared_ptr<Node>>::type_info;
+
+AttributeAdapter<std::shared_ptr<Node>>::AttributeAdapter(std::shared_ptr<Node>& value)
+ : m_ref(value)
+{
+}
+
+bool AttributeAdapter<std::shared_ptr<Node>>::visit_attributes(AttributeVisitor& visitor)
+{
+ auto original_id = visitor.get_registered_node_id(m_ref);
+ auto id = original_id;
+ visitor.on_attribute("ID", id);
+ if (id != original_id)
+ {
+ m_ref = visitor.get_registered_node(id);
+ }
+ return true;
+}
+
+constexpr DiscreteTypeInfo AttributeAdapter<NodeVector>::type_info;
+
+AttributeAdapter<NodeVector>::AttributeAdapter(NodeVector& ref)
+ : m_ref(ref)
+{
+}
+
+/// Visits the element count as "size", then each element's registered node id under its
+/// decimal index. The vector is resized if the visitor changes "size", and null elements
+/// are filled in from the visitor's node registry using the visited id.
+bool AttributeAdapter<NodeVector>::visit_attributes(AttributeVisitor& visitor)
+{
+    int64_t size = static_cast<int64_t>(m_ref.size());
+    visitor.on_attribute("size", size);
+    // The visitor may overwrite size; a negative value would otherwise be converted to an
+    // enormous unsigned length by resize().
+    NGRAPH_CHECK(size >= 0, "NodeVector attribute has a negative size");
+    const size_t element_count = static_cast<size_t>(size);
+    if (element_count != m_ref.size())
+    {
+        m_ref.resize(element_count);
+    }
+    for (size_t i = 0; i < element_count; i++)
+    {
+        string id;
+        if (m_ref[i])
+        {
+            id = visitor.get_registered_node_id(m_ref[i]);
+        }
+        visitor.on_attribute(std::to_string(i), id);
+        // Still null after the visit: look the node up by the id the visitor provided
+        if (!m_ref[i])
+        {
+            m_ref[i] = visitor.get_registered_node(id);
+        }
+    }
+    return true;
+}
bool operator>=(const RawNodeOutput& other) const { return !(*this < other); }
};
+ /// \brief Visits a reference to a node that has been registered with the visitor.
+ ///
+ /// The adapter keeps a reference to the caller's pointer (see m_ref), so the pointer can be
+ /// rebound by visit_attributes().
+ template <>
+ class NGRAPH_API AttributeAdapter<std::shared_ptr<Node>> : public VisitorAdapter
+ {
+ public:
+ /// \param value The node pointer to adapt.
+ AttributeAdapter(std::shared_ptr<Node>& value);
+
+ /// \brief Visits the adapted node pointer; defined out of line.
+ bool visit_attributes(AttributeVisitor& visitor) override;
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<std::shared_ptr<Node>>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ /// Reference to the adapted pointer; the referenced pointer must outlive this adapter.
+ std::shared_ptr<Node>& m_ref;
+ };
+
+ /// \brief Attribute adapter specialization for a NodeVector.
+ ///
+ /// The adapter keeps a reference to the caller's vector (see m_ref) and is visited through
+ /// the VisitorAdapter interface.
+ template <>
+ class NGRAPH_API AttributeAdapter<NodeVector> : public VisitorAdapter
+ {
+ public:
+ /// \param ref The vector of nodes to adapt.
+ AttributeAdapter(NodeVector& ref);
+
+ /// \brief Visits the adapted vector; defined out of line.
+ bool visit_attributes(AttributeVisitor& visitor) override;
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<NodeVector>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ /// Reference to the adapted vector; the referenced vector must outlive this adapter.
+ NodeVector& m_ref;
+ };
+
using RawNodeOutputMap = std::map<RawNodeOutput, Output<Node>>;
class NGRAPH_API NodeValidationFailure : public CheckFailure
#endif
switch (get_element_type())
{
- case element::Type_t::boolean: rc = to_string(get_vector<char>()[index]); break;
+ case element::Type_t::boolean: rc = to_string(get_data_ptr<char>()[index]); break;
case element::Type_t::bf16:
- rc = to_cpp_string(static_cast<float>(get_vector<bfloat16>()[index]));
+ rc = to_cpp_string(static_cast<float>(get_data_ptr<bfloat16>()[index]));
break;
case element::Type_t::f16:
- rc = to_cpp_string(static_cast<float>(get_vector<float16>()[index]));
+ rc = to_cpp_string(static_cast<float>(get_data_ptr<float16>()[index]));
break;
- case element::Type_t::f32: rc = to_cpp_string(get_vector<float>()[index]); break;
- case element::Type_t::f64: rc = to_cpp_string(get_vector<double>()[index]); break;
- case element::Type_t::i8: rc = to_string(get_vector<int8_t>()[index]); break;
- case element::Type_t::i16: rc = to_string(get_vector<int16_t>()[index]); break;
- case element::Type_t::i32: rc = to_string(get_vector<int32_t>()[index]); break;
- case element::Type_t::i64: rc = to_string(get_vector<int64_t>()[index]); break;
+ case element::Type_t::f32: rc = to_cpp_string(get_data_ptr<float>()[index]); break;
+ case element::Type_t::f64: rc = to_cpp_string(get_data_ptr<double>()[index]); break;
+ case element::Type_t::i8: rc = to_string(get_data_ptr<int8_t>()[index]); break;
+ case element::Type_t::i16: rc = to_string(get_data_ptr<int16_t>()[index]); break;
+ case element::Type_t::i32: rc = to_string(get_data_ptr<int32_t>()[index]); break;
+ case element::Type_t::i64: rc = to_string(get_data_ptr<int64_t>()[index]); break;
case element::Type_t::u1:
- rc = to_string((get_vector<uint8_t>()[index / 8] >> (7 - (index % 8))) & 1);
+ rc = to_string((get_data_ptr<uint8_t>()[index / 8] >> (7 - (index % 8))) & 1);
break;
- case element::Type_t::u8: rc = to_string(get_vector<uint8_t>()[index]); break;
- case element::Type_t::u16: rc = to_string(get_vector<uint16_t>()[index]); break;
- case element::Type_t::u32: rc = to_string(get_vector<uint32_t>()[index]); break;
- case element::Type_t::u64: rc = to_string(get_vector<uint64_t>()[index]); break;
+ case element::Type_t::u8: rc = to_string(get_data_ptr<uint8_t>()[index]); break;
+ case element::Type_t::u16: rc = to_string(get_data_ptr<uint16_t>()[index]); break;
+ case element::Type_t::u32: rc = to_string(get_data_ptr<uint32_t>()[index]); break;
+ case element::Type_t::u64: rc = to_string(get_data_ptr<uint64_t>()[index]); break;
case element::Type_t::undefined: throw runtime_error("unsupported type");
case element::Type_t::dynamic: throw runtime_error("unsupported type");
}
// Filling in a fresh constant
allocate_buffer();
}
- visitor.on_attribute("value", get_data_ptr_nc(), shape_size(m_shape) * m_element_type.size());
+ visitor.on_attribute("value", m_data);
return true;
}
template <typename T>
std::vector<T> get_vector() const
{
- if (sizeof(T) > m_element_type.size() && shape_size(m_shape) > 0)
- {
- throw ngraph_error("Buffer over-read");
- }
-
- std::vector<T> rc;
- const T* p = static_cast<const T*>(get_data_ptr());
- for (size_t i = 0; i < shape_size(m_shape); i++)
- {
- rc.push_back(p[i]);
- }
- return rc;
+ const T* p = get_data_ptr<T>();
+ return std::vector<T>(p, p + shape_size(m_shape));
}
/// \brief Return the Constant's value as a vector cast to type T
template <typename T>
const T* get_data_ptr() const
{
+ if (sizeof(T) > m_element_type.size() && shape_size(m_shape) > 0)
+ {
+ throw ngraph_error("Buffer over-read");
+ }
+
return static_cast<const T*>(get_data_ptr());
}
constexpr NodeTypeInfo op::ShuffleChannels::type_info;
-op::ShuffleChannels::ShuffleChannels(const Output<Node>& data, const int axis, const size_t groups)
+op::ShuffleChannels::ShuffleChannels(const Output<Node>& data,
+ const int64_t axis,
+ const int64_t group)
: FusedOp({data})
, m_axis(axis)
- , m_groups{groups}
+ , m_group{group}
{
constructor_validate_and_infer_types();
}
bool ngraph::op::v0::ShuffleChannels::visit_attributes(AttributeVisitor& visitor)
{
visitor.on_attribute("axis", m_axis);
- visitor.on_attribute("groups", m_groups);
+ visitor.on_attribute("group", m_group);
return true;
}
"The 'axis' parameter for ShuffleChannels has to point to one of the "
"input tensor's shape dimensions.");
+ NODE_VALIDATION_CHECK(
+ this, m_group >= 1, "The 'group' parameter must be greater or equal to 1.");
+
const auto channel_dim_size = shape.at(axis_zb);
NODE_VALIDATION_CHECK(
this,
- channel_dim_size % m_groups == 0,
+ channel_dim_size % m_group == 0,
"The channel dimension size has to be a multiple of the groups parameter value.");
}
}
std::to_string(new_args.size()));
}
- return make_shared<ShuffleChannels>(new_args.at(0), m_axis, m_groups);
+ return make_shared<ShuffleChannels>(new_args.at(0), m_axis, m_group);
}
Shape op::ShuffleChannels::get_pre_shuffle_shape(const Shape& data_shape) const
// in general the resulting shape should contain the following values:
// [0]: ds[0] * ds[1] * ... * ds[m_axis-1] (or 1 if m_axis == 0)
- // [1]: m_groups
- // [2]: ds[axis] / m_groups
+ // [1]: m_group
+ // [2]: ds[axis] / m_group
// [3]: ds[axis+1] * ds[axis+2] * ... * ds[ds.size()-1] (or 1 if m_axis points to the last elem
// of ds)
Shape res(4, 1);
res[0] *= ds[i];
}
- res[1] = m_groups;
- res[2] = ds[axis_zb] / m_groups;
+ res[1] = m_group;
+ res[2] = ds[axis_zb] / m_group;
for (size_t i = axis_zb + 1; i < ds.size(); ++i)
{
/// that the index should be calculated from the back of the input
/// data
/// shape.
- /// \param groups - number of groups the channel dimension specified by axis should
+ /// \param group - number of groups the channel dimension specified by axis should
/// be
/// split into
ShuffleChannels(const Output<Node>& data,
- const int axis = 1,
- const size_t groups = 1UL);
+ const int64_t axis = 1,
+ const int64_t group = 1);
bool visit_attributes(AttributeVisitor& visitor) override;
size_t get_zero_based_axis() const;
virtual std::shared_ptr<Node>
clone_with_new_inputs(const OutputVector& new_args) const override;
- int get_axis() const { return m_axis; }
- size_t get_groups() const { return m_groups; }
+ int64_t get_axis() const { return m_axis; }
+ int64_t get_group() const { return m_group; }
private:
/// \brief Generates a shape required to permute the data
///
/// \return A 4D tensor to be used to reshape the input data before shuffling it
Shape get_pre_shuffle_shape(const Shape& data_shape) const;
- int m_axis;
- size_t m_groups;
+ int64_t m_axis;
+ int64_t m_group;
};
}
using v0::ShuffleChannels;
{
m_is_relevant_to_shapes = is_relevant;
}
+
+constexpr DiscreteTypeInfo AttributeAdapter<ParameterVector>::type_info;
+
+/// Binds the adapter to the caller's ParameterVector; the vector is referenced, not copied.
+AttributeAdapter<ParameterVector>::AttributeAdapter(ParameterVector& ref)
+ : m_ref(ref)
+{
+}
+
+/// Visits a vector of Parameter pointers as a "size" attribute followed by one attribute per
+/// element, named by its decimal index, whose value is the element's registered node ID.
+///
+/// The vector is resized when the visitor changes "size". An element is (re)bound to the node
+/// the visitor has registered under the visited ID, cast to op::v0::Parameter, when it is
+/// empty or when the visitor changed its ID.
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true.
+bool AttributeAdapter<ParameterVector>::visit_attributes(AttributeVisitor& visitor)
+{
+    int64_t size = m_ref.size();
+    visitor.on_attribute("size", size);
+    // Compare in the container's unsigned size type to avoid a signed/unsigned mismatch.
+    if (static_cast<size_t>(size) != m_ref.size())
+    {
+        m_ref.resize(size);
+    }
+    ostringstream index;
+    for (int64_t i = 0; i < size; i++)
+    {
+        index.str("");
+        index << i;
+        // Empty slots are presented to the visitor with an empty ID.
+        string original_id;
+        if (m_ref[i])
+        {
+            original_id = visitor.get_registered_node_id(m_ref[i]);
+        }
+        string id = original_id;
+        visitor.on_attribute(index.str(), id);
+        // Rebind when the slot is empty or the visitor supplied a different ID; filling only
+        // empty slots would silently drop an ID change for an already populated slot.
+        if (!m_ref[i] || id != original_id)
+        {
+            m_ref[i] = as_type_ptr<op::v0::Parameter>(visitor.get_registered_node(id));
+        }
+    }
+    return true;
+}
using v0::Parameter;
}
using ParameterVector = std::vector<std::shared_ptr<op::Parameter>>;
+
+ /// \brief Attribute adapter specialization for a ParameterVector.
+ ///
+ /// The adapter keeps a reference to the caller's vector (see m_ref) and is visited through
+ /// the VisitorAdapter interface.
+ template <>
+ class NGRAPH_API AttributeAdapter<ParameterVector> : public VisitorAdapter
+ {
+ public:
+ /// \param ref The vector of parameters to adapt.
+ AttributeAdapter(ParameterVector& ref);
+
+ /// \brief Visits the adapted vector; defined out of line.
+ bool visit_attributes(AttributeVisitor& visitor) override;
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<ParameterVector>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ /// Reference to the adapted vector; the referenced vector must outlive this adapter.
+ ParameterVector& m_ref;
+ };
}
return make_shared<Reshape>(new_args.at(0), m_input_order, m_output_shape);
}
+/// Exposes the op's attributes to the visitor, in this order: "input_order" (m_input_order)
+/// and then "output_shape" (m_output_shape).
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true.
+bool op::Reshape::visit_attributes(AttributeVisitor& visitor)
+{
+ visitor.on_attribute("input_order", m_input_order);
+ visitor.on_attribute("output_shape", m_output_shape);
+ return true;
+}
+
void op::Reshape::generate_adjoints(autodiff::Adjoints& adjoints, const OutputVector& deltas)
{
auto delta = deltas.at(0);
this, pattern_et.is_integral_number(), "Pattern must be an integral number.");
// check shapes
+ const PartialShape& input_pshape = get_input_partial_shape(0);
const PartialShape& pattern_shape = get_input_partial_shape(1);
NODE_VALIDATION_CHECK(this,
pattern_shape.rank().compatible(1),
else
{
std::vector<Dimension> partial_shape(output_rank.get_length());
- // Replace zeros and negatives with Dynamic dimensions as needed
- std::transform(out_shape_val.begin(),
- out_shape_val.end(),
- partial_shape.begin(),
- [&](const int64_t& v) {
- return (v < 0) ? Dimension()
- : ((v == 0 && m_special_zero) ? Dimension()
- : Dimension(v));
- });
+ // Replace zeros with Dynamic dimensions as needed
+ for (size_t i = 0; i < out_shape_val.size(); ++i)
+ {
+ const auto& v = out_shape_val[i];
+ if (v < 0)
+ {
+ partial_shape[i] = Dimension();
+ }
+ else if (v == 0 && m_special_zero)
+ {
+ partial_shape[i] = ((input_pshape.rank().is_static() &&
+ input_pshape.rank().get_length() == out_shape_val.size())
+ ? input_pshape[i]
+ : Dimension());
+ }
+ else
+ {
+ partial_shape[i] = Dimension(v);
+ }
+ }
- if (get_input_partial_shape(0).is_static())
+ if (input_pshape.is_static())
{
size_t output_elements = 1;
int negative_dim = -1;
- auto input_shape = get_input_partial_shape(0).to_shape();
+ auto input_shape = input_pshape.to_shape();
size_t input_elements = shape_size(input_shape);
for (size_t i = 0; i < output_rank.get_length(); i++)
{
virtual std::shared_ptr<Node>
clone_with_new_inputs(const OutputVector& new_args) const override;
+ bool visit_attributes(AttributeVisitor& visitor) override;
/// \return The order in which to iterate over input axes.
const AxisVector& get_input_order() const { return m_input_order; }
{
return false;
}
+
+constexpr DiscreteTypeInfo AttributeAdapter<ResultVector>::type_info;
+
+/// Binds the adapter to the caller's ResultVector; the vector is referenced, not copied.
+AttributeAdapter<ResultVector>::AttributeAdapter(ResultVector& ref)
+ : m_ref(ref)
+{
+}
+
+/// Visits a vector of Result pointers as a "size" attribute followed by one attribute per
+/// element, named by its decimal index, whose value is the element's registered node ID.
+///
+/// The vector is resized when the visitor changes "size". An element is (re)bound to the node
+/// the visitor has registered under the visited ID, cast to op::v0::Result, when it is empty
+/// or when the visitor changed its ID.
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true.
+bool AttributeAdapter<ResultVector>::visit_attributes(AttributeVisitor& visitor)
+{
+    int64_t size = m_ref.size();
+    visitor.on_attribute("size", size);
+    // Compare in the container's unsigned size type to avoid a signed/unsigned mismatch.
+    if (static_cast<size_t>(size) != m_ref.size())
+    {
+        m_ref.resize(size);
+    }
+    ostringstream index;
+    for (int64_t i = 0; i < size; i++)
+    {
+        index.str("");
+        index << i;
+        // Empty slots are presented to the visitor with an empty ID.
+        string original_id;
+        if (m_ref[i])
+        {
+            original_id = visitor.get_registered_node_id(m_ref[i]);
+        }
+        string id = original_id;
+        visitor.on_attribute(index.str(), id);
+        // Rebind when the slot is empty or the visitor supplied a different ID; filling only
+        // empty slots would silently drop an ID change for an already populated slot.
+        if (!m_ref[i] || id != original_id)
+        {
+            m_ref[i] = as_type_ptr<op::v0::Result>(visitor.get_registered_node(id));
+        }
+    }
+    return true;
+}
using v0::Result;
}
using ResultVector = std::vector<std::shared_ptr<op::Result>>;
+
+ /// \brief Attribute adapter specialization for a ResultVector.
+ ///
+ /// The adapter keeps a reference to the caller's vector (see m_ref) and is visited through
+ /// the VisitorAdapter interface.
+ template <>
+ class NGRAPH_API AttributeAdapter<ResultVector> : public VisitorAdapter
+ {
+ public:
+ /// \param ref The vector of results to adapt.
+ AttributeAdapter(ResultVector& ref);
+
+ /// \brief Visits the adapted vector; defined out of line.
+ bool visit_attributes(AttributeVisitor& visitor) override;
+
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<ResultVector>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ /// Reference to the adapted vector; the referenced vector must outlive this adapter.
+ ResultVector& m_ref;
+ };
}
const auto axis_node = input_value(1).get_node_shared_ptr();
NODE_VALIDATION_CHECK(this, axis_node->is_constant(), "The 'axis' input node must be constant");
const auto axis_node_const = as_type_ptr<op::Constant>(axis_node);
- m_axis = axis_node_const->cast_vector<int64_t>()[0];
+ m_axis = axis_node_const->get_data_ptr<int64_t>()[0];
// Create dynamic-typed outputs. Actual shape/type will be computed during shape inference
for (size_t i = 0; i < std::max(m_splits.size(), m_num_split); i++)
//*****************************************************************************
#include "ngraph/op/tensor_iterator.hpp"
+#include "ngraph/factory.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/get_output_element_elimination.hpp"
#include "ngraph/specialize_function.hpp"
using namespace std;
using namespace ngraph;
-constexpr NodeTypeInfo op::TensorIterator::type_info;
+constexpr NodeTypeInfo op::v0::TensorIterator::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::SliceInputDescription::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::MergedInputDescription::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::InvariantInputDescription::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::SliceInputDescription::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::MergedInputDescription::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::InvariantInputDescription::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::BodyOutputDescription::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::ConcatOutputDescription::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::BodyOutputDescription::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::ConcatOutputDescription::type_info;
-constexpr DiscreteTypeInfo op::TensorIterator::BodyLambda::type_info;
+constexpr DiscreteTypeInfo op::v0::TensorIterator::BodyLambda::type_info;
-op::TensorIterator::TensorIterator(const OutputVector& values)
+/// Visits no attributes: the visitor argument is unused and the call always returns true.
+bool op::v0::TensorIterator::BodyLambda::visit_attributes(AttributeVisitor& visitor)
+{
+ return true;
+}
+
+op::v0::TensorIterator::TensorIterator(const OutputVector& values)
: op::util::FusedOp(values)
{
}
-op::TensorIterator::InputDescription::InputDescription(uint64_t input_index,
- uint64_t body_parameter_index)
+op::v0::TensorIterator::InputDescription::InputDescription(uint64_t input_index,
+ uint64_t body_parameter_index)
: m_input_index(input_index)
, m_body_parameter_index(body_parameter_index)
{
}
-op::TensorIterator::SliceInputDescription::SliceInputDescription(uint64_t input_index,
- uint64_t body_parameter_index,
- int64_t start,
- int64_t stride,
- int64_t part_size,
- int64_t end,
- int64_t axis)
+/// Visits the fields common to every input description, in this order: "input_index" and
+/// then "body_parameter_index".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true.
+bool op::v0::TensorIterator::InputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ visitor.on_attribute("input_index", m_input_index);
+ visitor.on_attribute("body_parameter_index", m_body_parameter_index);
+ return true;
+}
+
+op::v0::TensorIterator::SliceInputDescription::SliceInputDescription(uint64_t input_index,
+ uint64_t body_parameter_index,
+ int64_t start,
+ int64_t stride,
+ int64_t part_size,
+ int64_t end,
+ int64_t axis)
: InputDescription(input_index, body_parameter_index)
, m_start(start)
, m_stride(stride)
{
}
-shared_ptr<op::TensorIterator::InputDescription>
- op::TensorIterator::SliceInputDescription::copy() const
+shared_ptr<op::v0::TensorIterator::InputDescription>
+ op::v0::TensorIterator::SliceInputDescription::copy() const
{
return make_shared<SliceInputDescription>(
m_input_index, m_body_parameter_index, m_start, m_stride, m_part_size, m_end, m_axis);
}
-op::TensorIterator::MergedInputDescription::MergedInputDescription(uint64_t input_index,
- uint64_t body_parameter_index,
- uint64_t body_value_index)
+/// Visits the base InputDescription fields first, then the slicing fields in this order:
+/// "start", "stride", "part_size", "end", "axis".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true; the result of the base-class visit is not inspected.
+bool op::v0::TensorIterator::SliceInputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ InputDescription::visit_attributes(visitor);
+ visitor.on_attribute("start", m_start);
+ visitor.on_attribute("stride", m_stride);
+ visitor.on_attribute("part_size", m_part_size);
+ visitor.on_attribute("end", m_end);
+ visitor.on_attribute("axis", m_axis);
+ return true;
+}
+
+op::v0::TensorIterator::MergedInputDescription::MergedInputDescription(
+ uint64_t input_index, uint64_t body_parameter_index, uint64_t body_value_index)
: InputDescription(input_index, body_parameter_index)
, m_body_value_index(body_value_index)
{
}
-shared_ptr<op::TensorIterator::InputDescription>
- op::TensorIterator::MergedInputDescription::copy() const
+shared_ptr<op::v0::TensorIterator::InputDescription>
+ op::v0::TensorIterator::MergedInputDescription::copy() const
{
return make_shared<MergedInputDescription>(
m_input_index, m_body_parameter_index, m_body_value_index);
}
-op::TensorIterator::InvariantInputDescription::InvariantInputDescription(
+/// Visits the base InputDescription fields first, then "body_value_index".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true; the result of the base-class visit is not inspected.
+bool op::v0::TensorIterator::MergedInputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ InputDescription::visit_attributes(visitor);
+ visitor.on_attribute("body_value_index", m_body_value_index);
+ return true;
+}
+
+op::v0::TensorIterator::InvariantInputDescription::InvariantInputDescription(
uint64_t input_index, uint64_t body_parameter_index)
: InputDescription(input_index, body_parameter_index)
{
}
-shared_ptr<op::TensorIterator::InputDescription>
- op::TensorIterator::InvariantInputDescription::copy() const
+shared_ptr<op::v0::TensorIterator::InputDescription>
+ op::v0::TensorIterator::InvariantInputDescription::copy() const
{
return make_shared<InvariantInputDescription>(m_input_index, m_body_parameter_index);
}
-op::TensorIterator::OutputDescription::OutputDescription(uint64_t body_value_index,
- uint64_t output_index)
+/// Visits only the base InputDescription fields; this type adds no attributes of its own.
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true; the result of the base-class visit is not inspected.
+bool op::v0::TensorIterator::InvariantInputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ InputDescription::visit_attributes(visitor);
+ return true;
+}
+
+op::v0::TensorIterator::OutputDescription::OutputDescription(uint64_t body_value_index,
+ uint64_t output_index)
: m_body_value_index(body_value_index)
, m_output_index(output_index)
{
}
-op::TensorIterator::ConcatOutputDescription::ConcatOutputDescription(uint64_t body_value_index,
- uint64_t output_index,
- int64_t start,
- int64_t stride,
- int64_t part_size,
- int64_t end,
- int64_t axis)
+/// Visits the fields common to every output description, in this order: "body_value_index"
+/// and then "output_index".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true.
+bool op::v0::TensorIterator::OutputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ visitor.on_attribute("body_value_index", m_body_value_index);
+ visitor.on_attribute("output_index", m_output_index);
+ return true;
+}
+
+op::v0::TensorIterator::ConcatOutputDescription::ConcatOutputDescription(uint64_t body_value_index,
+ uint64_t output_index,
+ int64_t start,
+ int64_t stride,
+ int64_t part_size,
+ int64_t end,
+ int64_t axis)
: OutputDescription(body_value_index, output_index)
, m_start(start)
, m_stride(stride)
{
}
-shared_ptr<op::TensorIterator::OutputDescription>
- op::TensorIterator::ConcatOutputDescription::copy() const
+/// Visits the base OutputDescription fields first, then the concatenation fields in this
+/// order: "start", "stride", "part_size", "end", "axis".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true; the result of the base-class visit is not inspected.
+bool op::v0::TensorIterator::ConcatOutputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ OutputDescription::visit_attributes(visitor);
+ visitor.on_attribute("start", m_start);
+ visitor.on_attribute("stride", m_stride);
+ visitor.on_attribute("part_size", m_part_size);
+ visitor.on_attribute("end", m_end);
+ visitor.on_attribute("axis", m_axis);
+ return true;
+}
+
+shared_ptr<op::v0::TensorIterator::OutputDescription>
+ op::v0::TensorIterator::ConcatOutputDescription::copy() const
{
return make_shared<ConcatOutputDescription>(
m_body_value_index, m_output_index, m_start, m_stride, m_part_size, m_end, m_axis);
}
-op::TensorIterator::BodyOutputDescription::BodyOutputDescription(uint64_t body_value_index,
- uint64_t output_index,
- int64_t iteration)
+op::v0::TensorIterator::BodyOutputDescription::BodyOutputDescription(uint64_t body_value_index,
+ uint64_t output_index,
+ int64_t iteration)
: OutputDescription(body_value_index, output_index)
, m_iteration(iteration)
{
}
-shared_ptr<op::TensorIterator::OutputDescription>
- op::TensorIterator::BodyOutputDescription::copy() const
+shared_ptr<op::v0::TensorIterator::OutputDescription>
+ op::v0::TensorIterator::BodyOutputDescription::copy() const
{
return make_shared<BodyOutputDescription>(m_body_value_index, m_output_index, m_iteration);
}
-Input<Node> op::TensorIterator::input_for_value(const Output<Node>& value)
+/// Visits the base OutputDescription fields first, then "iteration".
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always true; the result of the base-class visit is not inspected.
+bool op::v0::TensorIterator::BodyOutputDescription::visit_attributes(AttributeVisitor& visitor)
+{
+ OutputDescription::visit_attributes(visitor);
+ visitor.on_attribute("iteration", m_iteration);
+ return true;
+}
+
+namespace
+{
+}
+
+namespace ngraph
+{
+    /// \brief Returns the shared factory registry for TensorIterator input descriptions.
+    ///
+    /// The three concrete input description types are registered exactly once, on the first
+    /// call. std::call_once is used instead of a hand-written double-checked test because
+    /// reading m_factory_map outside the lock races with a concurrent registration and could
+    /// return a registry that another thread is still filling in.
+    template <>
+    FactoryRegistry<op::v0::TensorIterator::InputDescription>&
+        FactoryRegistry<op::v0::TensorIterator::InputDescription>::get()
+    {
+        static FactoryRegistry<op::v0::TensorIterator::InputDescription> registry;
+        static std::once_flag init_flag;
+        // Function-local statics have static storage duration, so the lambda needs no capture.
+        std::call_once(init_flag, [] {
+            registry.register_factory<op::v0::TensorIterator::SliceInputDescription>();
+            registry.register_factory<op::v0::TensorIterator::MergedInputDescription>();
+            registry.register_factory<op::v0::TensorIterator::InvariantInputDescription>();
+        });
+        return registry;
+    }
+
+    constexpr DiscreteTypeInfo
+        AttributeAdapter<std::shared_ptr<op::TensorIterator::InputDescription>>::type_info;
+
+    constexpr DiscreteTypeInfo AttributeAdapter<
+        std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>>::type_info;
+
+    /// Binds the adapter to the caller's vector of input descriptions (referenced, not copied).
+    AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>>::
+        AttributeAdapter(std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>& ref)
+        : m_ref(ref)
+    {
+    }
+
+    /// Visits the vector as a "size" attribute followed by one attribute per element, named by
+    /// its decimal index. The vector is resized when the visitor changes "size".
+    ///
+    /// \return Always true.
+    bool AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>>::
+        visit_attributes(AttributeVisitor& visitor)
+    {
+        int64_t size = m_ref.size();
+        visitor.on_attribute("size", size);
+        if (size != m_ref.size())
+        {
+            m_ref.resize(size);
+        }
+        ostringstream index;
+        for (int64_t i = 0; i < size; i++)
+        {
+            index.str("");
+            index << i;
+            visitor.on_attribute(index.str(), m_ref[i]);
+        }
+        return true;
+    }
+
+    /// \brief Returns the shared factory registry for TensorIterator output descriptions.
+    ///
+    /// The two concrete output description types are registered exactly once, on the first
+    /// call, under std::call_once (see the input description registry for the rationale).
+    template <>
+    FactoryRegistry<op::v0::TensorIterator::OutputDescription>&
+        FactoryRegistry<op::v0::TensorIterator::OutputDescription>::get()
+    {
+        static FactoryRegistry<op::v0::TensorIterator::OutputDescription> registry;
+        static std::once_flag init_flag;
+        // Function-local statics have static storage duration, so the lambda needs no capture.
+        std::call_once(init_flag, [] {
+            registry.register_factory<op::v0::TensorIterator::ConcatOutputDescription>();
+            registry.register_factory<op::v0::TensorIterator::BodyOutputDescription>();
+        });
+        return registry;
+    }
+
+    constexpr DiscreteTypeInfo AttributeAdapter<
+        std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>>::type_info;
+
+    constexpr DiscreteTypeInfo
+        AttributeAdapter<std::shared_ptr<op::TensorIterator::OutputDescription>>::type_info;
+
+    /// Binds the adapter to the caller's vector of output descriptions (referenced, not copied).
+    AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>>::
+        AttributeAdapter(std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>& ref)
+        : m_ref(ref)
+    {
+    }
+
+    /// Visits the vector as a "size" attribute followed by one attribute per element, named by
+    /// its decimal index. The vector is resized when the visitor changes "size".
+    ///
+    /// \return Always true.
+    bool AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>>::
+        visit_attributes(AttributeVisitor& visitor)
+    {
+        int64_t size = m_ref.size();
+        visitor.on_attribute("size", size);
+        if (size != m_ref.size())
+        {
+            m_ref.resize(size);
+        }
+        ostringstream index;
+        for (int64_t i = 0; i < size; i++)
+        {
+            index.str("");
+            index << i;
+            visitor.on_attribute(index.str(), m_ref[i]);
+        }
+        return true;
+    }
+}
+
+/// Visits the TensorIterator's attributes, in this order: "body", "input_descriptions",
+/// "output_descriptions".
+///
+/// If no body has been set yet, an empty BodyLambda is created first so that there is an
+/// object to visit.
+///
+/// \param visitor The visitor that reads or writes the attributes.
+/// \return Always false.
+bool op::v0::TensorIterator::visit_attributes(AttributeVisitor& visitor)
+{
+ if (!m_body)
+ {
+ m_body = make_shared<BodyLambda>();
+ }
+ // The body is handed to the visitor through a local shared_ptr<Lambda> copy of m_body.
+ // NOTE(review): if a visitor rebinds `lambda` to a different object, m_body is not updated
+ // from it -- confirm that visitors only modify the body in place.
+ shared_ptr<Lambda> lambda = m_body;
+ visitor.on_attribute("body", lambda);
+ visitor.on_attribute("input_descriptions", m_input_descriptions);
+ visitor.on_attribute("output_descriptions", m_output_descriptions);
+
+ // NOTE(review): the other visit_attributes implementations in this file return true;
+ // confirm that false is intended here (presumably to flag incomplete visitor support).
+ return false;
+}
+
+Input<Node> op::v0::TensorIterator::input_for_value(const Output<Node>& value)
{
auto input_index = get_input_size();
set_argument(input_index, value);
return Input<Node>(this, input_index);
}
-void op::TensorIterator::set_sliced_input(const std::shared_ptr<op::Parameter>& body_parameter,
- const Output<Node>& value,
- int64_t start,
- int64_t stride,
- int64_t part_size,
- int64_t end,
- int64_t axis)
+void op::v0::TensorIterator::set_sliced_input(const std::shared_ptr<op::Parameter>& body_parameter,
+ const Output<Node>& value,
+ int64_t start,
+ int64_t stride,
+ int64_t part_size,
+ int64_t end,
+ int64_t axis)
{
m_input_descriptions.push_back(
make_shared<SliceInputDescription>(input_for_value(value).get_index(),
axis));
}
-void op::TensorIterator::set_merged_input(const std::shared_ptr<Parameter>& body_parameter,
- const Output<Node>& initial_value,
- const Output<Node>& successive_value)
+void op::v0::TensorIterator::set_merged_input(const std::shared_ptr<Parameter>& body_parameter,
+ const Output<Node>& initial_value,
+ const Output<Node>& successive_value)
{
m_input_descriptions.push_back(
make_shared<MergedInputDescription>(input_for_value(initial_value).get_index(),
m_body->get_result_index(successive_value)));
}
-void op::TensorIterator::set_invariant_input(const std::shared_ptr<Parameter>& body_parameter,
- const Output<Node>& value)
+void op::v0::TensorIterator::set_invariant_input(const std::shared_ptr<Parameter>& body_parameter,
+ const Output<Node>& value)
{
m_input_descriptions.push_back(make_shared<InvariantInputDescription>(
input_for_value(value).get_index(), m_body->get_parameter_index(body_parameter)));
}
-Output<Node> op::TensorIterator::get_iter_value(const Output<Node>& body_value, int64_t iteration)
+Output<Node> op::v0::TensorIterator::get_iter_value(const Output<Node>& body_value,
+ int64_t iteration)
{
auto output_index = get_output_size();
m_output_descriptions.push_back(make_shared<BodyOutputDescription>(
return Output<Node>(shared_from_this(), output_index);
}
-Output<Node> op::TensorIterator::get_concatenated_slices(const Output<Node>& body_value,
- int64_t start,
- int64_t stride,
- int64_t part_size,
- int64_t end,
- int64_t axis)
+Output<Node> op::v0::TensorIterator::get_concatenated_slices(const Output<Node>& body_value,
+ int64_t start,
+ int64_t stride,
+ int64_t part_size,
+ int64_t end,
+ int64_t axis)
{
auto output_index = get_output_size();
m_output_descriptions.push_back(make_shared<ConcatOutputDescription>(
return Output<Node>(shared_from_this(), output_index);
}
-NodeVector op::TensorIterator::decompose_op() const
+NodeVector op::v0::TensorIterator::decompose_op() const
{
// Stub
return NodeVector{};
}
-void op::TensorIterator::revalidate_and_infer_types_for_body_ops()
+void op::v0::TensorIterator::revalidate_and_infer_types_for_body_ops()
{
std::stack<std::shared_ptr<Node>, std::vector<std::shared_ptr<Node>>> nodes_to_do;
std::unordered_set<std::shared_ptr<Node>> nodes_done;
auto node = nodes_to_do.top();
if (nodes_done.count(node) == 0)
{
- NGRAPH_CHECK(as_type_ptr<op::TensorIterator>(node) == nullptr,
+ NGRAPH_CHECK(as_type_ptr<op::v0::TensorIterator>(node) == nullptr,
"No nested TensorIterator");
bool can_add = true;
size_t arg_count = node->get_input_size();
}
}
-void op::TensorIterator::validate_and_infer_types()
+void op::v0::TensorIterator::validate_and_infer_types()
{
NODE_VALIDATION_CHECK(this,
get_input_size() == m_input_descriptions.size(),
}
}
-std::shared_ptr<Node> op::TensorIterator::clone_with_new_inputs(const OutputVector& new_args) const
+std::shared_ptr<Node>
+ op::v0::TensorIterator::clone_with_new_inputs(const OutputVector& new_args) const
{
- auto op = make_shared<op::TensorIterator>(new_args);
+ auto op = make_shared<op::v0::TensorIterator>(new_args);
op->set_output_size(m_output_descriptions.size());
std::vector<::ngraph::element::Type> types(m_body->get_parameters().size());
if (new_shapes[input_description->m_body_parameter_index].is_static())
{
if (auto slice_in = ::ngraph::as_type_ptr<
- ngraph::op::TensorIterator::SliceInputDescription>(input_description))
+ ngraph::op::v0::TensorIterator::SliceInputDescription>(
+ input_description))
{
new_shapes[slice_in->m_body_parameter_index][slice_in->m_axis] =
slice_in->m_part_size;
}
return move(op);
}
+
+namespace ngraph
+{
+}
#include <vector>
+#include "ngraph/factory_adapter.hpp"
#include "ngraph/lambda.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/util/fused_op.hpp"
public:
static constexpr NodeTypeInfo type_info{"TensorIterator", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
+ bool visit_attributes(AttributeVisitor& visitor) override;
// Forward declarations
class SliceInputDescription;
class MergedInputDescription;
class NGRAPH_API BodyLambda : public Lambda
{
public:
- static constexpr DiscreteTypeInfo type_info{"BodyLamdba", 0};
- const DiscreteTypeInfo& get_type_info() const { return type_info; }
+ using type_info_t = DiscreteTypeInfo;
+ static constexpr type_info_t type_info{"BodyLamdba", 0};
+ const type_info_t& get_type_info() const { return type_info; }
BodyLambda(const OutputVector& outputs, const ParameterVector& parameters)
: Lambda(outputs, parameters)
{
: Lambda(results, parameters)
{
}
+ BodyLambda() = default;
+ virtual bool visit_attributes(AttributeVisitor& visitor);
};
/// \brief Describes a connection between a TensorIterator input and the body.
/// \param input_index Position of the TensorIterator input
/// \param body_parameter Body parameter to receive input
InputDescription(uint64_t input_index, uint64_t body_parameter_index);
+ InputDescription() = default;
public:
+ using type_info_t = DiscreteTypeInfo;
virtual ~InputDescription() {}
virtual std::shared_ptr<InputDescription> copy() const = 0;
- virtual const DiscreteTypeInfo& get_type_info() const = 0;
+ virtual const type_info_t& get_type_info() const = 0;
+ virtual bool visit_attributes(AttributeVisitor& visitor);
- uint64_t m_input_index;
- uint64_t m_body_parameter_index;
+ uint64_t m_input_index{0};
+ uint64_t m_body_parameter_index{0};
};
/// \brief Describes a body input formed from slices of an input to
class NGRAPH_API SliceInputDescription : public InputDescription
{
public:
- static constexpr DiscreteTypeInfo type_info{"SliceInputDescription", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ static constexpr type_info_t type_info{"SliceInputDescription", 0};
+ const type_info_t& get_type_info() const override { return type_info; }
/// \param input_index Position of the TensorIterator input
/// \param body_parameter_index Body parameter position to receive input
/// \param start First index for slices
int64_t part_size,
int64_t end,
int64_t axis);
+ SliceInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
-
- int64_t m_start;
- int64_t m_stride;
- int64_t m_part_size;
- int64_t m_end;
- int64_t m_axis;
+ bool visit_attributes(AttributeVisitor& visitor) override;
+ int64_t m_start{0};
+ int64_t m_stride{0};
+ int64_t m_part_size{0};
+ int64_t m_end{0};
+ int64_t m_axis{0};
};
/// \brief Describes a body input initialized from a TensorIterator input on the
class NGRAPH_API MergedInputDescription : public InputDescription
{
public:
- static constexpr DiscreteTypeInfo type_info{"MergedInputDescription", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ static constexpr type_info_t type_info{"MergedInputDescription", 0};
+ const type_info_t& get_type_info() const override { return type_info; }
/// \param input_index Position of the TensorIterator input supplying a
/// value to
/// body_parameter
MergedInputDescription(uint64_t input_index,
uint64_t body_parameter_index,
uint64_t body_value_index);
+ MergedInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
-
- uint64_t m_body_value_index;
+ bool visit_attributes(AttributeVisitor& visitor) override;
+ uint64_t m_body_value_index{0};
};
class NGRAPH_API InvariantInputDescription : public InputDescription
{
public:
- static constexpr DiscreteTypeInfo type_info{"InvariantInputDescription", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ static constexpr type_info_t type_info{"InvariantInputDescription", 0};
+ const type_info_t& get_type_info() const override { return type_info; }
InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index);
+ InvariantInputDescription() = default;
std::shared_ptr<InputDescription> copy() const override;
+ bool visit_attributes(AttributeVisitor& visitor) override;
};
// Forward declarations
/// \param body_value_index A body value that produces the output
/// \param output_index The TensorIterator output index
OutputDescription(uint64_t body_value_index, uint64_t output_index);
+ OutputDescription() = default;
public:
+ using type_info_t = DiscreteTypeInfo;
virtual ~OutputDescription() {}
virtual std::shared_ptr<OutputDescription> copy() const = 0;
- virtual const DiscreteTypeInfo& get_type_info() const = 0;
+ virtual bool visit_attributes(AttributeVisitor& visitor);
+ virtual const type_info_t& get_type_info() const = 0;
- uint64_t m_body_value_index;
- uint64_t m_output_index;
+ uint64_t m_body_value_index{0};
+ uint64_t m_output_index{0};
};
/// \brief Produces an output by concatenating an output from each iteration
class NGRAPH_API ConcatOutputDescription : public OutputDescription
{
public:
- static constexpr DiscreteTypeInfo type_info{"ConcatOutputDescription", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ static constexpr type_info_t type_info{"ConcatOutputDescription", 0};
+ const type_info_t& get_type_info() const override { return type_info; }
/// \param body_value_index A body value that produces the output
/// \param output_index The TensorIterator output index
/// \param start First index for slices
int64_t part_size,
int64_t end,
int64_t axis);
+ ConcatOutputDescription() = default;
virtual std::shared_ptr<OutputDescription> copy() const override;
-
- int64_t m_start;
- int64_t m_stride;
- int64_t m_part_size;
- int64_t m_end;
- int64_t m_axis;
+ bool visit_attributes(AttributeVisitor& visitor) override;
+ int64_t m_start{0};
+ int64_t m_stride{0};
+ int64_t m_part_size{0};
+ int64_t m_end{0};
+ int64_t m_axis{0};
};
/// \brief Produces an output from a specific iteration
class NGRAPH_API BodyOutputDescription : public OutputDescription
{
public:
- static constexpr DiscreteTypeInfo type_info{"BodyOutputDescription", 0};
- const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ static constexpr type_info_t type_info{"BodyOutputDescription", 0};
+ const type_info_t& get_type_info() const override { return type_info; }
/// \param body_value_index A body value that produces the output
/// \param output_index The TensorIterator output index
/// \param iteration which iteration (typically -1, final) will supply the
BodyOutputDescription(uint64_t body_value_index,
uint64_t output_index,
int64_t iteration);
+ BodyOutputDescription() = default;
std::shared_ptr<OutputDescription> copy() const override;
-
- int64_t m_iteration;
+ bool visit_attributes(AttributeVisitor& visitor) override;
+ int64_t m_iteration{0};
};
/// \brief Indicate that a body parameter comes from slices of a value
}
using v0::TensorIterator;
}
+ template class NGRAPH_API FactoryRegistry<op::v0::TensorIterator::InputDescription>;
+
+ /// \brief Attribute adapter for a shared pointer to a TensorIterator input description.
+ ///
+ /// Construction is inherited unchanged from FactoryAttributeAdapter; this specialization
+ /// only contributes its own type_info.
+ template <>
+ class NGRAPH_API AttributeAdapter<std::shared_ptr<op::TensorIterator::InputDescription>>
+     : public FactoryAttributeAdapter<op::TensorIterator::InputDescription>
+ {
+ public:
+     using FactoryAttributeAdapter::FactoryAttributeAdapter;
+     // The name mirrors the specialization exactly (balanced angle brackets).
+     static constexpr DiscreteTypeInfo type_info{
+         "AttributeAdapter<std::shared_ptr<op::TensorIterator::InputDescription>>", 0};
+     const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ };
+
+ /// \brief Attribute adapter for a vector of TensorIterator input descriptions.
+ ///
+ /// Holds a reference to the caller's vector; visit_attributes is defined out of line.
+ template <>
+ class NGRAPH_API
+     AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>>
+     : public VisitorAdapter
+ {
+ public:
+     /// \param ref The vector of input descriptions this adapter refers to
+     AttributeAdapter(std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>& ref);
+
+     bool visit_attributes(AttributeVisitor& visitor) override;
+     // The name mirrors the specialization exactly (balanced angle brackets).
+     static constexpr DiscreteTypeInfo type_info{
+         "AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>>",
+         0};
+     const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+     std::vector<std::shared_ptr<op::TensorIterator::InputDescription>>& m_ref;
+ };
+
+ template class NGRAPH_API FactoryRegistry<op::v0::TensorIterator::OutputDescription>;
+
+ /// \brief Attribute adapter for a shared pointer to a TensorIterator output description.
+ ///
+ /// Construction is inherited unchanged from FactoryAttributeAdapter; this specialization
+ /// only contributes its own type_info.
+ template <>
+ class NGRAPH_API AttributeAdapter<std::shared_ptr<op::TensorIterator::OutputDescription>>
+     : public FactoryAttributeAdapter<op::TensorIterator::OutputDescription>
+ {
+ public:
+     using FactoryAttributeAdapter::FactoryAttributeAdapter;
+     // The name mirrors the specialization exactly (balanced angle brackets).
+     static constexpr DiscreteTypeInfo type_info{
+         "AttributeAdapter<std::shared_ptr<op::TensorIterator::OutputDescription>>", 0};
+     const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ };
+
+ /// \brief Attribute adapter for a vector of TensorIterator output descriptions.
+ ///
+ /// Holds a reference to the caller's vector; visit_attributes is defined out of line.
+ template <>
+ class NGRAPH_API
+     AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>>
+     : public VisitorAdapter
+ {
+ public:
+     /// \param ref The vector of output descriptions this adapter refers to
+     AttributeAdapter(std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>& ref);
+
+     bool visit_attributes(AttributeVisitor& visitor) override;
+     // The name mirrors the specialization exactly (balanced angle brackets).
+     static constexpr DiscreteTypeInfo type_info{
+         "AttributeAdapter<std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>>",
+         0};
+     const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+     std::vector<std::shared_ptr<op::TensorIterator::OutputDescription>>& m_ref;
+ };
}
//*****************************************************************************
#include <map>
+#include "ngraph/attribute_visitor.hpp"
#include "ngraph/check.hpp"
#include "ngraph/enum_names.hpp"
#include "ngraph/op/util/attr_types.hpp"
return allowed_values.at(type);
}
- NGRAPH_API constexpr DiscreteTypeInfo AttributeAdapter<op::AutoBroadcastSpec>::type_info;
+ // Visits an AutoBroadcastSpec. The broadcast type is reported as an attribute whose name
+ // is the string returned by finish_structure(); the axis is reported as
+ // "auto_broadcast_axis" only when the type is PDPD. Always returns true.
+ bool AttributeAdapter<op::AutoBroadcastSpec>::visit_attributes(AttributeVisitor& visitor)
+ {
+ // Maintain back-compatibility
+ // NOTE(review): finish_structure() appears to close the structure the visitor has open
+ // and hand back its name, so the type is written under that name instead of inside it.
+ std::string name = visitor.finish_structure();
+ visitor.on_attribute(name, m_ref.m_type);
+ // A structure with the same name is opened again and is not closed in this function;
+ // presumably the caller closes it -- confirm against AttributeVisitor.
+ visitor.start_structure(name);
+ if (m_ref.m_type == op::AutoBroadcastType::PDPD)
+ {
+ visitor.on_attribute("auto_broadcast_axis", m_ref.m_axis);
+ }
+ return true;
+ }
+
+ constexpr DiscreteTypeInfo AttributeAdapter<op::AutoBroadcastSpec>::type_info;
+
+ // Visits a BroadcastModeSpec. The broadcast type is reported as an attribute whose name
+ // is the string returned by finish_structure(); for PDPD the axis is reported as "axis"
+ // inside an additional structure of the same name. Always returns true.
+ bool AttributeAdapter<op::BroadcastModeSpec>::visit_attributes(AttributeVisitor& visitor)
+ {
+ // Maintain back-compatibility
+ // NOTE(review): finish_structure() appears to close the structure the visitor has open
+ // and hand back its name, so the type is written under that name instead of inside it.
+ std::string name = visitor.finish_structure();
+ visitor.on_attribute(name, m_ref.m_type);
+ // This structure is opened again and is not closed in this function; presumably the
+ // caller closes it -- confirm against AttributeVisitor.
+ visitor.start_structure(name);
+ if (m_ref.m_type == op::BroadcastType::PDPD)
+ {
+ // The axis is wrapped in its own start/finish pair, nested under the same name.
+ visitor.start_structure(name);
+ visitor.on_attribute("axis", m_ref.m_axis);
+ visitor.finish_structure();
+ }
+ return true;
+ }
+
+ constexpr DiscreteTypeInfo AttributeAdapter<op::BroadcastModeSpec>::type_info;
}
{
enum class TopKSortType
{
- // Returned values are not sorted
+ // Returned values are not sorted
NONE,
// Sort result based on element indices
SORT_INDICES,
}
+ /// \brief Attribute adapter that visits an op::AutoBroadcastSpec through a reference.
+ ///
+ /// visit_attributes is defined out of line, so the class keeps the NGRAPH_API export
+ /// macro carried by the declaration it replaces.
template <>
- class NGRAPH_API AttributeAdapter<op::AutoBroadcastSpec>
- : public ValueReference<op::AutoBroadcastSpec>, public ValueAccessor<void>
+ class NGRAPH_API AttributeAdapter<op::AutoBroadcastSpec> : public VisitorAdapter
{
public:
AttributeAdapter(op::AutoBroadcastSpec& value)
- : ValueReference<op::AutoBroadcastSpec>(value)
+ : m_ref(value)
{
}
+ bool visit_attributes(AttributeVisitor& visitor) override;
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<op::AutoBroadcastSpec>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ // The spec being visited; bound by reference in the constructor.
+ op::AutoBroadcastSpec& m_ref;
};
namespace op
}
};
}
+
+ /// \brief Attribute adapter that visits an op::BroadcastModeSpec through a reference.
+ ///
+ /// visit_attributes is defined out of line, so the class keeps the NGRAPH_API export
+ /// macro carried by the declaration it replaces.
template <>
- class NGRAPH_API AttributeAdapter<op::BroadcastModeSpec>
- : public ValueReference<op::BroadcastModeSpec>, public ValueAccessor<void>
+ class NGRAPH_API AttributeAdapter<op::BroadcastModeSpec> : public VisitorAdapter
{
public:
AttributeAdapter(op::BroadcastModeSpec& value)
- : ValueReference<op::BroadcastModeSpec>(value)
+ : m_ref(value)
{
}
+ bool visit_attributes(AttributeVisitor& visitor) override;
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<op::BroadcastModeSpec>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ // The spec being visited; bound by reference in the constructor.
+ op::BroadcastModeSpec& m_ref;
};
}
}
PartialShape result_shape{PartialShape::dynamic()};
-
- const auto shape_constant = as_type_ptr<op::v0::Constant>(input_value(1).get_node_shared_ptr());
-
- if (shape_constant)
+ auto input_rank = input_value(0).get_partial_shape().rank();
+ auto output_rank = input_value(1).get_partial_shape();
+ if (input_rank.is_static() && output_rank.is_static() && output_rank[0].is_static())
{
- result_shape = shape_constant->get_shape_val();
+ result_shape = PartialShape::dynamic(std::max(input_rank.get_length(), output_rank[0].get_length()));
}
- else if (auto concat = as_type_ptr<op::v0::Concat>(input_value(1).get_node_shared_ptr()))
+ const auto shape_constant = as_type_ptr<op::v0::Constant>(input_value(1).get_node_shared_ptr());
+
+ if (auto concat = as_type_ptr<op::v0::Concat>(input_value(1).get_node_shared_ptr()))
{
auto concat_inputs = concat->inputs();
if (m_mode.m_type == BroadcastType::NONE)
{
+ if (shape_constant)
+ {
+ result_shape = shape_constant->get_shape_val();
+ }
// Validate axes_mapping
if (get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static() &&
get_input_partial_shape(2).is_static())
NGRAPH_OP(Abs, ngraph::op::v0)
NGRAPH_OP(Acos, ngraph::op::v0)
-NGRAPH_OP(Acosh, ngraph::op::v3)
NGRAPH_OP(Add, ngraph::op::v1)
NGRAPH_OP(Asin, ngraph::op::v0)
-NGRAPH_OP(Asinh, ngraph::op::v3)
NGRAPH_OP(Atan, ngraph::op::v0)
-NGRAPH_OP(Atanh, ngraph::op::v3)
NGRAPH_OP(AvgPool, ngraph::op::v1)
NGRAPH_OP(BatchNormInference, ngraph::op::v0)
NGRAPH_OP(BinaryConvolution, ngraph::op::v1)
NGRAPH_OP(GroupConvolutionBackpropData, ngraph::op::v1)
NGRAPH_OP(GRN, ngraph::op::v0)
NGRAPH_OP(HardSigmoid, ngraph::op::v0)
-NGRAPH_OP(Interpolate, ngraph::op::v3)
+NGRAPH_OP(Interpolate, ngraph::op::v0)
NGRAPH_OP(Less, ngraph::op::v1)
NGRAPH_OP(LessEqual, ngraph::op::v1)
NGRAPH_OP(Log, ngraph::op::v0)
// Superseded
// NGRAPH_OP(ShapeOf, ngraph::op::v0)
-// Moved out of opset2, it was added to opset1 by mistake
-// NGRAPH_OP(ShuffleChannels, ngraph::op::v0)
-
NGRAPH_OP(Sign, ngraph::op::v0)
NGRAPH_OP(Sigmoid, ngraph::op::v0)
NGRAPH_OP(Sin, ngraph::op::v0)
NGRAPH_OP(ROIAlign, ngraph::op::v3)
NGRAPH_OP(ScatterElementsUpdate, ngraph::op::v3)
NGRAPH_OP(ScatterUpdate, ngraph::op::v3)
-NGRAPH_OP(ScatterNDUpdate, ngraph::op::v3)
NGRAPH_OP(ShuffleChannels, ngraph::op::v0)
NGRAPH_OP(ShapeOf, ngraph::op::v3)
NGRAPH_OP(TopK, ngraph::op::v3)
return m_dimensions[i];
}
+// Returns the referenced PartialShape encoded as a vector of int64_t, rebuilding the
+// cached buffer only when it has been invalidated. Encoding: a dynamic rank becomes the
+// single element -2; otherwise there is one element per dimension, with -1 standing for
+// a dynamic dimension and the length for a static one.
+const std::vector<int64_t>& ngraph::AttributeAdapter<ngraph::PartialShape>::get()
+{
+    // Cached encoding is still current: hand it back untouched.
+    if (m_buffer_valid)
+    {
+        return m_buffer;
+    }
+
+    m_buffer.clear();
+    const auto shape_rank = m_ref.rank();
+    if (shape_rank.is_dynamic())
+    {
+        m_buffer.push_back(-2);
+    }
+    else
+    {
+        const size_t dim_count = shape_rank.get_length();
+        for (size_t idx = 0; idx != dim_count; ++idx)
+        {
+            auto& dim = m_ref[idx];
+            m_buffer.push_back(dim.is_dynamic() ? -1 : dim.get_length());
+        }
+    }
+    m_buffer_valid = true;
+    return m_buffer;
+}
+
+// Replaces the referenced PartialShape with the shape encoded in `value` and invalidates
+// the cached buffer. Decoding: the single element -2 means a fully dynamic shape;
+// otherwise each element is one dimension, with -1 meaning a dynamic dimension.
+void ngraph::AttributeAdapter<ngraph::PartialShape>::set(const std::vector<int64_t>& value)
+{
+    if (value.size() == 1 && value[0] == -2)
+    {
+        m_ref = PartialShape::dynamic();
+    }
+    else
+    {
+        std::vector<Dimension> dims;
+        // One Dimension per encoded element, so the final size is known up front.
+        dims.reserve(value.size());
+        for (auto elt : value)
+        {
+            dims.push_back(elt == -1 ? Dimension::dynamic() : elt);
+        }
+        m_ref = PartialShape(dims);
+    }
+    // The buffer returned by get() no longer matches m_ref.
+    m_buffer_valid = false;
+}
+
NGRAPH_API constexpr DiscreteTypeInfo AttributeAdapter<PartialShape>::type_info;
std::ostream& operator<<(std::ostream& str, const PartialShape& shape);
+ /// \brief Attribute adapter exposing a PartialShape as std::vector<int64_t>.
+ ///
+ /// In the vector form used by get()/set(), the single element -2 denotes a dynamic
+ /// rank and an element of -1 denotes a dynamic dimension.
template <>
- class NGRAPH_API AttributeAdapter<PartialShape> : public ValueReference<PartialShape>,
- public ValueAccessor<void>
+ class NGRAPH_API AttributeAdapter<PartialShape> : public ValueAccessor<std::vector<int64_t>>
{
public:
AttributeAdapter(PartialShape& value)
- : ValueReference<PartialShape>(value)
+ : m_ref(value)
{
}
+ const std::vector<int64_t>& get() override;
+ void set(const std::vector<int64_t>& value) override;
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<PartialShape>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ // Gives direct access to the referenced PartialShape.
+ operator PartialShape&() { return m_ref; }
+ protected:
+ // The shape being adapted; bound by reference in the constructor.
+ PartialShape& m_ref;
+ // Vector encoding returned by get(); only meaningful while m_buffer_valid is true.
+ std::vector<int64_t> m_buffer;
+ bool m_buffer_valid{false};
};
}
static shared_ptr<Node>
multiply_by(element::Type type, size_t multiplier, shared_ptr<op::Constant> cnst)
{
- T sum_cnst = static_cast<T>(cnst->get_vector<T>().at(0) * multiplier);
+ T sum_cnst = static_cast<T>(cnst->get_data_ptr<T>()[0] * multiplier);
return op::Constant::create<T>(type, Shape{}, {sum_cnst});
}
static shared_ptr<Node> pow_by(element::Type type, size_t multiplier, shared_ptr<op::Constant> cnst)
{
T prod = static_cast<T>(1);
- T val = cnst->get_vector<T>().at(0);
+ T val = cnst->get_data_ptr<T>()[0];
for (size_t i = 0; i < multiplier; i++)
{
prod *= val;
if (auto max = as_type_ptr<op::Max>(reduction_node))
{
- runtime::reference::max<T>(constant->get_vector<T>().data(),
+ runtime::reference::max<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
max->get_reduction_axes());
}
else if (auto reduce_max = as_type_ptr<op::v1::ReduceMax>(reduction_node))
{
- runtime::reference::max<T>(constant->get_vector<T>().data(),
+ runtime::reference::max<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
reduce_max->get_reduction_axes());
}
else if (auto min = as_type_ptr<op::Min>(reduction_node))
{
- runtime::reference::min<T>(constant->get_vector<T>().data(),
+ runtime::reference::min<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
min->get_reduction_axes());
}
else if (auto reduce_min = as_type_ptr<op::v1::ReduceMin>(reduction_node))
{
- runtime::reference::min<T>(constant->get_vector<T>().data(),
+ runtime::reference::min<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
reduce_min->get_reduction_axes());
}
else if (auto prod = as_type_ptr<op::Product>(reduction_node))
{
- runtime::reference::product<T>(constant->get_vector<T>().data(),
+ runtime::reference::product<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
prod->get_reduction_axes());
}
else if (auto reduce_prod = as_type_ptr<op::v1::ReduceProd>(reduction_node))
{
- runtime::reference::product<T>(constant->get_vector<T>().data(),
+ runtime::reference::product<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
reduce_prod->get_reduction_axes());
}
else if (auto sum = as_type_ptr<op::Sum>(reduction_node))
{
- runtime::reference::sum<T>(constant->get_vector<T>().data(),
+ runtime::reference::sum<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
sum->get_reduction_axes());
}
else if (auto reduce_sum = as_type_ptr<op::v1::ReduceSum>(reduction_node))
{
- runtime::reference::sum<T>(constant->get_vector<T>().data(),
+ runtime::reference::sum<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
reduce_sum->get_reduction_axes());
}
else if (auto reduce_mean = as_type_ptr<op::v1::ReduceMean>(reduction_node))
{
- runtime::reference::mean<T>(constant->get_vector<T>().data(),
+ runtime::reference::mean<T>(constant->get_data_ptr<T>(),
data_ptr,
constant->get_output_shape(0),
reduce_mean->get_reduction_axes());
TO* data_ptr = buffer.get_ptr<TO>();
runtime::reference::convert<TI, TO>(
- constant->get_vector<TI>().data(), data_ptr, shape_size(out_shape));
+ constant->get_data_ptr<TI>(), data_ptr, shape_size(out_shape));
return make_shared<op::Constant>(output_element_type, out_shape, data_ptr);
}
runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(REAL));
REAL* data_ptr = buffer.get_ptr<REAL>();
- runtime::reference::dequantize<QUANT, REAL>(constant->get_vector<QUANT>().data(),
- scale->get_vector<REAL>().data(),
- offset->get_vector<QUANT>().data(),
+ runtime::reference::dequantize<QUANT, REAL>(constant->get_data_ptr<QUANT>(),
+ scale->get_data_ptr<REAL>(),
+ offset->get_data_ptr<QUANT>(),
data_ptr,
constant->get_shape(),
scale->get_shape(),
if (auto all = as_type_ptr<::ngraph::op::All>(reduction_node))
{
- runtime::reference::all(constant->get_vector<char>().data(),
+ runtime::reference::all(constant->get_data_ptr<char>(),
data_ptr,
constant->get_output_shape(0),
reduction_node->get_shape(),
}
else if (auto any = as_type_ptr<::ngraph::op::Any>(reduction_node))
{
- runtime::reference::any(constant->get_vector<char>().data(),
+ runtime::reference::any(constant->get_data_ptr<char>(),
data_ptr,
constant->get_output_shape(0),
reduction_node->get_shape(),
const auto reduction_axes = reduce_and->get_reduction_axes();
const auto input_shape = reduce_and->get_input_shape(0);
- runtime::reference::all(constant->get_vector<char>().data(),
+ runtime::reference::all(constant->get_data_ptr<char>(),
data_ptr,
constant->get_output_shape(0),
get_shape_no_keep_dims(reduction_axes, input_shape),
const auto reduction_axes = reduce_or->get_reduction_axes();
const auto input_shape = reduce_or->get_input_shape(0);
- runtime::reference::any(constant->get_vector<char>().data(),
+ runtime::reference::any(constant->get_data_ptr<char>(),
data_ptr,
constant->get_output_shape(0),
get_shape_no_keep_dims(reduction_axes, input_shape),
size_t axis)
{
std::vector<OUTPUT_TYPE> out_vec(shape_size(output_shape));
- runtime::reference::one_hot<INDICES_TYPE, OUTPUT_TYPE>(indices->get_data_ptr<INDICES_TYPE>(),
- out_vec.data(),
- indices->get_shape(),
- output_shape,
- axis,
- on_value->get_vector<OUTPUT_TYPE>()[0],
- off_value->get_vector<OUTPUT_TYPE>()[0]);
+ runtime::reference::one_hot<INDICES_TYPE, OUTPUT_TYPE>(
+ indices->get_data_ptr<INDICES_TYPE>(),
+ out_vec.data(),
+ indices->get_shape(),
+ output_shape,
+ axis,
+ on_value->get_data_ptr<OUTPUT_TYPE>()[0],
+ off_value->get_data_ptr<OUTPUT_TYPE>()[0]);
return make_shared<op::Constant>(on_value->get_element_type(), output_shape, out_vec);
}
runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(QUANT));
QUANT* data_ptr = buffer.get_ptr<QUANT>();
- runtime::reference::quantize<REAL, QUANT>(constant->get_vector<REAL>().data(),
- scale->get_vector<REAL>().data(),
- offset->get_vector<QUANT>().data(),
+ runtime::reference::quantize<REAL, QUANT>(constant->get_data_ptr<REAL>(),
+ scale->get_data_ptr<REAL>(),
+ offset->get_data_ptr<QUANT>(),
data_ptr,
constant->get_shape(),
scale->get_shape(),
T* data_ptr = buffer.get_ptr<T>();
runtime::reference::reverse<T>(
- constant->get_vector<T>().data(), data_ptr, out_shape, out_shape, reversed_axes);
+ constant->get_data_ptr<T>(), data_ptr, out_shape, out_shape, reversed_axes);
return make_shared<op::Constant>(constant->get_output_element_type(0), out_shape, data_ptr);
}
}
return *this;
}
+
+// Out-of-line definitions for the AlignedBuffer attribute adapter.
+namespace ngraph
+{
+ // Namespace-scope definition of the static constexpr member.
+ constexpr DiscreteTypeInfo AttributeAdapter<shared_ptr<runtime::AlignedBuffer>>::type_info;
+
+ // Binds the adapter to the caller's shared_ptr by reference.
+ AttributeAdapter<shared_ptr<runtime::AlignedBuffer>>::AttributeAdapter(
+ shared_ptr<runtime::AlignedBuffer>& value)
+ : m_ref(value)
+ {
+ }
+
+ // Forwards to the referenced buffer's get_ptr().
+ // NOTE(review): m_ref is dereferenced without a null check -- assumes the shared_ptr is
+ // non-empty; confirm with callers.
+ void* AttributeAdapter<shared_ptr<runtime::AlignedBuffer>>::get_ptr()
+ {
+ return m_ref->get_ptr();
+ }
+ // Forwards to the referenced buffer's size(); same non-empty assumption as get_ptr().
+ size_t AttributeAdapter<shared_ptr<runtime::AlignedBuffer>>::size() { return m_ref->size(); }
+}
char* m_aligned_buffer;
size_t m_byte_size;
};
+
+namespace ngraph
+{
+ /// \brief Attribute adapter exposing a shared AlignedBuffer through get_ptr() and size().
+ template <>
+ class NGRAPH_API AttributeAdapter<std::shared_ptr<runtime::AlignedBuffer>>
+ : public ValueAccessor<void*>
+ {
+ public:
+ /// \param value The shared pointer this adapter refers to (held by reference)
+ AttributeAdapter(std::shared_ptr<runtime::AlignedBuffer>& value);
+ void* get_ptr() override;
+ size_t size() override;
+
+ static constexpr DiscreteTypeInfo type_info{
+ "AttributeAdapter<std::shared_ptr<runtime::AlignedBuffer>>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ // Reference to the caller's shared_ptr, not a copy of it.
+ std::shared_ptr<runtime::AlignedBuffer>& m_ref;
+ };
+}
: m_json(j)
{
}
-
- void on_attribute(const std::string& name, std::string& value) override
+ void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
{
- m_json[name] = value;
+ NGRAPH_CHECK(false, "Adapter ", adapter.get_type_info().name, " is not handled");
}
- void on_attribute(const std::string& name, bool& value) override { m_json[name] = value; }
- void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
+ void on_adapter(const std::string& name, ValueAccessor<bool>& adapter) override
{
- if (auto a = as_type<AttributeAdapter<element::Type>>(&adapter))
- {
- m_json[name] = write_element_type(static_cast<element::Type&>(*a));
- }
- else if (auto a = as_type<AttributeAdapter<PartialShape>>(&adapter))
- {
- m_json[name] = write_partial_shape(static_cast<PartialShape&>(*a));
- }
+ m_json[name] = adapter.get();
}
void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter) override
{
{
m_json[name] = adapter.get();
}
+ void on_adapter(const std::string& name, ValueAccessor<std::vector<uint64_t>>& adapter) override
+ {
+ m_json[name] = adapter.get();
+ }
void on_adapter(const std::string& name, ValueAccessor<std::vector<float>>& adapter) override
{
m_json[name] = adapter.get();
: m_json(j)
{
}
- void on_attribute(const std::string& name, std::string& value) override
- {
- if (has_key(m_json, name))
- {
- value = m_json.at(name).get<std::string>();
- }
- }
- void on_attribute(const std::string& name, bool& value) override
+ void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
{
- if (has_key(m_json, name))
- {
- value = m_json.at(name).get<bool>();
- }
+ NGRAPH_CHECK(false, "Adapter ", adapter.get_type_info().name, " is not handled");
}
- void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
+ void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter) override
{
if (has_key(m_json, name))
{
- if (auto a = as_type<AttributeAdapter<element::Type>>(&adapter))
- {
- static_cast<element::Type&>(*a) =
- read_element_type(m_json.at(name).get<std::string>());
- }
- else if (auto a = as_type<AttributeAdapter<PartialShape>>(&adapter))
- {
- static_cast<PartialShape&>(*a) = read_partial_shape(m_json.at(name));
- }
+ adapter.set(m_json.at(name).get<std::string>());
}
}
- void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter) override
+ void on_adapter(const std::string& name, ValueAccessor<bool>& adapter) override
{
if (has_key(m_json, name))
{
- adapter.set(m_json.at(name).get<std::string>());
+ adapter.set(m_json.at(name).get<bool>());
}
}
+
void on_adapter(const std::string& name, ValueAccessor<int64_t>& adapter) override
{
if (has_key(m_json, name))
adapter.set(m_json.at(name).get<std::vector<int64_t>>());
}
}
+ void on_adapter(const std::string& name, ValueAccessor<std::vector<uint64_t>>& adapter) override
+ {
+ if (has_key(m_json, name))
+ {
+ adapter.set(m_json.at(name).get<std::vector<uint64_t>>());
+ }
+ }
void on_adapter(const std::string& name, ValueAccessor<std::vector<float>>& adapter) override
{
if (has_key(m_json, name))
{
const auto tmp = static_cast<const op::ShuffleChannels*>(&n);
node["axis"] = tmp->get_axis();
- node["groups"] = tmp->get_groups();
+ node["groups"] = tmp->get_group();
break;
}
case OP_TYPEID::Sigmoid: { break;
return *this;
}
-const vector<int64_t>& AttributeAdapter<Shape>::get()
-{
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
-}
-
-void AttributeAdapter<Shape>::set(const vector<int64_t>& value)
-{
- m_value = copy_from<Shape>(value);
- m_buffer_valid = false;
-}
-
constexpr DiscreteTypeInfo AttributeAdapter<Shape>::type_info;
};
+ /// \brief Attribute adapter for Shape, presented to visitors as std::vector<int64_t>.
+ ///
+ /// Value access comes from the IndirectVectorValueAccessor base; this specialization
+ /// declares no get()/set() of its own and only contributes its type_info.
template <>
- class NGRAPH_API AttributeAdapter<Shape> : public ValueReference<Shape>,
- public ValueAccessor<std::vector<int64_t>>
+ class NGRAPH_API AttributeAdapter<Shape>
+ : public IndirectVectorValueAccessor<Shape, std::vector<int64_t>>
+
{
public:
AttributeAdapter(Shape& value)
- : ValueReference<Shape>(value)
+ : IndirectVectorValueAccessor<Shape, std::vector<int64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<Shape>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
};
/// Number of elements in spanned by a shape
return *this;
}
-const vector<int64_t>& AttributeAdapter<Strides>::get()
-{
- if (!m_buffer_valid)
- {
- m_buffer = copy_from<vector<int64_t>>(m_value);
- m_buffer_valid = true;
- }
- return m_buffer;
-}
-
-void AttributeAdapter<Strides>::set(const vector<int64_t>& value)
-{
- m_value = copy_from<Strides>(value);
- m_buffer_valid = false;
-}
-
constexpr DiscreteTypeInfo AttributeAdapter<Strides>::type_info;
};
+ /// \brief Attribute adapter for Strides, presented to visitors as std::vector<int64_t>.
+ ///
+ /// Value access comes from the IndirectVectorValueAccessor base; this specialization
+ /// declares no get()/set() of its own and only contributes its type_info.
template <>
- class NGRAPH_API AttributeAdapter<Strides> : public ValueReference<Strides>,
- public ValueAccessor<std::vector<int64_t>>
+ class NGRAPH_API AttributeAdapter<Strides>
+ : public IndirectVectorValueAccessor<Strides, std::vector<int64_t>>
+
{
public:
AttributeAdapter(Strides& value)
- : ValueReference<Strides>(value)
+ : IndirectVectorValueAccessor<Strides, std::vector<int64_t>>(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<Strides>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
- const std::vector<int64_t>& get() override;
- void set(const std::vector<int64_t>& value) override;
};
NGRAPH_API
const std::string& AttributeAdapter<element::Type>::get()
{
- return as_string(static_cast<element::Type_t>(ValueReference<element::Type>::m_value));
+ return as_string(static_cast<element::Type_t>(m_ref));
}
void AttributeAdapter<element::Type>::set(const std::string& value)
{
- ValueReference<element::Type>::m_value = as_enum<element::Type_t>(value);
+ m_ref = as_enum<element::Type_t>(value);
}
};
+ /// \brief Attribute adapter exposing an element::Type to visitors as a std::string.
template <>
- class NGRAPH_API AttributeAdapter<element::Type> : public ValueReference<element::Type>,
- public ValueAccessor<std::string>
+ class NGRAPH_API AttributeAdapter<element::Type> : public ValueAccessor<std::string>
{
public:
AttributeAdapter(element::Type& value)
- : ValueReference<element::Type>(value)
+ : m_ref(value)
{
}
static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<element::Type>", 0};
const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ // Gives direct access to the referenced element::Type.
+ operator element::Type&() { return m_ref; }
+ protected:
+ // The element type being adapted; bound by reference in the constructor.
+ element::Type& m_ref;
};
/// \brief Return the number of bytes in the compile-time representation of the element type.
set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS
NGRAPH_INCLUDES="${PROJECT_SOURCE_DIR}/src/ngraph")
-# if (NGRAPH_IE_ENABLE)
-# if (ENABLE_MKL_DNN)
-# message(STATUS "NGRAPH_TESTS: IE:CPU enabled")
-# set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU")
-# endif()
-
-# if (ENABLE_CLDNN)
-# message(STATUS "NGRAPH_TESTS: IE:GPU enabled")
-# set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU")
-# endif()
-# endif()
+# Register the Inference Engine backends with the unit-test backend list.
+# IE:CPU is added when ENABLE_MKL_DNN is set, IE:GPU when ENABLE_CLDNN is set.
+if (NGRAPH_IE_ENABLE)
+    if (ENABLE_MKL_DNN)
+        message(STATUS "NGRAPH_TESTS: IE:CPU enabled")
+        list(APPEND ACTIVE_BACKEND_LIST "IE:CPU")
+    endif()
+    if (ENABLE_CLDNN)
+        message(STATUS "NGRAPH_TESTS: IE:GPU enabled")
+        list(APPEND ACTIVE_BACKEND_LIST "IE:GPU")
+    endif()
+endif()
if (NGRAPH_INTERPRETER_ENABLE)
list(APPEND SRC
#include "ngraph/opsets/opset1.hpp"
#include "ngraph/opsets/opset3.hpp"
+#include "util/visitor.hpp"
+
using namespace std;
using namespace ngraph;
+using ngraph::test::NodeBuilder;
+using ngraph::test::ValueMap;
+
+// Values stored in a ValueMap under a name are read back unchanged through get<T>().
+TEST(attributes, value_map)
+{
+    ValueMap value_map;
+    bool stored_flag = true;
+    int8_t stored_byte = 2;
+    value_map.insert("a", stored_flag);
+    value_map.insert("b", stored_byte);
+
+    // Read each entry back with the type it was stored as.
+    EXPECT_EQ(stored_flag, value_map.get<bool>("a"));
+    EXPECT_EQ(stored_byte, value_map.get<int8_t>("b"));
+}
enum class TuringModel
{
};
constexpr DiscreteTypeInfo AttributeAdapter<TuringModel>::type_info;
-} // namespace ngraph
+
+ // Three-float test structure, visited member by member via AttributeAdapter<Position>.
+ struct Position
+ {
+ float x;
+ float y;
+ float z;
+ // Exact component-wise float comparison.
+ bool operator==(const Position& p) const { return x == p.x && y == p.y && z == p.z; }
+ // Copies the three components and returns *this.
+ Position& operator=(const Position& p)
+ {
+ x = p.x;
+ y = p.y;
+ z = p.z;
+ return *this;
+ }
+ };
+
+ // Adapter that lets a Position be visited as a structure of three float attributes.
+ template <>
+ class AttributeAdapter<Position> : public VisitorAdapter
+ {
+ public:
+ AttributeAdapter(Position& value)
+ : m_ref(value)
+ {
+ }
+ // Reports the members under the names "x", "y" and "z", in that order; always
+ // returns true.
+ bool visit_attributes(AttributeVisitor& visitor) override
+ {
+ visitor.on_attribute("x", m_ref.x);
+ visitor.on_attribute("y", m_ref.y);
+ visitor.on_attribute("z", m_ref.z);
+ return true;
+ }
+ static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<Position>", 0};
+ const DiscreteTypeInfo& get_type_info() const override { return type_info; }
+ protected:
+ // The Position being visited; bound by reference in the constructor.
+ Position& m_ref;
+ };
+
+ constexpr DiscreteTypeInfo AttributeAdapter<Position>::type_info;
+}
// Given a Turing machine program and data, return scalar 1 if the program would
// complete, 1 if it would not.
int16_t val_int16_t,
int32_t val_int32_t,
int64_t val_int64_t,
+ size_t val_size_t,
const std::vector<std::string>& vec_string,
const std::vector<float>& vec_float,
const std::vector<double>& vec_double,
const std::vector<int8_t>& vec_int8_t,
const std::vector<int16_t>& vec_int16_t,
const std::vector<int32_t>& vec_int32_t,
- const std::vector<int64_t>& vec_int64_t)
+ const std::vector<int64_t>& vec_int64_t,
+ const std::vector<size_t>& vec_size_t,
+ const Position& position,
+ const shared_ptr<Node>& node,
+ const NodeVector& node_vector,
+ const ParameterVector& parameter_vector,
+ const ResultVector& result_vector)
: Op({program, data})
, m_turing_model(turing_model)
, m_element_type(element_type)
, m_val_int16_t(val_int16_t)
, m_val_int32_t(val_int32_t)
, m_val_int64_t(val_int64_t)
+ , m_val_size_t(val_size_t)
, m_vec_string(vec_string)
, m_vec_float(vec_float)
, m_vec_double(vec_double)
, m_vec_int16_t(vec_int16_t)
, m_vec_int32_t(vec_int32_t)
, m_vec_int64_t(vec_int64_t)
+ , m_vec_size_t(vec_size_t)
+ , m_position(position)
+ , m_node(node)
+ , m_node_vector(node_vector)
+ , m_parameter_vector(parameter_vector)
+ , m_result_vector(result_vector)
{
}
int64_t get_val_int16_t() const { return m_val_int16_t; }
int64_t get_val_int32_t() const { return m_val_int32_t; }
int64_t get_val_int64_t() const { return m_val_int64_t; }
+ size_t get_val_size_t() const { return m_val_size_t; }
const vector<uint8_t>& get_vec_uint8_t() const { return m_vec_uint8_t; }
const vector<uint16_t>& get_vec_uint16_t() const { return m_vec_uint16_t; }
const vector<uint32_t>& get_vec_uint32_t() const { return m_vec_uint32_t; }
const vector<string>& get_vec_string() const { return m_vec_string; }
const vector<float>& get_vec_float() const { return m_vec_float; }
const vector<double>& get_vec_double() const { return m_vec_double; }
+ const vector<size_t>& get_vec_size_t() const { return m_vec_size_t; }
+ const Position& get_position() const { return m_position; }
+ const shared_ptr<Node>& get_node() const { return m_node; }
+ const NodeVector& get_node_vector() const { return m_node_vector; }
+ const ParameterVector& get_parameter_vector() const { return m_parameter_vector; }
+ const ResultVector& get_result_vector() const { return m_result_vector; }
shared_ptr<Node> clone_with_new_inputs(const OutputVector& args) const override
{
return make_shared<Oracle>(args[0],
m_val_int16_t,
m_val_int32_t,
m_val_int64_t,
+ m_val_size_t,
m_vec_string,
m_vec_float,
m_vec_double,
m_vec_int8_t,
m_vec_int16_t,
m_vec_int32_t,
- m_vec_int64_t);
+ m_vec_int64_t,
+ m_vec_size_t,
+ m_position,
+ m_node,
+ m_node_vector,
+ m_parameter_vector,
+ m_result_vector);
}
void validate_and_infer_types() override { set_output_type(0, element::i64, {}); }
visitor.on_attribute("val_int16_t", m_val_int16_t);
visitor.on_attribute("val_int32_t", m_val_int32_t);
visitor.on_attribute("val_int64_t", m_val_int64_t);
+ visitor.on_attribute("val_size_t", m_val_size_t);
visitor.on_attribute("vec_string", m_vec_string);
visitor.on_attribute("vec_float", m_vec_float);
visitor.on_attribute("vec_double", m_vec_double);
visitor.on_attribute("vec_int16_t", m_vec_int16_t);
visitor.on_attribute("vec_int32_t", m_vec_int32_t);
visitor.on_attribute("vec_int64_t", m_vec_int64_t);
+ visitor.on_attribute("vec_size_t", m_vec_size_t);
+ visitor.on_attribute("position", m_position);
+ visitor.on_attribute("node", m_node);
+ visitor.on_attribute("node_vector", m_node_vector);
+ visitor.on_attribute("parameter_vector", m_parameter_vector);
+ visitor.on_attribute("result_vector", m_result_vector);
return true;
}
int16_t m_val_int16_t;
int32_t m_val_int32_t;
int64_t m_val_int64_t;
+ size_t m_val_size_t{23};
vector<string> m_vec_string;
vector<float> m_vec_float;
vector<double> m_vec_double;
vector<int16_t> m_vec_int16_t;
vector<int32_t> m_vec_int32_t;
vector<int64_t> m_vec_int64_t;
+ vector<size_t> m_vec_size_t;
+ Position m_position;
+ shared_ptr<Node> m_node;
+ NodeVector m_node_vector;
+ ParameterVector m_parameter_vector;
+ ResultVector m_result_vector;
};
constexpr NodeTypeInfo Oracle::type_info;
-class NodeSaver : public AttributeVisitor
-{
-public:
- NodeSaver(shared_ptr<Node> node)
- : m_node_type_info(node->get_type_info())
- {
- node->visit_attributes(*this);
- }
- const NodeTypeInfo& get_node_type_info() { return m_node_type_info; }
- string& get_string(const string& name) { return m_strings.at(name); }
- bool get_bool(const string& name) { return m_bools.at(name); }
- float get_float(const string& name) { return m_doubles.at(name); }
- double get_double(const string& name) { return m_doubles.at(name); }
- int64_t get_signed(const string& name) { return m_signeds.at(name); }
- uint64_t get_unsigned(const string& name) { return m_unsigneds.at(name); }
- vector<float>& get_float_vector(const string& name) { return m_float_vectors.at(name); }
- vector<double>& get_double_vector(const string& name) { return m_double_vectors.at(name); }
- vector<int8_t>& get_int8_t_vector(const string& name) { return m_int8_t_vectors.at(name); }
- vector<int16_t>& get_int16_t_vector(const string& name) { return m_int16_t_vectors.at(name); }
- vector<int32_t>& get_int32_t_vector(const string& name) { return m_int32_t_vectors.at(name); }
- vector<int64_t>& get_int64_t_vector(const string& name) { return m_int64_t_vectors.at(name); }
- vector<uint8_t>& get_uint8_t_vector(const string& name) { return m_uint8_t_vectors.at(name); }
- vector<uint16_t>& get_uint16_t_vector(const string& name)
- {
- return m_uint16_t_vectors.at(name);
- }
- vector<uint32_t>& get_uint32_t_vector(const string& name)
- {
- return m_uint32_t_vectors.at(name);
- }
- vector<uint64_t>& get_uint64_t_vector(const string& name)
- {
- return m_uint64_t_vectors.at(name);
- }
-
- vector<string>& get_string_vector(const string& name) { return m_string_vectors.at(name); }
- HostTensorPtr get_host_tensor(const string& name) { return m_host_tensors.at(name); }
- void set_string(const string& name, const string& value) { m_strings[name] = value; }
- void set_bool(const string& name, bool value) { m_bools[name] = value; }
- void set_double(const string& name, double value) { m_doubles[name] = value; }
- void set_signed(const string& name, int64_t value) { m_signeds[name] = value; }
- void set_float_vector(const string& name, const vector<float>& value)
- {
- m_float_vectors[name] = value;
- }
- void set_double_vector(const string& name, const vector<double>& value)
- {
- m_double_vectors[name] = value;
- }
- void set_int8_t_vector(const string& name, const vector<int8_t>& value)
- {
- m_int8_t_vectors[name] = value;
- }
- void set_int16_t_vector(const string& name, const vector<int16_t>& value)
- {
- m_int16_t_vectors[name] = value;
- }
- void set_int32_t_vector(const string& name, const vector<int32_t>& value)
- {
- m_int32_t_vectors[name] = value;
- }
- void set_int64_t_vector(const string& name, const vector<int64_t>& value)
- {
- m_int64_t_vectors[name] = value;
- }
- void set_uint8_t_vector(const string& name, const vector<uint8_t>& value)
- {
- m_uint8_t_vectors[name] = value;
- }
- void set_uint16_t_vector(const string& name, const vector<uint16_t>& value)
- {
- m_uint16_t_vectors[name] = value;
- }
- void set_uint32_t_vector(const string& name, const vector<uint32_t>& value)
- {
- m_uint32_t_vectors[name] = value;
- }
- void set_uint64_t_vector(const string& name, const vector<uint64_t>& value)
- {
- m_uint64_t_vectors[name] = value;
- }
- void set_string_vector(const string& name, const vector<string>& value)
- {
- m_string_vectors[name] = value;
- }
- void set_host_tensor(const string& name, const HostTensorPtr& value)
- {
- m_host_tensors[name] = value;
- }
-
- void on_attribute(const string& name, string& value) override { set_string(name, value); };
- void on_attribute(const string& name, bool& value) override { set_bool(name, value); }
- void on_adapter(const string& name, ValueAccessor<void>& adapter) override
- {
- NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be marshalled");
- }
- // The remaining adapter methods fall back on the void adapter if not implemented
- void on_adapter(const string& name, ValueAccessor<string>& adapter) override
- {
- set_string(name, adapter.get());
- };
- void on_adapter(const string& name, ValueAccessor<int64_t>& adapter) override
- {
- set_signed(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<double>& adapter) override
- {
- set_double(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<string>>& adapter) override
- {
- set_string_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<float>>& adapter) override
- {
- set_float_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<double>>& adapter) override
- {
- set_double_vector(name, adapter.get());
- }
-
- void on_adapter(const string& name, ValueAccessor<vector<int8_t>>& adapter) override
- {
- set_int8_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<int16_t>>& adapter) override
- {
- set_int16_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<int32_t>>& adapter) override
- {
- set_int32_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<int64_t>>& adapter) override
- {
- set_int64_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint8_t>>& adapter) override
- {
- set_uint8_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint16_t>>& adapter) override
- {
- set_uint16_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint32_t>>& adapter) override
- {
- set_uint32_t_vector(name, adapter.get());
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint64_t>>& adapter) override
- {
- set_uint64_t_vector(name, adapter.get());
- }
- void on_attribute(const std::string& name, void* constant_data, size_t size) override
- {
- HostTensorPtr data = make_shared<HostTensor>(element::u8, Shape{size});
- data->write(constant_data, size);
- set_host_tensor(name, data);
- }
-
-protected:
- NodeTypeInfo m_node_type_info;
- map<string, string> m_strings;
- map<string, bool> m_bools;
- map<string, double> m_doubles;
- map<string, int64_t> m_signeds;
- map<string, uint64_t> m_unsigneds;
- map<string, vector<int8_t>> m_int8_t_vectors;
- map<string, vector<int16_t>> m_int16_t_vectors;
- map<string, vector<int32_t>> m_int32_t_vectors;
- map<string, vector<int64_t>> m_int64_t_vectors;
- map<string, vector<uint8_t>> m_uint8_t_vectors;
- map<string, vector<uint16_t>> m_uint16_t_vectors;
- map<string, vector<uint32_t>> m_uint32_t_vectors;
- map<string, vector<uint64_t>> m_uint64_t_vectors;
- map<string, vector<float>> m_float_vectors;
- map<string, vector<double>> m_double_vectors;
- map<string, vector<std::string>> m_string_vectors;
- map<string, HostTensorPtr> m_host_tensors;
-};
-
-class NodeBuilder : public AttributeVisitor
-{
-public:
- NodeBuilder(const shared_ptr<Node>& node)
- : m_values(node)
- {
- }
-
- // Does not validate, since inputs aren't set
- shared_ptr<Node> create()
- {
- shared_ptr<Node> node(FactoryRegistry<Node>::get().create(m_values.get_node_type_info()));
- node->visit_attributes(*this);
- return node;
- }
-
- void on_attribute(const string& name, string& value) override
- {
- value = m_values.get_string(name);
- };
- void on_attribute(const string& name, bool& value) override { value = m_values.get_bool(name); }
- void on_adapter(const string& name, ValueAccessor<void>& adapter) override
- {
- NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be unmarshalled");
- }
- // The remaining adapter methods fall back on the void adapter if not implemented
- void on_adapter(const string& name, ValueAccessor<string>& adapter) override
- {
- adapter.set(m_values.get_string(name));
- };
- void on_adapter(const string& name, ValueAccessor<int64_t>& adapter) override
- {
- adapter.set(m_values.get_signed(name));
- }
- void on_adapter(const string& name, ValueAccessor<double>& adapter) override
- {
- adapter.set(m_values.get_double(name));
- }
-
- void on_adapter(const string& name, ValueAccessor<vector<int8_t>>& adapter) override
- {
- adapter.set(m_values.get_int8_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<int16_t>>& adapter) override
- {
- adapter.set(m_values.get_int16_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<int32_t>>& adapter) override
- {
- adapter.set(m_values.get_int32_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<int64_t>>& adapter) override
- {
- adapter.set(m_values.get_int64_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint8_t>>& adapter) override
- {
- adapter.set(m_values.get_uint8_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint16_t>>& adapter) override
- {
- adapter.set(m_values.get_uint16_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint32_t>>& adapter) override
- {
- adapter.set(m_values.get_uint32_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<uint64_t>>& adapter) override
- {
- adapter.set(m_values.get_uint64_t_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<string>>& adapter) override
- {
- adapter.set(m_values.get_string_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<float>>& adapter) override
- {
- adapter.set(m_values.get_float_vector(name));
- }
- void on_adapter(const string& name, ValueAccessor<vector<double>>& adapter) override
- {
- adapter.set(m_values.get_double_vector(name));
- }
- void on_attribute(const std::string& name, void* constant_data, size_t size) override
- {
- HostTensorPtr data = m_values.get_host_tensor(name);
- data->read(constant_data, size);
- }
-
-protected:
- NodeSaver m_values;
-};
-
TEST(attributes, user_op)
{
FactoryRegistry<Node>::get().register_factory<Oracle>();
auto program = make_shared<op::Parameter>(element::i32, Shape{200});
auto data = make_shared<op::Parameter>(element::i32, Shape{200});
+ auto result = make_shared<op::Result>(data); // referenced by the NodeVector and ResultVector arguments below
auto oracle = make_shared<Oracle>(program,
data,
TuringModel::XL1200,
-2,
-4,
-8,
+ 34,
vector<string>{"Hello", "World"},
vector<float>{1.0f, 2.0f},
vector<double>{1.0, 2.0},
vector<int8_t>{1, 2, 4, 8},
vector<int16_t>{1, 2, 4, 8},
vector<int32_t>{1, 2, 4, 8},
- vector<int64_t>{1, 2, 4, 8});
- NodeBuilder builder(oracle);
+ vector<int64_t>{1, 2, 4, 8},
+ vector<size_t>{1, 3, 8, 4, 2},
+ Position{1.3f, 5.1f, 2.3f},
+ data,
+ NodeVector{program, result, data},
+ ParameterVector{data, data, program},
+ ResultVector{result});
+ NodeBuilder builder; // supplies the saver and loader visitors used for the round trip
+ AttributeVisitor& saver = builder.get_node_saver();
+ AttributeVisitor& loader = builder.get_node_loader();
+ loader.register_node(program, "program");
+ ASSERT_EQ(loader.get_registered_node("program"), program); // id -> node lookup
+ ASSERT_EQ(loader.get_registered_node_id(program), "program"); // node -> id lookup
+ loader.register_node(data, "data");
+ loader.register_node(result, "result");
+ saver.register_node(program, "program"); // the saver gets the same three ids as the loader
+ saver.register_node(data, "data");
+ saver.register_node(result, "result");
+ builder.save_node(oracle); // NOTE(review): presumably captures oracle's attributes for create() below - confirm in util/visitor.hpp
auto g_oracle = as_type_ptr<Oracle>(builder.create());
EXPECT_EQ(g_oracle->get_turing_model(), oracle->get_turing_model());
EXPECT_EQ(g_oracle->get_val_int16_t(), oracle->get_val_int16_t());
EXPECT_EQ(g_oracle->get_val_int32_t(), oracle->get_val_int32_t());
EXPECT_EQ(g_oracle->get_val_int64_t(), oracle->get_val_int64_t());
+ EXPECT_EQ(g_oracle->get_val_size_t(), oracle->get_val_size_t());
EXPECT_EQ(g_oracle->get_vec_uint8_t(), oracle->get_vec_uint8_t());
EXPECT_EQ(g_oracle->get_vec_uint16_t(), oracle->get_vec_uint16_t());
EXPECT_EQ(g_oracle->get_vec_uint32_t(), oracle->get_vec_uint32_t());
EXPECT_EQ(g_oracle->get_vec_string(), oracle->get_vec_string());
EXPECT_EQ(g_oracle->get_vec_float(), oracle->get_vec_float());
EXPECT_EQ(g_oracle->get_vec_double(), oracle->get_vec_double());
+ EXPECT_EQ(g_oracle->get_vec_size_t(), oracle->get_vec_size_t());
+ EXPECT_EQ(g_oracle->get_position(), oracle->get_position());
+ EXPECT_EQ(g_oracle->get_node(), oracle->get_node()); // node-valued attributes are compared with EXPECT_EQ like the scalar ones
+ EXPECT_EQ(g_oracle->get_node_vector(), oracle->get_node_vector());
+ EXPECT_EQ(g_oracle->get_parameter_vector(), oracle->get_parameter_vector());
+ EXPECT_EQ(g_oracle->get_result_vector(), oracle->get_result_vector());
}
TEST(attributes, matmul_op)
EXPECT_EQ(g_matmul->get_transpose_b(), matmul->get_transpose_b());
}
+// Round-trips PartialShape attributes of several kinds through the saver and
+// loader visitors of a NodeBuilder and expects every loaded value to compare
+// equal to the value that was saved under the same name.
+TEST(attributes, partial_shape)
+{
+    NodeBuilder node_builder;
+    AttributeVisitor& loader = node_builder.get_node_loader();
+    AttributeVisitor& saver = node_builder.get_node_saver();
+
+    // PartialShape::dynamic()
+    PartialShape dynamic_in = PartialShape::dynamic();
+    PartialShape dynamic_out;
+    saver.on_attribute("dyn", dynamic_in);
+    loader.on_attribute("dyn", dynamic_out);
+    EXPECT_EQ(dynamic_in, dynamic_out);
+
+    // Empty-brace (default) PartialShape
+    PartialShape scalar_in{};
+    PartialShape scalar_out;
+    saver.on_attribute("scalar", scalar_in);
+    loader.on_attribute("scalar", scalar_out);
+    EXPECT_EQ(scalar_in, scalar_out);
+
+    // One dimension, dynamic
+    PartialShape dyn_vector_in{Dimension::dynamic()};
+    PartialShape dyn_vector_out;
+    saver.on_attribute("dyn_vector", dyn_vector_in);
+    loader.on_attribute("dyn_vector", dyn_vector_out);
+    EXPECT_EQ(dyn_vector_in, dyn_vector_out);
+
+    // One dimension, static
+    PartialShape stat_vector_in{7};
+    PartialShape stat_vector_out;
+    saver.on_attribute("stat_vector", stat_vector_in);
+    loader.on_attribute("stat_vector", stat_vector_out);
+    EXPECT_EQ(stat_vector_in, stat_vector_out);
+
+    // Five dimensions, static and dynamic interleaved
+    PartialShape general_in{7, Dimension::dynamic(), 2, Dimension::dynamic(), 4};
+    PartialShape general_out;
+    saver.on_attribute("general", general_in);
+    loader.on_attribute("general", general_out);
+    EXPECT_EQ(general_in, general_out);
+}
+
TEST(attributes, max_pool_op)
{
FactoryRegistry<Node>::get().register_factory<opset1::MaxPool>();
auto g_shuffle_channels = as_type_ptr<opset1::ShuffleChannels>(builder.create());
EXPECT_EQ(g_shuffle_channels->get_axis(), shuffle_channels->get_axis());
- EXPECT_EQ(g_shuffle_channels->get_groups(), shuffle_channels->get_groups());
+ EXPECT_EQ(g_shuffle_channels->get_group(), shuffle_channels->get_group());
}
TEST(attributes, softmax_op)
Shape shape{100, 200};
auto c1 = make_shared<op::Constant>(element::f16, shape, vector<float16>{123});
auto c2 = static_pointer_cast<op::Constant>(c1->clone_with_new_inputs({}));
- const float* p1 = c1->get_data_ptr<float>();
- const float* p2 = c2->get_data_ptr<float>();
+ const int16_t* p1 = c1->get_data_ptr<int16_t>();
+ const int16_t* p2 = c2->get_data_ptr<int16_t>();
EXPECT_EQ(p1, p2);
}
type {
tensor_type {
elem_type: 1
- shape {
- dim {
- dim_value: 1
- }
- dim {
- dim_value: 2
- }
- dim {
- dim_value: 2
- }
- dim {
- dim_value: 2
- }
- }
}
}
}
type {
tensor_type {
elem_type: 1
- shape {
- dim {
- dim_value: 1
- }
- dim {
- dim_value: 2
- }
- dim {
- dim_value: 4
- }
- }
}
}
}
test_case.run(4);
}
-NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_2)
-{
- auto function = onnx_import::import_onnx_model(
- file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_2.prototxt"));
-
- auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}");
- test_case.add_input<float>(SOFTMAX_INPUT);
-
- test_case.add_expected_output<float>(
- {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 0.07012682, 0.22670066,
- 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265,
- 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552,
-
- 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969,
- 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588,
- 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432,
-
- 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928,
- 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439,
- 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905});
-
- test_case.run(4);
-}
-
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_1D)
{
ASSERT_THROW(onnx_import::import_onnx_model(
test_case.run();
}
-NGRAPH_TEST(${BACKEND_NAME}, onnx_model_hardmax)
-{
- auto hardmax_fn = onnx_import::import_onnx_model(
- file_util::path_join(SERIALIZED_ZOO, "onnx/hardmax.prototxt"));
-
- auto test_case = ngraph::test::NgraphTestCase(hardmax_fn, "${BACKEND_NAME}");
- test_case.add_input<float>(
- {-2.02458119f, 0.00126542f, -0.58045743f, -0.75186814f, 0.9406899f,
- -0.513188f, 0.85887463f, 1.61444086f, 0.23801147f, -0.26816885f,
- 0.6597208f, 1.43889519f, 0.28798895f, 1.44769952f, -1.99466756f,
- 0.41386644f, 0.69389555f, 1.46118255f, -1.67628606f, 1.49697552f,
-
- 0.06337166f, -1.15740783f, 0.8792142f, -0.95352717f, -1.87895792f,
- -0.74066102f, -0.27131459f, 0.2219685f, 0.31831001f, 0.52495901f,
- 0.60283089f, 0.60397976f, 0.92401468f, 0.29565101f, -1.14443776f,
- -1.07399045f, -0.92266259f, 0.24017731f, -0.30105675f, 1.18513269f,
-
- 0.55494542f, 1.12119279f, -0.43156474f, 0.15101668f, -1.460439f,
- 0.96375129f, 1.10411785f, -0.30272771f, -0.48855848f, 0.12103213f,
- -0.71388492f, 1.38398178f, 0.21924434f, 0.93105052f, -0.21074303f,
- 0.48213503f, -1.37810638f, 8.99060285f, 0.54794592f, -0.46820172f});
-
- // values for hardmax with axis==2
- test_case.add_expected_output<float>(
- Shape{3, 4, 5}, {0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
-
- 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
- 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
-
- 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f});
-
- test_case.run();
-}
-
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_shrink_float)
{
const auto shrink_fn = onnx_import::import_onnx_model(
}
}
+// Imports onnx/flatten.prototxt and runs it in BackendMode::DYNAMIC: a
+// {1, 2, 2, 2} input is expected to come back as {1, 8} holding the same values.
+NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_flatten)
+{
+    const auto model_path = file_util::path_join(SERIALIZED_ZOO, "onnx/flatten.prototxt");
+    auto flatten_fn = onnx_import::import_onnx_model(model_path);
+
+    // The same buffer serves as input and as expected output; only the shape differs.
+    std::vector<float> values{1, 2, 3, 4, 5, 6, 7, 8};
+
+    auto test_case =
+        ngraph::test::NgraphTestCase(flatten_fn, "${BACKEND_NAME}", BackendMode::DYNAMIC);
+    test_case.add_input<float>(Shape{1, 2, 2, 2}, values);
+    test_case.add_expected_output<float>(Shape{1, 8}, values);
+
+    test_case.run();
+}
+
NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_global_lp_dynamic_hw)
{
auto function = onnx_import::import_onnx_model(
test_case.add_input<int64_t>({2, 2, 2});
test_case.add_expected_output<float>(Shape{1, 1, 1}, {9});
}
+
+// Imports onnx/hardmax.prototxt, runs it in BackendMode::DYNAMIC on 60 input
+// values and expects a {3, 4, 5} one-hot result (hardmax with axis==2).
+NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_hardmax)
+{
+    const auto model_path = file_util::path_join(SERIALIZED_ZOO, "onnx/hardmax.prototxt");
+    auto function = onnx_import::import_onnx_model(model_path);
+
+    auto test_case =
+        ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}", BackendMode::DYNAMIC);
+
+    const std::vector<float> input_values{
+        -2.02458119f, 0.00126542f,  -0.58045743f, -0.75186814f, 0.9406899f,
+        -0.513188f,   0.85887463f,  1.61444086f,  0.23801147f,  -0.26816885f,
+        0.6597208f,   1.43889519f,  0.28798895f,  1.44769952f,  -1.99466756f,
+        0.41386644f,  0.69389555f,  1.46118255f,  -1.67628606f, 1.49697552f,
+
+        0.06337166f,  -1.15740783f, 0.8792142f,   -0.95352717f, -1.87895792f,
+        -0.74066102f, -0.27131459f, 0.2219685f,   0.31831001f,  0.52495901f,
+        0.60283089f,  0.60397976f,  0.92401468f,  0.29565101f,  -1.14443776f,
+        -1.07399045f, -0.92266259f, 0.24017731f,  -0.30105675f, 1.18513269f,
+
+        0.55494542f,  1.12119279f,  -0.43156474f, 0.15101668f,  -1.460439f,
+        0.96375129f,  1.10411785f,  -0.30272771f, -0.48855848f, 0.12103213f,
+        -0.71388492f, 1.38398178f,  0.21924434f,  0.93105052f,  -0.21074303f,
+        0.48213503f,  -1.37810638f, 8.99060285f,  0.54794592f,  -0.46820172f};
+    test_case.add_input<float>(input_values);
+
+    // values for hardmax with axis==2: each line below is one row of five
+    // entries with a single 1.0f at the position of that input row's maximum
+    std::vector<float> expected_values{
+        0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 1.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
+
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
+
+        0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f, 0.0f};
+    test_case.add_expected_output<float>(Shape{3, 4, 5}, expected_values);
+
+    test_case.run();
+}
+
+// Imports onnx/softmax_axis_2.prototxt, runs it in BackendMode::DYNAMIC on 60
+// input values and compares against precomputed outputs via test_case.run(4).
+NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_softmax_axis_2)
+{
+    const auto model_path =
+        file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_2.prototxt");
+    auto softmax_fn = onnx_import::import_onnx_model(model_path);
+
+    const std::vector<float> input_values = {
+        2.75793882,  -0.50841322, 0.82013929,  -0.62409912, -0.96136118, 0.21004745,  1.38337255,
+        1.19030397,  2.0940445,   -0.03551657, -0.78686039, 1.992782,    0.04300319,  -0.29230777,
+        -0.56797112, -1.26732165, -0.61935399, 0.57670432,  0.92844898,  2.82469233,
+
+        0.98721677,  -0.05100663, -1.21178917, -0.17530157, 1.40051805,  -0.13259761, -1.14313018,
+        0.2673723,   -0.87996154, 1.29053106,  1.55,        0.8396538,   1.20729817,  0.23727845,
+        -0.89113606, -1.70909842, 0.26460363,  -0.70566808, 2.383518,    1.07024615,
+
+        -1.21722605, 0.82919357,  0.55765697,  0.12657686,  0.63432172,  0.75425957,  -2.43721014,
+        -1.24478184, 2.65316853,  1.19509542,  -0.95523998, 0.5149006,   -0.01151649, 0.68327026,
+        -0.4589638,  -0.46554745, 0.21055324,  0.39266729,  2.05098086,  1.83207919};
+
+    const std::vector<float> expected_values = {
+        0.80619486, 0.03075257, 0.1161086,  0.027393,   0.01955098, 0.07012682, 0.22670066,
+        0.18689779, 0.4614171,  0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265,
+        0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552,
+
+        0.30399806, 0.1076406,  0.03371745, 0.0950595,  0.4595844,  0.13369873, 0.04866969,
+        0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588,
+        0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432,
+
+        0.0402528,  0.31156222, 0.23747503, 0.1543129,  0.25639705, 0.10627912, 0.00436928,
+        0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175,  0.17468555, 0.34994439,
+        0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905};
+
+    auto test_case =
+        ngraph::test::NgraphTestCase(softmax_fn, "${BACKEND_NAME}", BackendMode::DYNAMIC);
+    test_case.add_input<float>(input_values);
+    test_case.add_expected_output<float>(expected_values);
+
+    test_case.run(4);
+}
test_case.run();
}
-NGRAPH_TEST(${BACKEND_NAME}, onnx_model_flatten)
-{
- auto function = onnx_import::import_onnx_model(
- file_util::path_join(SERIALIZED_ZOO, "onnx/flatten.prototxt"));
-
- auto input = test::NDArray<float, 4>({{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}}).get_vector();
- auto expected_output = test::NDArray<float, 3>({{{1, 2, 3, 4}, {5, 6, 7, 8}}}).get_vector();
-
- auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}");
- test_case.add_input(input);
- test_case.add_expected_output(expected_output);
- test_case.run();
-}
-
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_split_equal_parts_default)
{
auto function = onnx_import::import_onnx_model(
/// \param backend_constructor A BackendConstructor which will be called to
//// construct an instance of the registered backend.
static BACKEND_API void register_backend(const std::string& name,
- BackendConstructor backend_constructor);
+ BackendConstructor backend_constructor);
/// \brief Query the list of registered devices
/// \returns A vector of all registered devices.
expand_6_dyn_shape
expand_uint16_dyn_shape
minimum_int64
+floor_int64
+ceiling_int64
+matmul_2x2x3_2x3x1_int64
+matmul_2x2x3_2x1x3_transpose_int64
# TopK Incorrect input data/index values precision
onnx_model_argmax_int32
dyn_slice_109
dyn_slice_114
reduce_sum_keep_dynamic
-reduce_sum_keep_stable_simple_double
+reduce_sum_keep_stable_simple_double
reduce_sum_keep_stable_acc_double
reduce_sum_keep_stable_acc
reduce_sum_keep_3d_eliminate_zero_dim
tile_3d_small_data_rank
tile_3d_few_repeats
+# Error of validate layer: MatMul_683292 with type: Gemm. Gemm input shapes must have at least 2 dimensions
+matmul_2_2
+
# Result mismatch
sum_large_1d_to_scalar
sum_stable_acc
gather_4d_indices_no_axis_uint8
tensor_constant_with_op
constant_equality_bool
+reduce_product_matrix_rows
+reduce_product_3d_to_matrix_most_sig
+reduce_product_3d_to_matrix_least_sig
+reduce_product_keep_matrix_columns
+reduce_product_keep_matrix_rows
+reduce_product_keep_3d_to_matrix_most_sig
+reduce_product_keep_3d_to_matrix_least_sig
+reduce_product_matrix_columns_dynamic
+reduce_product_matrix_rows_dynamic
+reduce_product_keep_matrix_columns_dynamic
+reduce_product_keep_matrix_rows_dynamic
+reduce_min_matrix_columns
+reduce_min_matrix_rows
+reduce_min_matrix_rows_int32
+reduce_min_3d_to_matrix_most_sig
+reduce_min_3d_to_matrix_least_sig
+reduce_min_keep_matrix_columns
+reduce_min_keep_matrix_rows
+reduce_min_keep_matrix_rows_int32
+reduce_min_keep_3d_to_matrix_most_sig
+reduce_min_keep_3d_to_matrix_least_sig
+reduce_min_matrix_columns_dynamic
+reduce_min_matrix_rows_dynamic
+reduce_min_keep_matrix_columns_dynamic
+reduce_min_keep_matrix_rows_dynamic
+
+# zero dimension / result mismatch
+reduce_product_matrix_rows_zero
+reduce_product_matrix_cols_zero
+reduce_product_vector_zero
+reduce_product_matrix_to_scalar_zero_by_zero
+reduce_product_3d_eliminate_zero_dim
+reduce_product_to_scalar_int8
+reduce_product_keep_matrix_rows_zero
+reduce_product_keep_matrix_cols_zero
+reduce_product_keep_vector_zero
+reduce_product_keep_matrix_to_scalar_zero_by_zero
+reduce_product_keep_3d_eliminate_zero_dim
+reduce_product_keep_to_scalar_int8
+reduce_min_to_scalar_int8
+reduce_min_matrix_rows_zero
+reduce_min_matrix_cols_zero
+reduce_min_vector_zero
+reduce_min_matrix_to_scalar_zero_by_zero
+reduce_min_3d_eliminate_zero_dim
+reduce_min_keep_to_scalar_int8
+reduce_min_keep_matrix_rows_zero
+reduce_min_keep_matrix_cols_zero
+reduce_min_keep_vector_zero
+reduce_min_keep_matrix_to_scalar_zero_by_zero
+reduce_min_keep_3d_eliminate_zero_dim
+reduce_mean_to_scalar_int8
+reduce_mean_matrix_rows_int32
+reduce_mean_keep_to_scalar_int8
+reduce_mean_keep_matrix_rows_int32
+reduce_max_to_scalar_int8
+reduce_max_matrix_rows_zero
+reduce_max_matrix_rows_zero_int32
+reduce_max_matrix_cols_zero
+reduce_max_vector_zero
+reduce_max_matrix_to_scalar_zero_by_zero
+reduce_max_3d_to_scalar_double
+reduce_max_3d_eliminate_zero_dim
+reduce_max_keep_to_scalar_int8
+reduce_max_keep_matrix_rows_zero
+reduce_max_keep_matrix_rows_zero_int32
+reduce_max_keep_matrix_cols_zero
+reduce_max_keep_vector_zero
+reduce_max_keep_matrix_to_scalar_zero_by_zero
+reduce_max_keep_3d_to_scalar_double
+reduce_max_keep_3d_eliminate_zero_dim
# Incorrect precision f64!
sum_trivial_in_double
shape_of_5d_v0
shape_of_5d_v3
-# Need use evaluate, only applicable to INTERPRETER
+# Need to use evaluate, only applicable to INTERPRETER
non_zero
non_zero_all_1s
non_zero_all_0s
# Unsupported op detected
IE_CPU.backwards_batchmatmultranspose_tensor2_tensor2
IE_CPU.fuse_batch_mat_mul_transpose_forward
+IE_CPU.round_int64
+
+# Can't convert type f16 to IE Precision!
+IE_CPU.fused_clamp_float16
+
+# [NOT_IMPLEMENTED] Input image format BF16 is not supported yet...
+IE_CPU.fused_clamp_bfloat16
+
+# Operations were removed from opset
+IE_CPU.atanh
+IE_CPU.asinh
+IE_CPU.acosh
+
+# The dynamic backend wrapper is no longer used for IE
+IE_CPU.onnx_dyn_shapes_model_acosh_1_3
+IE_CPU.onnx_dyn_shapes_model_acosh_3_2
+IE_CPU.onnx_dyn_shapes_model_asinh_1_3
+IE_CPU.onnx_dyn_shapes_model_asinh_3_2
+IE_CPU.onnx_dyn_shapes_model_atanh_1_3
+IE_CPU.onnx_dyn_shapes_model_atanh_3_2
+IE_CPU.onnx_dyn_shapes_avg_pool_dyn_shape
+IE_CPU.onnx_dyn_shapes_max_pool_dyn_shape
+IE_CPU.onnx_dyn_shapes_global_avg_pool_dyn_shape
+IE_CPU.onnx_dyn_shapes_global_max_pool_dyn_shape
+IE_CPU.onnx_dyn_shapes_model_flatten
+IE_CPU.onnx_dyn_shapes_slice_10_default_axes
+IE_CPU.fused_clamp_float
#-------------------------------------------------------------------------------
#
install(TARGETS interpreter_backend
LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}"
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
+ RUNTIME DESTINATION "${NGRAPH_INSTALL_LIB}"
)
endif()
reduce_sum_large_1d_to_scalar
reduce_sum_keep_large_1d_to_scalar
-
-#ONNX Flatten with dynamic reshape
-onnx_dyn_shapes_flatten_axis
-onnx_dyn_shapes_flatten_neg_axis
#include "nlohmann/json.hpp"
#include "util/all_close_f.hpp"
#include "util/test_tools.hpp"
+#include "util/visitor.hpp"
using namespace std;
using namespace ngraph;
auto f = make_shared<Function>(results, ParameterVector{X, Hinit, WH, WX, bH, WY, bY});
string s = serialize(f);
shared_ptr<Function> g = deserialize(s);
+
+ ngraph::test::NodeBuilder builder;
+ // Uncomment to see serialization
+ // builder.set_print(true);
+ builder.save_node(tensor_iterator);
+ auto g_tensor_iterator = as_type_ptr<op::v0::TensorIterator>(builder.create());
+ ASSERT_TRUE(g_tensor_iterator);
+ auto& inputs = tensor_iterator->get_input_descriptions();
+ auto& g_inputs = g_tensor_iterator->get_input_descriptions();
+ ASSERT_EQ(inputs.size(), g_inputs.size());
+ for (size_t i = 0; i < tensor_iterator->get_input_descriptions().size(); ++i)
+ {
+ auto& val = inputs[i];
+ auto& g_val = g_inputs[i];
+ ASSERT_EQ(val->get_type_info(), g_val->get_type_info());
+ ASSERT_EQ(val->m_input_index, g_val->m_input_index);
+ ASSERT_EQ(val->m_body_parameter_index, g_val->m_body_parameter_index);
+ }
+ auto& outputs = tensor_iterator->get_output_descriptions();
+ auto& g_outputs = g_tensor_iterator->get_output_descriptions();
+ ASSERT_EQ(outputs.size(), g_outputs.size());
+ for (size_t i = 0; i < tensor_iterator->get_output_descriptions().size(); ++i)
+ {
+ auto& val = outputs[i];
+ auto& g_val = g_outputs[i];
+ ASSERT_EQ(val->get_type_info(), g_val->get_type_info());
+ }
}
TEST(serialize, tensor_iterator_lstm)
FAIL() << "Deduced type check failed for unexpected reason";
}
}
+
+// Data input of dynamic rank plus a non-constant (Parameter) target shape:
+// the BIDIRECTIONAL v3 Broadcast output is expected to stay PartialShape::dynamic().
+TEST(type_prop, broadcast_v3_output_rank_not_deduced)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
+    const auto target_shape = make_shared<op::Parameter>(element::i64, PartialShape::dynamic(1));
+    const auto mode = op::BroadcastType::BIDIRECTIONAL;
+    const auto broadcast = make_shared<op::v3::Broadcast>(data, target_shape, mode);
+
+    const auto& out_shape = broadcast->get_output_partial_shape(0);
+    ASSERT_TRUE(out_shape.same_scheme(PartialShape::dynamic()));
+}
+
+// Data input of rank 4 (all dimensions dynamic) and a constant target shape with
+// 3 elements: the output is expected to match PartialShape::dynamic(4).
+TEST(type_prop, broadcast_v3_output_rank_deduced_from_arg)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, PartialShape::dynamic(4));
+    const auto target_shape = op::Constant::create(element::i64, {3}, {8, 6, 4});
+    const auto mode = op::BroadcastType::BIDIRECTIONAL;
+    const auto broadcast = make_shared<op::v3::Broadcast>(data, target_shape, mode);
+
+    const auto& out_shape = broadcast->get_output_partial_shape(0);
+    ASSERT_TRUE(out_shape.same_scheme(PartialShape::dynamic(4)));
+}
+
+// Data input of rank 4 (all dimensions dynamic) and a constant target shape with
+// 5 elements: the output is expected to match PartialShape::dynamic(5).
+TEST(type_prop, broadcast_v3_output_rank_deduced_from_new_shape_input)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, PartialShape::dynamic(4));
+    const auto target_shape = op::Constant::create(element::i64, {5}, {8, 6, 1, 5, 1});
+    const auto mode = op::BroadcastType::BIDIRECTIONAL;
+    const auto broadcast = make_shared<op::v3::Broadcast>(data, target_shape, mode);
+
+    const auto& out_shape = broadcast->get_output_partial_shape(0);
+    ASSERT_TRUE(out_shape.same_scheme(PartialShape::dynamic(5)));
+}
test_tools.cpp
test_control.cpp
test_case.cpp
+ visitor.hpp
provenance_enabler.hpp
)
const std::string& backend_name,
const BackendMode mode)
: m_function(function)
- , m_backend(ngraph::runtime::Backend::create(backend_name, mode == BackendMode::DYNAMIC))
{
if (mode == BackendMode::STATIC)
{
NGRAPH_CHECK(!m_function->is_dynamic(),
"For dynamic function using dynamic backend is expected.");
}
+
+ // IE backend test should not be run with dynamic backend wrapper
+ const bool use_dynamic =
+ mode == BackendMode::DYNAMIC && backend_name.find("IE") == std::string::npos;
+
+ m_backend = ngraph::runtime::Backend::create(backend_name, use_dynamic);
m_executable = m_backend->compile(m_function);
for (auto i = 0; i < m_function->get_output_size(); ++i)
{
const auto& output_tensor =
- (mode == BackendMode::DYNAMIC)
+ (use_dynamic)
? m_backend->create_dynamic_tensor(m_function->get_output_element_type(i),
m_function->get_output_partial_shape(i))
: m_backend->create_tensor(m_function->get_output_element_type(i),
--- /dev/null
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "ngraph/attribute_visitor.hpp"
+#include "ngraph/factory.hpp"
+#include "ngraph/runtime/host_tensor.hpp"
+
+namespace ngraph
+{
+ namespace test
+ {
+        /// \brief Type-erased base class for one stored attribute value.
+        ///
+        /// Every conversion operator declared here fails with "Invalid type access".
+        /// A derived holder overrides the single operator matching the type it stores,
+        /// so converting a holder to any other type is reported as an error.
+        class ValueHolder
+        {
+        public:
+            virtual ~ValueHolder() {}
+            // Scalar conversions.
+            virtual operator bool&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator float&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator double&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::string&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator int8_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator int16_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator int32_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator int64_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator uint8_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator uint16_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator uint32_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator uint64_t&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            // Vector conversions.
+            virtual operator std::vector<std::string>&()
+            {
+                NGRAPH_CHECK(false, "Invalid type access");
+            }
+            virtual operator std::vector<float>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<double>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<int8_t>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<int16_t>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<int32_t>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<int64_t>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<uint8_t>&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            virtual operator std::vector<uint16_t>&()
+            {
+                NGRAPH_CHECK(false, "Invalid type access");
+            }
+            virtual operator std::vector<uint32_t>&()
+            {
+                NGRAPH_CHECK(false, "Invalid type access");
+            }
+            virtual operator std::vector<uint64_t>&()
+            {
+                NGRAPH_CHECK(false, "Invalid type access");
+            }
+            // Raw data blob conversion.
+            virtual operator HostTensorPtr&() { NGRAPH_CHECK(false, "Invalid type access"); }
+            /// \brief Returns the index recorded for this holder (0 unless a derived
+            ///        class assigns m_index).
+            uint64_t get_index() const { return m_index; }
+        protected:
+            uint64_t m_index{0};
+        };
+
+        /// \brief Holder that owns a copy of one value of type T.
+        ///
+        /// Only the conversion to T& is overridden; every other conversion keeps the
+        /// failing behaviour inherited from ValueHolder.
+        template <typename T>
+        class ValueHolderImp : public ValueHolder
+        {
+        public:
+            /// \param value Value to copy into the holder.
+            /// \param index Index stored in the base class and reported by get_index().
+            ValueHolderImp(const T& value, uint64_t index)
+                : m_value(value)
+            {
+                // m_index belongs to the base class, so it is assigned in the body
+                // rather than in the member initializer list.
+                this->m_index = index;
+            }
+
+            /// \brief Gives mutable access to the stored value.
+            operator T&() override
+            {
+                return m_value;
+            }
+
+        protected:
+            T m_value;
+        };
+
+        /// \brief Name-indexed store of attribute values that also records write order.
+        ///
+        /// Each inserted value is tagged with the running write counter. get() checks
+        /// that values are read back in the same order in which they were written.
+        class ValueMap
+        {
+            using map_type = std::unordered_map<std::string, std::shared_ptr<ValueHolder>>;
+
+            /// \brief Shared implementation of the insert* methods: stores a copy of
+            ///        value under name and fails if the name is already present.
+            template <typename T>
+            void emplace_holder(const std::string& name, const T& value)
+            {
+                std::pair<map_type::iterator, bool> result = m_values.insert(map_type::value_type(
+                    name, std::make_shared<ValueHolderImp<T>>(value, m_write_count++)));
+                NGRAPH_CHECK(result.second, name, " is already in use");
+            }
+
+        public:
+            /// \brief Set to print serialization information
+            void set_print(bool value) { m_print = value; }
+            /// \brief Stores value under name without printing it.
+            template <typename T>
+            void insert(const std::string& name, const T& value)
+            {
+                emplace_holder(name, value);
+            }
+            /// \brief Stores a streamable value under name; when printing is enabled the
+            ///        value is also written to std::cerr.
+            template <typename T>
+            void insert_scalar(const std::string& name, const T& value)
+            {
+                emplace_holder(name, value);
+                if (m_print)
+                {
+                    std::cerr << "SER: " << name << " = " << value << std::endl;
+                }
+            }
+            /// \brief Stores an iterable value under name; when printing is enabled its
+            ///        elements are written to std::cerr as a comma separated list.
+            template <typename T>
+            void insert_vector(const std::string& name, const T& value)
+            {
+                emplace_holder(name, value);
+                if (m_print)
+                {
+                    std::cerr << "SER: " << name << " = [";
+                    std::string comma = "";
+                    // Iterate by reference so elements such as strings are not copied.
+                    for (const auto& val : value)
+                    {
+                        std::cerr << comma << val;
+                        comma = ", ";
+                    }
+                    std::cerr << "]" << std::endl;
+                }
+            }
+            /// \brief Returns the value stored under name as a T&.
+            ///
+            /// m_values.at() throws std::out_of_range for an unknown name. The read
+            /// counter is compared against the index recorded at insertion, so reading
+            /// values in a different order than they were written fails the check.
+            template <typename T>
+            T& get(const std::string& name)
+            {
+                ValueHolder& value_holder = *m_values.at(name);
+                NGRAPH_CHECK(m_read_count++ == value_holder.get_index(),
+                             "Attribute \"",
+                             name,
+                             "\" is read in a different order than it was written");
+                // Reuse the holder found above instead of looking the name up again.
+                return static_cast<T&>(value_holder);
+            }
+
+        protected:
+            map_type m_values;
+            uint64_t m_write_count{0};
+            uint64_t m_read_count{0};
+            bool m_print{false};
+        };
+
+        /// \brief Attribute visitor that assigns values read from a ValueMap to the
+        ///        attributes it visits.
+        class DeserializeAttributeVisitor : public AttributeVisitor
+        {
+            /// \brief Reads the value of type T stored under name and hands it to adapter.
+            template <typename T>
+            void set_from_value_map(const std::string& name, ValueAccessor<T>& adapter)
+            {
+                adapter.set(m_values.get<T>(name));
+            }
+
+        public:
+            /// \param value_map Map the attribute values are read from; it is held by
+            ///        reference.
+            DeserializeAttributeVisitor(ValueMap& value_map)
+                : m_values(value_map)
+            {
+            }
+
+            // Untyped attributes are not supported and always fail.
+            void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
+            {
+                NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be unmarshalled");
+            }
+
+            // The remaining adapter methods fall back on the void adapter if not implemented
+
+            // Scalar attributes.
+            void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name, ValueAccessor<bool>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name, ValueAccessor<int64_t>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name, ValueAccessor<double>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+
+            // Vector attributes.
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int8_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int16_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int32_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int64_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint8_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint16_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint32_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint64_t>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<std::string>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<float>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<double>>& adapter) override
+            {
+                set_from_value_map(name, adapter);
+            }
+
+            // Raw memory attribute: the bytes held by the stored host tensor are read
+            // into the buffer exposed by the adapter.
+            void on_adapter(const std::string& name, ValueAccessor<void*>& adapter) override
+            {
+                HostTensorPtr& stored = m_values.get<HostTensorPtr>(name);
+                stored->read(adapter.get_ptr(), adapter.size());
+            }
+
+        protected:
+            ValueMap& m_values;
+        };
+
+        /// \brief Attribute visitor that copies the value of every visited attribute
+        ///        into a ValueMap under the attribute's name.
+        class SerializeAttributeVisitor : public AttributeVisitor
+        {
+        public:
+            /// \param value_map Map the attribute values are written to; it is held by
+            ///        reference.
+            SerializeAttributeVisitor(ValueMap& value_map)
+                : m_values(value_map)
+            {
+            }
+
+            // Untyped attributes are not supported and always fail.
+            void on_adapter(const std::string& name, ValueAccessor<void>& adapter) override
+            {
+                NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be marshalled");
+            }
+
+            // The remaining adapter methods fall back on the void adapter if not implemented
+
+            // Scalar attributes are stored with insert_scalar.
+            void on_adapter(const std::string& name, ValueAccessor<std::string>& adapter) override
+            {
+                m_values.insert_scalar(name, adapter.get());
+            }
+            void on_adapter(const std::string& name, ValueAccessor<bool>& adapter) override
+            {
+                m_values.insert_scalar(name, adapter.get());
+            }
+            void on_adapter(const std::string& name, ValueAccessor<int64_t>& adapter) override
+            {
+                m_values.insert_scalar(name, adapter.get());
+            }
+            void on_adapter(const std::string& name, ValueAccessor<double>& adapter) override
+            {
+                m_values.insert_scalar(name, adapter.get());
+            }
+
+            // Vector attributes are stored with insert_vector.
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<std::string>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<float>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<double>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int8_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int16_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int32_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<int64_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint8_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint16_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint32_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+            void on_adapter(const std::string& name,
+                            ValueAccessor<std::vector<uint64_t>>& adapter) override
+            {
+                m_values.insert_vector(name, adapter.get());
+            }
+
+            // Raw memory attribute: the adapter's bytes are written into a new u8 host
+            // tensor of adapter.size() elements, which is then stored without printing.
+            void on_adapter(const std::string& name, ValueAccessor<void*>& adapter) override
+            {
+                HostTensorPtr snapshot =
+                    std::make_shared<HostTensor>(element::u8, Shape{adapter.size()});
+                snapshot->write(adapter.get_ptr(), adapter.size());
+                m_values.insert(name, snapshot);
+            }
+
+        protected:
+            ValueMap& m_values;
+        };
+
+        /// \brief Saves the attributes of a node into its own ValueMap and creates a new
+        ///        node of the same type with those attributes loaded back.
+        ///
+        /// The object is both the value store (ValueMap) and the loading visitor
+        /// (DeserializeAttributeVisitor); saving goes through the m_serializer member,
+        /// which writes into the same map.
+        class NodeBuilder : public ValueMap, public DeserializeAttributeVisitor
+        {
+        public:
+            /// \brief Creates a builder with an empty value map and no node saved.
+            NodeBuilder()
+                : DeserializeAttributeVisitor(static_cast<ValueMap&>(*this))
+                , m_serializer(*this)
+            {
+            }
+
+            /// \brief Creates a builder and immediately saves the attributes of node.
+            NodeBuilder(const std::shared_ptr<Node>& node)
+                : NodeBuilder()
+            {
+                save_node(node);
+            }
+
+            /// \brief Records the type info of node and visits its attributes with the
+            ///        serializing visitor.
+            void save_node(std::shared_ptr<Node> node)
+            {
+                m_node_type_info = node->get_type_info();
+                node->visit_attributes(m_serializer);
+            }
+
+            /// \brief Creates a node of the saved type through FactoryRegistry<Node> and
+            ///        loads the saved attributes into it.
+            // Does not validate, since inputs aren't set
+            std::shared_ptr<Node> create()
+            {
+                std::shared_ptr<Node> created(
+                    FactoryRegistry<Node>::get().create(m_node_type_info));
+                created->visit_attributes(*this);
+                return created;
+            }
+
+            /// \brief Visitor that writes attributes into this builder.
+            AttributeVisitor& get_node_saver()
+            {
+                return m_serializer;
+            }
+
+            /// \brief Visitor that reads attributes back from this builder.
+            AttributeVisitor& get_node_loader()
+            {
+                return *this;
+            }
+
+        protected:
+            Node::type_info_t m_node_type_info;
+            SerializeAttributeVisitor m_serializer;
+        };
+ }
+}
# Name of virtualenv created by stress_tests/scripts/get_testdata.py
-.stress_venv
+.stress_venv
\ No newline at end of file
return pmc.WorkingSetSize;
}
#else
-size_t getVirtualMemoryInKB(char *name){
+size_t getSystemDataByName(char *name){
FILE* file = fopen("/proc/self/status", "r");
size_t result = 0;
if (file != nullptr) {
return result;
}
-size_t getVmSizeInKB() {return getVirtualMemoryInKB((char*) "VmSize:");}
-size_t getVmPeakInKB() {return getVirtualMemoryInKB((char*) "VmPeak:");}
-size_t getVmRSSInKB() {return getVirtualMemoryInKB((char*) "VmRSS:");}
-size_t getVmHWMInKB() {return getVirtualMemoryInKB((char*) "VmHWM:");}
+// Thin wrappers that forward a fixed field label to getSystemDataByName(), which opens
+// /proc/self/status. The labels are string literals cast to char* because the helper
+// takes a non-const char*.
+// NOTE(review): the "InKB" unit is taken from the function names only; the parsing code
+// is not visible here - confirm.
+// NOTE(review): these definitions sit in the #else (non-Windows) branch; confirm that
+// the other branch also defines getThreadsNum(), since it is declared unconditionally.
+size_t getVmSizeInKB() {return getSystemDataByName((char*) "VmSize:");}
+size_t getVmPeakInKB() {return getSystemDataByName((char*) "VmPeak:");}
+size_t getVmRSSInKB() {return getSystemDataByName((char*) "VmRSS:");}
+size_t getVmHWMInKB() {return getSystemDataByName((char*) "VmHWM:");}
+// Forwards the "Threads:" label, i.e. the value reported for that field.
+size_t getThreadsNum() {return getSystemDataByName((char*) "Threads:");}
#endif
size_t getVmPeakInKB();
size_t getVmRSSInKB();
size_t getVmHWMInKB();
+size_t getThreadsNum();
template<typename Function, typename ... Args>
int run_in_processes(const int &numprocesses, Function const &function, Args ... args) {
#define THRESHOLD 0.1
// Measure values
-enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, MeasureValueMax };
+enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, THREADS, MeasureValueMax };
namespace util {
template <typename In, typename Out, typename Func>
past.resize(std::min(n / 2, MAX_AVERAGE));
log_info("Warming up for " << WARMUP_STEPS << " iterations");
- log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK");
+ log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK\tTHREADS");
int measure_count = n;
for (size_t iteration = 0; measure_count > 0; iteration++) {
// Warm up to take reference values
test_pipeline();
getVmValues(cur[VMSIZE], cur[VMPEAK], cur[VMRSS], cur[VMHWM]);
+ cur[THREADS] = getThreadsNum();
past[iteration % past.size()] = cur;
progress_str = std::to_string(iteration + 1) + "\t" + std::to_string(cur[VMRSS]) + "\t" +
std::to_string(cur[VMHWM]) + "\t" + std::to_string(cur[VMSIZE]) + "\t" +
- std::to_string(cur[VMPEAK]);
+ std::to_string(cur[VMPEAK]) + "\t" + std::to_string(cur[THREADS]);
// measure
if (iteration >= WARMUP_STEPS) {
--- /dev/null
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ {% block head %}
+ <title>{% block title %}{% endblock %}</title>
+ {% endblock %}
+</head>
+<body>
+ <div id="content">{% block content %}{% endblock %}</div>
+ <div id="footer">
+ {% block footer %}
+ {% endblock %}
+ </div>
+</body>
+</html>
--- /dev/null
+{% extends "base.html" %}
+{% block title %}Memcheck report{% endblock %}
+{% block head %}
+ {{ super() }}
+<link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
+<script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.13.0/moment.min.js"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.9.3/Chart.js"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/chartjs-plugin-annotation/0.5.7/chartjs-plugin-annotation.min.js"></script>
+{% endblock %}
+{% block content %}
+<div class="w3-container">
+<h2>Memcheck report</h2>
+ {% for timeline in timelines %}
+ <button onclick="show('{{timeline['_id'][0]|e}}-details')"
+ class="w3-button w3-block w3-border w3-left-align">
+ <h4> {{ timeline['device'][0]|e }} {{ timeline['model'][0]|e }} {{ timeline['test_name'][0]|e }} </h4>
+ <div class="w3-text-green">
+ PASS
+ </div>
+
+ <div id="{{timeline['_id'][0]|e}}-details" class="w3-hide w3-container">
+
+ <div class="w3-container">
+ Test timeline:
+ <canvas id="{{timeline['_id'][0]|e}}-chart" height="100"></canvas>
+ <script>
+ var ctx = document.getElementById("{{timeline['_id'][0]|e}}-chart").getContext('2d');
+ var myChart = new Chart(ctx, {
+ type: 'line',
+ data: {
+ labels: [
+{% for point in timeline['commit_date'] %} "{{point}}", {% endfor %}
+ ],
+ datasets: [{
+ label: 'vmrss',
+ borderColor: 'red',
+ lineTension: 0,
+ data: [
+{% for point in timeline['metrics']['vmrss'] %} {{point}}, {% endfor %}
+ ],
+ },
+ {
+ label: 'vmhwm',
+ borderColor: 'blue',
+ lineTension: 0,
+ data: [
+{% for point in timeline['metrics']['vmhwm'] %} {{point}}, {% endfor %}
+ ],
+ }
+ ]
+ },
+ options: {
+ annotation: {
+ annotations: [{
+ type: 'line',
+ mode: 'horizontal',
+ scaleID: 'y-axis-0',
+ value: '{{ timeline['ref_metrics']['vmrss'][-1]|e }}',
+ borderColor: 'black',
+ borderWidth: 3,
+ label: {
+ backgroundColor: "red",
+ content: "vmrss waterline",
+ enabled: true,
+ },
+
+ },
+ {
+ type: 'line',
+ mode: 'horizontal',
+ scaleID: 'y-axis-0',
+ value: '{{ timeline['ref_metrics']['vmhwm'][-1]|e }}',
+ borderColor: 'black',
+ borderWidth: 3,
+ label: {
+ backgroundColor: "blue",
+ content: "vmhwm waterline",
+ enabled: true,
+ },
+
+ }],
+ drawTime: "afterDraw" // (default)
+ },
+ scales: {
+ xAxes: [{
+ type: 'time',
+ distribution: 'series',
+ time: {
+ format: 'YYYY-MM-DD hh:mm:ss',
+ displayFormats: {
+ 'millisecond': 'MMM DD hh:mm',
+ 'second': 'MMM DD hh:mm',
+ 'minute': 'MMM DD hh:mm',
+ 'hour': 'MMM DD hh:mm',
+ 'day': 'MMM DD hh:mm',
+ 'week': 'MMM DD hh:mm',
+ 'month': 'MMM DD hh:mm',
+ 'quarter': 'MMM DD hh:mm',
+ 'year': 'MMM DD hh:mm',
+ },
+
+ },
+ scaleLabel: {
+ display: true,
+ labelString: 'Commit Time'
+ }
+ }]
+ }
+ }
+ });
+ </script>
+ </div>
+ </div>
+
+ </button>
+ {% endfor %}
+</div>
+<script>
+function show(id) {
+ var x = document.getElementById(id);
+ if (x.className.indexOf("w3-show") == -1) {
+ x.className += " w3-show";
+ } else {
+ x.className = x.className.replace(" w3-show", "");
+ }
+}
+</script>
+{% endblock %}
import re
import sys
import argparse
+from inspect import getsourcefile
from glob import glob
import xml.etree.ElementTree as ET
import hashlib
KEY_FIELDS = ('test_name', 'model', 'device', 'build_url')
-def globber(paths):
- """Generator extending paths with wildcards"""
- for path in paths:
- if any(magic in path for magic in ['*', '?', '!', '[', ']']):
- for resolved in glob(path, recursive=True):
- yield resolved
- else:
- yield path
+def abs_path(relative_path):
+ """Return absolute path given path relative to the current file.
+ """
+ return os.path.realpath(
+ os.path.join(os.path.dirname(getsourcefile(lambda: 0)), relative_path))
def parse_memcheck_log(log_path):
collection.replace_one({'_id': record['_id']}, record, upsert=True)
+def _transpose_dicts(items, template=None):
+ """ Build dictionary of arrays from array of dictionaries
+ Example:
+ > in = [{'a':1, 'b':3}, {'a':2}]
+ > _transpose_dicts(in, template=in[0])
+ {'a':[1,2], 'b':[3, None]}
+ """
+ result = {}
+ if not items:
+ return result
+ if not template:
+ template = items[0]
+ for key, template_val in template.items():
+ if isinstance(template_val, dict):
+ result[key] = _transpose_dicts(
+ [item[key] for item in items if key in item], template_val)
+ else:
+ result[key] = [item.get(key, None) for item in items]
+ return result
+
+
+TIMELINE_SIMILARITY = ('test_name', 'model', 'device', 'target_branch')
+
+
+def query_timeline(records, db_url, db_collection, max_items=20, similarity=TIMELINE_SIMILARITY):
+ """ Query database for similar memcheck items committed previously
+ """
+ client = MongoClient(db_url)
+ collection = client[DATABASE][db_collection]
+ result = []
+ for record in records:
+ query = dict((key, record[key]) for key in similarity)
+ query['commit_date'] = {'$lt': record['commit_date']}
+ pipeline = [
+ {'$match': query},
+ {'$addFields': {'commit_date': {'$dateFromString': {'dateString': '$commit_date'}}}},
+ {'$sort': {'commit_date': -1}},
+ {'$limit': max_items},
+ {'$sort': {'commit_date': 1}},
+ ]
+ items = list(collection.aggregate(pipeline)) + [record]
+ timeline = _transpose_dicts(items, template=record)
+ result += [timeline]
+ return result
+
+
+def create_memcheck_report(records, db_url, db_collection, output_path):
+ """ Create memcheck timeline HTML report for records.
+ """
+ if db_collection == 'pre_commit':
+ db_collection = 'commit' # pre-commit jobs building report from past commits
+ records.sort(
+ key=lambda item: f"{item['status']}{item['device']}{item['model']}{item['test_name']}")
+ timelines = query_timeline(records, db_url, db_collection)
+ import jinja2 # pylint: disable=import-outside-toplevel
+ env = jinja2.Environment(
+ loader=jinja2.FileSystemLoader(
+ searchpath=os.path.join(abs_path('.'), 'memcheck-template')),
+ autoescape=False)
+ template = env.get_template('timeline_report.html')
+ template.stream(records=records, timelines=timelines).dump(output_path)
+
+
+def globber(paths):
+ """Generator extending paths with wildcards"""
+ for path in paths:
+ if any(magic in path for magic in ['*', '?', '!', '[', ']']):
+ for resolved in glob(path, recursive=True):
+ yield resolved
+ else:
+ yield path
+
+
def main():
"""Main entry point.
"""
-pymongo
\ No newline at end of file
+pymongo
+Jinja2
\ No newline at end of file