InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo();
for (auto iter : sortedLayers) {
- if (_skipmarking.find(iter->type) != _skipmarking.end()) {
- continue;
+ // check if the memory output node needs to be transformed
+ if (iter->type == "Memory" && iter->outData.size() == 0 &&
+ iter->insData[0].lock()->getPrecision() == Precision::FP32) {
+ auto curPrec = iter->insData[0].lock()->getPrecision();
+ iter->insData[0].lock()->setPrecision(Precision::BF16);
}
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
}
}
}
-
// convert all edges back to FP32 on demand
optimizeToFloat(network);
}
toAnalyzeTensors.insert(output.second);
}
}
-
// 2b. go over all unknown layers for this algo and mark them as fp32 and add to the toAnalyzeTensors
// 2c. go over all inputs to _initbf16 and if they are fp32 - add them to the toAnalyzeTensors
for (auto iter : sortedLayers) {
- if (_skipmarking.find(iter->type) != _skipmarking.end()) {
- continue;
- }
if (_initbf16.find(iter->type) == _initbf16.end()
&& _complementbf16.find(iter->type) == _complementbf16.end()
&& _multiinput.find(iter->type) == _multiinput.end()) {
}
}
}
-
// 3 - while toAnalyzeTensors is not empty look at the layers dealing with tensors mentioned in toAnalyzeTensors
while (!toAnalyzeTensors.empty()) {
DataPtr tensor = *toAnalyzeTensors.begin();
if (_initbf16.find(layer->type) == _initbf16.end()) {
// for all inputs investigate and modify tensor precision if required
for (size_t i = 0; i < layer->insData.size(); i++) {
+ auto creator = getCreatorLayer(layer->insData[i].lock());
+ if (_skipmarking.find(creator.lock()->type) != _skipmarking.end()) {
+ continue;
+ }
bool marked = tryToMarkFP32(layer->insData[i].lock(), immutable);
if (marked) {
toAnalyzeTensors.insert(layer->insData[i].lock());
for (auto inputTo : getInputTo(tensor)) {
for (size_t o = 0; o < inputTo.second->outData.size(); o++) {
if (inputTo.second->outData[o]->getTensorDesc().getPrecision() == Precision::BF16) {
+ // if some layer (e.g. Memory) consumes the tensor but must be paired with another layer
+ // (e.g. MemoryOutput) in the net, we must prevent this tensor from being marked as FP32
+ bool notToMarkFP32 = false;
+ for (auto consumer : getInputTo(inputTo.second->outData[o])) {
+ if (_skipmarking.find(consumer.second->type) !=
+ _skipmarking.end()) {
+ notToMarkFP32 = true;
+ }
+ }
+ if (notToMarkFP32) {
+ continue;
+ }
bool marked = tryToMarkFP32(inputTo.second->outData[o], immutable);
if (marked) {
toAnalyzeTensors.insert(layer->outData[o]);
{ "convolution", "fullyconnected", "innerproduct" };
const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
{ "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "logistic",
- "exp", "gelu", "clamp", "swish", "prelu", "pooling", "norm", "gather" };
+ "exp", "gelu", "clamp", "swish", "prelu", "pooling", "norm", "gather", "memory" };
const InferenceEngine::details::caseless_set<std::string> _multiinput =
{ "concat", "eltwise" };
+ // skip marking: these layers must not fall back to fp32 without considering both their paired input and output nodes
const InferenceEngine::details::caseless_set<std::string> _skipmarking =
- { "const" };
+ { "memory" };
/**
* Tries to mark tensor as FP32 by analyzing of local consumers of the tensor. Do not mark if
#include "mkldnn_infer_request.h"
#include "mkldnn_memory_state.h"
#include "mkldnn_itt.h"
+#include "nodes/mkldnn_memory_node.hpp"
#include "bf16transformer.h"
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_tools.hpp>
if (_graphs.size() == 1) {
for (auto &node : _graphs.begin()->get()->GetNodes()) {
if (node->getType() == MemoryInput) {
- auto state_store = node->getChildEdgeAt(0)->getMemoryPtr();
+ auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
+ auto state_store = memoryNode->getStore();
auto state_name = node->getName();
// Remove suffix with pair ID. Internal information.
isConst |= isConstOutput(edge);
isOutput |= edge->getChild()->getType() == Output;
isInput |= edge->getParent()->getType() == Input;
-
- // WA. MemoryOutput will keep data in that edge
- // So need to make it immortal..
- isConst |= edge->getParent()->getType() == MemoryInput;
}
if (reuse_io_tensors) {
ngraph::ResultVector results;
ngraph::ParameterVector params;
+ ngraph::NodeVector to_hold;
auto get_inputs = [&] (const MKLDNNNodePtr & node) {
auto pr_edges = node->getParentEdges();
};
auto create_ngraph_node = [&](const MKLDNNNodePtr &node) {
- bool is_input = false, is_output = false;
+ bool is_input = false, is_output = false, should_be_hold = false;
for (auto && kvp : graph.inputNodes) {
if (kvp.second == node) {
is_input = true;
}
}
+ if (!is_output && node->getChildEdges().empty()) {
+ // The node has no consumer and is not an output.
+ // It has to be kept alive in some other way.
+ should_be_hold = true;
+ }
+
auto meta_data = extract_node_metadata(node);
std::shared_ptr<ngraph::Node> return_node;
if (is_input) {
}
}
+ if (should_be_hold) {
+ to_hold.push_back(return_node);
+ }
+
for (auto && kvp : meta_data)
return_node->get_rt_info()[kvp.first] = std::make_shared<::ngraph::VariantWrapper<std::string>>(kvp.second);
return_node->set_friendly_name(node->getName());
node2layer[node] = nodes.back();
}
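+ // Nodes collected in to_hold have no consumers; attach them as control dependencies
+ // of the first result so they are kept in the dumped function.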
+ auto holder = results[0];
+ for (auto &node : to_hold) {
+ holder->add_control_dependency(node);
+ }
+
ngraph::op::GenericIE::DisableReshape reshape(nodes);
auto function = std::make_shared<ngraph::Function>(results, params, graph._name);
InferenceEngine::CNNNetwork net(function);
} else {
str_type += "_I8";
}
+ } else {
+ if (selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) {
+ str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision().name());
+ } else {
+ str_type += "_I8";
+ }
}
}
return;
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
- if (precision != InferenceEngine::Precision::FP32)
- precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
config.inConfs.resize(1);
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = false;
- config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format::any);
+ config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()));
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, memory::format::any);
}
-const MKLDNNEdgePtr MKLDNNMemoryOutputNode::getChildEdgeAt(size_t idx) const {
- if (inputNode != nullptr) {
- return inputNode->getChildEdgeAt(idx);
- }
- return MKLDNNNode::getChildEdgeAt(idx);
-}
-
void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) {
auto& srcMemory = getParentEdgeAt(0)->getMemory();
- const float *src_ptr = reinterpret_cast<const float*>(srcMemory.GetData()) +
- srcMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
- float *dst_ptr = reinterpret_cast<float*>(getChildEdgeAt(0)->getMemory().GetData()) +
- getChildEdgeAt(0)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
-
- // TODO: this can be eliminated by completely removing MKLDNN memory output NODE, to fuse it with output of prev layer
- memcpy(dst_ptr, src_ptr, srcMemory.GetSize());
+ auto inputMemoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(inputNode);
+ IE_ASSERT(inputMemoryNode != nullptr);
+ inputMemoryNode->storeState(srcMemory);
}
#if defined (COMPILED_CPU_MKLDNN_INPUT_NODE)
MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
- : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer) {
+ : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer), dataStore(new MKLDNNMemory{eng}) {
if (created()) {
holder = MKLDNNMemoryNodeVirtualEdge::registerInput(this);
}
}
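+// The internal state buffer mirrors the memory descriptor of the node output edge.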
+void MKLDNNMemoryInputNode::createPrimitive() {
+ MKLDNNInputNode::createPrimitive();
+
+ auto mem_desc = getChildEdgeAt(0)->getMemoryPtr()->GetDescriptor();
+ dataStore->Create(mem_desc);
+
+ // default memory state is zero filled
+ dataStore->FillZero();
+}
+
+/**
+ * Copy data from one tensor into another.
+ * Copies as is, assuming both are dense tensors with the same layout.
+ * @param dst destination memory object
+ * @param src source memory object
+ */
+inline
+static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) {
+ auto getDataWithOff = [] (const MKLDNNMemory& mem) {
+ auto elemSize = MKLDNNExtensionUtils::sizeOfDataType(mem.GetDataType());
+ return static_cast<uint8_t*>(mem.GetData()) +
+ mem.GetDescriptor().data.layout_desc.blocking.offset_padding * elemSize;
+ };
+
+ auto srcPtr = getDataWithOff(src);
+ auto dstPtr = getDataWithOff(dst);
+ auto srcSizeInByte = src.GetSize();
+ auto dstSizeInByte = dst.GetSize();
+
+ IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible: they have different sizes.";
+
+ memcpy(dstPtr, srcPtr, srcSizeInByte);
+}
+
MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() {
MKLDNNMemoryNodeVirtualEdge::remove(this, holder);
}
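+// Expose the internal state memory (used when creating the plugin's memory state objects).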
+MKLDNNMemoryPtr MKLDNNMemoryInputNode::getStore() {
+ return dataStore;
+}
+
+void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) {
+ // TODO: ideally this would be a single call:
+ // dataStore->SetData(new_state, false);
+ // but for performance reasons we use a simple manual copy
+ simple_copy(*dataStore, new_state);
+}
+
+void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) {
+ auto dst_mem = getChildEdgeAt(0)->getMemory();
+ // TODO: ideally this would be a single call:
+ // dst_mem.SetData(dataStore, false);
+ // but for performance reasons we use a simple manual copy
+ simple_copy(dst_mem, *dataStore);
+}
+
MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) {
std::lock_guard<std::mutex> lock{MKLDNNMemoryNodeVirtualEdge::holderMutex};
// in case of output already registered
~MKLDNNMemoryOutputNode() override;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
- const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const override;
void createPrimitive() override {}
void execute(mkldnn::stream strm) override;
bool created() const override {
void setInputNode(MKLDNNNode* node) override {
inputNode = node;
}
+
private:
/**
* @brief keeps reference to input sibling node
bool created() const override {
return getType() == MemoryInput;
}
+ void execute(mkldnn::stream strm) override;
+
+ void createPrimitive() override;
void setInputNode(MKLDNNNode* node) override {}
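+ // storeState() overwrites the internal state buffer; getStore() exposes it to the outside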
+ void storeState(const MKLDNNMemory& mem);
+ MKLDNNMemoryPtr getStore();
private:
+ MKLDNNMemoryPtr dataStore;
static Register<MKLDNNMemoryInputNode> reg;
MKLDNNMemoryNodeVirtualEdge::Holder* holder = nullptr;
};
ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(tensor_iterator->get_body()->to_function()));
ASSERT_FALSE(has_type<ngraph::element::Type_t::i64>(tensor_iterator->get_body()->to_function()));
}
+}
+
+TEST(TransformationTests, ConvertPrecision_Variables) {
+ std::shared_ptr<ngraph::Function> f(nullptr);
+ {
+ Shape shape {1, 10, 2};
+ auto inp = std::make_shared<opset4::Parameter>(element::f16, shape);
+ auto m_i = std::make_shared<opset4::Constant>(element::f16, shape, 1);
+ auto m_r = std::make_shared<opset4::ReadValue>(m_i, "ID");
+ auto sum = std::make_shared<opset4::Add>(inp, m_r);
+ auto m_w = std::make_shared<opset4::Assign>(sum, "ID");
+ auto mul = std::make_shared<opset4::Multiply>(inp, sum);
+
+ mul->add_control_dependency(m_w);
+
+ f = std::make_shared<Function>(NodeVector{mul}, ParameterVector{inp});
+
+ pass::Manager manager;
+ manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+ manager.run_passes(f);
+ }
+
+ ASSERT_FALSE(has_type<ngraph::element::Type_t::f16>(f));
}
\ No newline at end of file
if (!InferenceEngine::with_cpu_x86_bfloat16()) {
// on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
// tests are useless on such platforms
- return;
+ GTEST_SKIP();
}
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
InferenceEngine::CNNNetwork cnnNet(fnPtr);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <fstream>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ie_system_conf.h"
+
+#include <ngraph/ngraph.hpp>
+#include <ngraph_ops/fully_connected.hpp>
+
+namespace LayerTestsDefinitions {
+
+using InferenceEngine::Precision;
+using InferenceEngine::SizeVector;
+
+class MemoryConv : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ static std::string getTestCaseName(testing::TestParamInfo<LayerTestsUtils::basicParams> obj) {
+ Precision netPrecision;
+ SizeVector inputShapes, newInputShapes;
+ std::string targetDevice;
+ std::tie(netPrecision, inputShapes, targetDevice) = obj.param;
+
+ std::ostringstream result;
+ result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+ result << "netPRC=" << netPrecision.name() << "_";
+ result << "targetDevice=" << targetDevice;
+ return result.str();
+ }
+
+protected:
+ void SetUp() override {
+ SizeVector ie_shape;
+ std::tie(inPrc, ie_shape, targetDevice) = this->GetParam();
+
+ using namespace ngraph;
+ using std::make_shared;
+
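+ // Build a simple stateful net:
+ // input -> mul(input, mem_read) -> sigmoid -> fc1 -> fc2 -> output,
+ // with an Assign (memory write) branching off after fc1.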
+ Shape shape = ie_shape;
+ size_t C = shape[1];
+ element::Type type = ngraph::element::f32;
+
+ auto input = make_shared<op::v0::Parameter>(type, shape);
+ auto mem_i = make_shared<op::v0::Constant>(type, shape, 0);
+ auto mem_r = make_shared<op::v3::ReadValue>(mem_i, "id");
+
+ auto mul = make_shared<op::v0::Multiply>(mem_r, input);
+ auto sig = make_shared<op::v0::Sigmoid>(mul);
+
+ auto fc1_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
+ auto fc1_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
+ auto fc1 = make_shared<op::FullyConnected>(sig, fc1_w, fc1_b, shape);
+
+ auto fc2_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
+ auto fc2_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
+ auto fc2 = make_shared<op::FullyConnected>(fc1, fc2_w, fc2_b, shape);
+
+ auto mem_w = make_shared<op::v3::Assign>(fc1, "id");
+
+ // WA: ngraph limitation, control dependencies are required.
+ mem_w->add_control_dependency(mem_r);
+ fc2->add_control_dependency(mem_w);
+
+ function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector {fc2},
+ ngraph::ParameterVector {input},
+ "SimpleNet");
+ }
+};
+
+TEST_P(MemoryConv, CheckTypeConversion) {
+ if (!InferenceEngine::with_cpu_x86_bfloat16())
+ GTEST_SKIP();
+
+ auto ie = PluginCache::get().ie();
+ auto net = InferenceEngine::CNNNetwork(function);
+ auto exe_net = ie->LoadNetwork(net, "CPU");
+ auto inf_reg = exe_net.CreateInferRequest();
+
+ // check data type via exec graph
+ auto exec_graph = exe_net.GetExecGraphInfo();
+ auto exec_ops = exec_graph.getFunction()->get_ops();
+ std::shared_ptr<ngraph::Node> mem_r, mem_w;
+
+ for (auto &node : exec_ops) {
+ auto var = node->get_rt_info()["layerType"];
+ auto s_val = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(var);
+ if (s_val->get() == "MemoryOutput")
+ mem_w = node;
+ if (s_val->get() == "MemoryInput")
+ mem_r = node;
+ }
+
+ ASSERT_NE(nullptr, mem_r);
+ ASSERT_EQ(ngraph::element::bf16, mem_r->output(0).get_element_type());
+
+ ASSERT_NE(nullptr, mem_w);
+ ASSERT_EQ(ngraph::element::bf16, mem_w->input(0).get_element_type());
+}
+
+INSTANTIATE_TEST_CASE_P(CPU, MemoryConv,
+ ::testing::Combine(
+ ::testing::Values<Precision>(Precision::BF16, Precision::FP32),
+ ::testing::Values(SizeVector{1, 200}),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ MemoryConv::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "execution_graph_tests/keep_assing.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+INSTANTIATE_TEST_CASE_P(KeepAssign, ExecGraphKeepAssignNode,
+ ::testing::Values(CommonTestUtils::DEVICE_CPU),
+ ExecGraphKeepAssignNode::getTestCaseName);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/split_concat_memory.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+ InferenceEngine::Precision::FP32,
+ InferenceEngine::Precision::I32,
+ InferenceEngine::Precision::FP16,
+ InferenceEngine::Precision::I16,
+ InferenceEngine::Precision::U8,
+ InferenceEngine::Precision::I8,
+};
+
+const std::vector<InferenceEngine::SizeVector> shapes = {
+ {1, 8, 3, 2},
+ {3, 8, 3, 2},
+ {3, 8, 3},
+ {3, 8},
+};
+
+INSTANTIATE_TEST_CASE_P(CPU, SplitConcatMemory,
+ ::testing::Combine(
+ ::testing::ValuesIn(shapes),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(1),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ SplitConcatMemory::getTestCaseName);
+} // namespace
+
+
+
+
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "gtest/gtest.h"
+
+namespace LayerTestsDefinitions {
+
+class ExecGraphKeepAssignNode : public testing::TestWithParam<std::string> {
+public:
+ static std::string getTestCaseName(testing::TestParamInfo<std::string> obj);
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+namespace LayerTestsDefinitions {
+
+using SplitConcatMemoryParamsTuple = typename std::tuple<
+ std::vector<size_t>, // input shapes
+ InferenceEngine::Precision, // precision
+ int, // axis of split
+ std::string // device name
+>;
+
+
+class SplitConcatMemory : public testing::WithParamInterface<SplitConcatMemoryParamsTuple>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ static std::string getTestCaseName(testing::TestParamInfo<ParamType> obj);
+
+protected:
+ void SetUp() override;
+
+ int axis;
+};
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "execution_graph_tests/keep_assing.hpp"
+
+#include <ngraph/ngraph.hpp>
+#include <inference_engine.hpp>
+
+namespace LayerTestsDefinitions {
+
+std::string ExecGraphKeepAssignNode::getTestCaseName(testing::TestParamInfo<std::string> obj) {
+ std::string targetDevice = obj.param;
+ return "Dev=" + targetDevice;
+}
+
+/**
+ * An Assign/MemoryOutput operation node may be left hanging in the air (a leaf with no consumer),
+ * so the exec graph may lose it. This test checks that it is present in the dumped exec graph.
+ */
+TEST_P(ExecGraphKeepAssignNode, KeepAssignNode) {
+ auto device_name = this->GetParam();
+ ngraph::Shape shape = {3, 2};
+ ngraph::element::Type type = ngraph::element::f32;
+
+ using std::make_shared;
+ using namespace ngraph::op;
+
+ // Some simple graph with Memory(Assign) node // in read //
+ auto input = make_shared<Parameter>(type, shape); // | \ / //
+ auto mem_i = make_shared<Constant>(type, shape, 0); // | mul //
+ auto mem_r = make_shared<ReadValue>(mem_i, "id"); // | / \ //
+ auto mul = make_shared<Multiply>(mem_r, input); // sum assign //
+ auto mem_w = make_shared<Assign>(mul, "id"); // | //
+ auto sum = make_shared<Add>(mul, input); // out //
+
+ mem_w->add_control_dependency(mem_r);
+ sum->add_control_dependency(mem_w);
+
+ auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector {sum},
+ ngraph::ParameterVector {input},
+ "SimpleNet");
+
+ // Load into plugin and get exec graph
+ auto ie = InferenceEngine::Core();
+ auto net = InferenceEngine::CNNNetwork(function);
+ auto exec_net = ie.LoadNetwork(net, device_name);
+ auto exec_graph = exec_net.GetExecGraphInfo();
+ auto exec_ops = exec_graph.getFunction()->get_ops();
+
+ // Check Memory(Assign) node existence
+ bool assign_node_found = false;
+ for (auto &node : exec_ops) {
+ auto var = node->get_rt_info()["layerType"];
+ auto s_val = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(var);
+
+ if (s_val->get() == "MemoryOutput") {
+ assign_node_found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(assign_node_found);
+}
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "subgraph_tests/split_concat_memory.hpp"
+#include "common_test_utils/xml_net_builder/ir_net.hpp"
+
+namespace LayerTestsDefinitions {
+
+using namespace CommonTestUtils;
+using namespace InferenceEngine;
+
+std::string SplitConcatMemory::getTestCaseName(testing::TestParamInfo<ParamType> obj) {
+ InferenceEngine::Precision netPrecision;
+ InferenceEngine::SizeVector inputShapes;
+ int axis;
+ std::string targetDevice;
+ std::tie(inputShapes, netPrecision, axis, targetDevice) = obj.param;
+
+ std::ostringstream result;
+ result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+ result << "PRC=" << netPrecision.name() << "_";
+ result << "axis=" << axis << "_";
+ result << "dev=" << targetDevice;
+ return result.str();
+}
+
+void SplitConcatMemory::SetUp() {
+ SizeVector shape;
+ std::tie(shape, inPrc, axis, targetDevice) = this->GetParam();
+
+ auto shape_14 = shape;
+ shape_14[axis] /= 4;
+ auto shape_34 = shape;
+ shape_34[axis] -= shape_14[axis];
+
+ /*
+ * Cyclic buffer length of 4
+ * ______ ______
+ * [_mem1_] [_inp1_]
+ * _|______|_
+ * [__concat__]
+ * _____|______
+ * __|____ ___|__
+ * [_plus1_] [_spl1_]
+ * | | |
+ * __|___ __|___
+ * [_out1_] [_mem2_]
+ */
+ auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+ ngraph::Shape ng_share_14(shape_14);
+ ngraph::Shape ng_share_34(shape_34);
+
+ auto input = std::make_shared<ngraph::op::Parameter>(ngPrc, ng_share_14);
+ input->set_friendly_name("input");
+
+ auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ng_share_34, 0);
+ auto mem_r = std::make_shared<ngraph::op::ReadValue>(mem_c, "id");
+ auto cnc = std::make_shared<ngraph::op::Concat>(ngraph::NodeVector{mem_r, input}, axis);
+
+ std::vector<int64_t> chunks_val {static_cast<int64_t>(ng_share_14[axis]), static_cast<int64_t>(ng_share_34[axis])};
+ auto chunk_c = std::make_shared<ngraph::op::Constant>(::ngraph::element::i64, ngraph::Shape{chunks_val.size()}, chunks_val);
+ auto axis_c = std::make_shared<ngraph::op::Constant>(::ngraph::element::i64, ngraph::Shape{}, axis);
+ auto spl = std::make_shared<ngraph::op::v1::VariadicSplit>(cnc, axis_c, chunk_c);
+
+ auto one = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{}, 1);
+ auto plus = std::make_shared<ngraph::op::Add>(cnc, one, ngraph::op::AutoBroadcastSpec::NUMPY);
+ plus->set_friendly_name("plus_one");
+
+ auto mem_w = std::make_shared<ngraph::op::Assign>(spl->output(1), "id");
+
+ // WA: ngraph limitation. Assign should have a control dependency on the read,
+ // and something has to hold the Assign node in the function.
+ mem_w->add_control_dependency(mem_r);
+ plus->add_control_dependency(mem_w);
+
+ function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector {plus},
+ ngraph::ParameterVector {input},
+ "CyclicBuffer4");
+}
+
+TEST_P(SplitConcatMemory, cyclicBufferCorrectness) {
+ auto ie = PluginCache::get().ie();
+ cnnNetwork = InferenceEngine::CNNNetwork{function};
+
+ auto exe_net = ie->LoadNetwork(cnnNetwork, "CPU");
+ auto inf_reg = exe_net.CreateInferRequest();
+
+ /*
+ * Concat content per iteration (three quarters come from memory, one from the input):
+ * iter_1 | 0 | 0 | 0 | 1 |
+ * iter_2 | 0 | 0 | 1 | 2 |
+ * iter_3 | 0 | 1 | 2 | 3 |
+ */
+
+ auto i_blob = inf_reg.GetBlob("input");
+ auto o_blob = inf_reg.GetBlob("plus_one");
+
+ auto o_blob_ref = make_blob_with_precision(o_blob->getTensorDesc());
+ o_blob_ref->allocate();
+
+ auto fill_by_quarter = [this] (Blob::Ptr& blob, std::vector<float> vals) {
+ IE_ASSERT(vals.size() == 4);
+ auto quarter_blocked_shape = blob->getTensorDesc().getDims();
+
+ // split the axis dimension into chunks
+ IE_ASSERT(quarter_blocked_shape[axis] % vals.size() == 0);
+ quarter_blocked_shape[axis] /= vals.size();
+ quarter_blocked_shape.insert(quarter_blocked_shape.begin() + axis, vals.size());
+
+ auto quarter_blocked_view = make_reshape_view(blob, quarter_blocked_shape);
+ fill_data_with_broadcast(quarter_blocked_view, axis, vals);
+ };
+
+ // iteration 1
+ fill_data_const(i_blob, 1);
+ fill_by_quarter(o_blob_ref, {1, 1, 1, 2});
+ inf_reg.Infer();
+ Compare(o_blob_ref, o_blob);
+
+ // iteration 2
+ fill_data_const(i_blob, 2);
+ fill_by_quarter(o_blob_ref, {1, 1, 2, 3});
+ inf_reg.Infer();
+ Compare(o_blob_ref, o_blob);
+
+ // iteration 3
+ fill_data_const(i_blob, 3);
+ fill_by_quarter(o_blob_ref, {1, 2, 3, 4});
+ inf_reg.Infer();
+ Compare(o_blob_ref, o_blob);
+}
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2019-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <cmath>
+
+#include <debug.h> // to allow putting vector into exception string stream
+#include <details/ie_exception.hpp>
+
+#include <ie_blob.h>
+#include <blob_factory.hpp>
+
+namespace CommonTestUtils {
+
+bool isDenseBlob(const InferenceEngine::Blob::Ptr& blob) {
+ auto blk_desc = blob->getTensorDesc().getBlockingDesc();
+ auto dims = blk_desc.getBlockDims();
+ auto strs = blk_desc.getStrides();
+
+ IE_ASSERT(dims.size() == strs.size()) << " isDenseBlob: inconsistent tensor descriptor";
+
+ auto size = dims.size();
+ if (size == 0) return true;
+ if (size == 1) return strs[0] == 1;
+
+ for (auto i = size - 1; i > 0; i--) {
+ if (strs[i - 1] != strs[i] * dims[i])
+ return false;
+ }
+
+ return true;
+}
+
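+/**
+ * Copy an up-to-7D tensor element by element using explicit per-dimension strides.
+ * A zero stride in src_str repeats (broadcasts) the corresponding source dimension.
+ */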
+template<typename T>
+void copy_7D(void *src_raw_ptr, std::vector<size_t> &src_str, void *dst_raw_ptr, std::vector<size_t> &dst_str, std::vector<size_t> &dims) {
+ auto src_ptr = static_cast<T*>(src_raw_ptr);
+ auto dst_ptr = static_cast<T*>(dst_raw_ptr);
+
+ for (size_t d0 = 0; d0 < dims[0]; d0++) { auto src_ptr_0 = src_ptr + src_str[0]*d0; auto dst_ptr_0 = dst_ptr + dst_str[0]*d0;
+ for (size_t d1 = 0; d1 < dims[1]; d1++) { auto src_ptr_1 = src_ptr_0 + src_str[1]*d1; auto dst_ptr_1 = dst_ptr_0 + dst_str[1]*d1;
+ for (size_t d2 = 0; d2 < dims[2]; d2++) { auto src_ptr_2 = src_ptr_1 + src_str[2]*d2; auto dst_ptr_2 = dst_ptr_1 + dst_str[2]*d2;
+ for (size_t d3 = 0; d3 < dims[3]; d3++) { auto src_ptr_3 = src_ptr_2 + src_str[3]*d3; auto dst_ptr_3 = dst_ptr_2 + dst_str[3]*d3;
+ for (size_t d4 = 0; d4 < dims[4]; d4++) { auto src_ptr_4 = src_ptr_3 + src_str[4]*d4; auto dst_ptr_4 = dst_ptr_3 + dst_str[4]*d4;
+ for (size_t d5 = 0; d5 < dims[5]; d5++) { auto src_ptr_5 = src_ptr_4 + src_str[5]*d5; auto dst_ptr_5 = dst_ptr_4 + dst_str[5]*d5;
+ for (size_t d6 = 0; d6 < dims[6]; d6++) { auto src_ptr_6 = src_ptr_5 + src_str[6]*d6; auto dst_ptr_6 = dst_ptr_5 + dst_str[6]*d6;
+ *dst_ptr_6 = *src_ptr_6;
+ }}}}}}}
+}
+
+void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine::Blob::Ptr& values) {
+ using InferenceEngine::SizeVector;
+ constexpr size_t MAX_N_DIMS = 7; // Suppose it's enough
+
+ IE_ASSERT(blob->getTensorDesc().getPrecision() == values->getTensorDesc().getPrecision());
+
+ auto values_dims = values->getTensorDesc().getDims();
+ auto blob_dims = blob->getTensorDesc().getDims();
+ auto n_dims = blob_dims.size();
+ IE_ASSERT(values_dims.size() <= n_dims);
+ IE_ASSERT(n_dims <= MAX_N_DIMS);
+
+ SizeVector src_dims(MAX_N_DIMS, 1);
+ std::copy(values_dims.rbegin(), values_dims.rend(), src_dims.rbegin());
+
+ SizeVector dst_dims(MAX_N_DIMS, 1);
+ std::copy(blob_dims.rbegin(), blob_dims.rend(), dst_dims.rbegin());
+
+ bool compatible = true;
+ for (int i = 0; i < MAX_N_DIMS; i++) {
+ if (src_dims[i] != dst_dims[i] && src_dims[i] != 1)
+ compatible = false;
+ }
+ IE_ASSERT(compatible) << "fill_data_with_broadcast error: Tensor shape " << values_dims
+ << " can not be broadcasted to shape " << blob_dims;
+
+ auto fill_strides_like_plain = [] (SizeVector dims) {
+ SizeVector str(dims.size());
+ if (str.empty())
+ return str;
+ else
+ str.back() = 1;
+
+ // stride[i] = stride[i+1]*d[i+1]
+ std::transform(dims.rbegin(), dims.rend() - 1, str.rbegin(), str.rbegin() + 1,
+ [] (size_t d, size_t s) { return d * s; });
+
+ // zero out strides for broadcast dimensions (dimensions equal to 1)
+ std::transform(str.begin(), str.end(), dims.begin(), str.begin(),
+ [] (size_t s, size_t d) { return d == 1 ? 0 : s; });
+
+ return str;
+ };
+
+ SizeVector src_strides = fill_strides_like_plain(src_dims);
+ SizeVector dst_strides = fill_strides_like_plain(dst_dims);
+
+ auto get_data = [] (InferenceEngine::Blob::Ptr &blob) {
+ auto mem_blob = dynamic_cast<InferenceEngine::MemoryBlob*>(blob.get());
+ auto mem = mem_blob->rwmap();
+ return mem.as<float*>();
+ };
+
+ auto dst_ptr = get_data(blob);
+ auto src_ptr = get_data(values);
+
+ switch (blob->getTensorDesc().getPrecision()) {
+ case InferenceEngine::Precision::FP32:
+ case InferenceEngine::Precision::I32:
+ copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
+ break;
+ case InferenceEngine::Precision::I16:
+ case InferenceEngine::Precision::U16:
+ case InferenceEngine::Precision::FP16:
+ case InferenceEngine::Precision::BF16:
+ copy_7D<uint16_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
+ break;
+ case InferenceEngine::Precision::U8:
+ case InferenceEngine::Precision::I8:
+ copy_7D<uint8_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
+ break;
+ default:
+ THROW_IE_EXCEPTION << "Unsupported precision by fill_data_with_broadcast function";
+ }
+}
+
+void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, size_t axis, std::vector<float> values) {
+ InferenceEngine::SizeVector value_dims(blob->getTensorDesc().getDims().size() - axis, 1);
+ value_dims.front() = values.size();
+ auto prc = blob->getTensorDesc().getPrecision();
+ auto layout = InferenceEngine::TensorDesc::getLayoutByDims(value_dims);
+ InferenceEngine::TensorDesc value_tdesc(prc, value_dims, layout);
+
+ auto values_blob = make_blob_with_precision(value_tdesc, values.data());
+ fill_data_with_broadcast(blob, values_blob);
+}
+
+InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &blob, InferenceEngine::SizeVector new_shape) {
+ using InferenceEngine::TensorDesc;
+ auto new_size = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies<size_t>());
+ IE_ASSERT(new_size == blob->size());
+
+ auto orig_mem_blob = dynamic_cast<InferenceEngine::MemoryBlob*>(blob.get());
+ auto orig_mem = orig_mem_blob->rwmap();
+ auto orig_ptr = orig_mem.as<float*>();
+
+ auto new_tdesc = TensorDesc(blob->getTensorDesc().getPrecision(), new_shape, TensorDesc::getLayoutByDims(new_shape));
+ auto new_blob = make_blob_with_precision(new_tdesc, orig_ptr);
+ return new_blob;
+}
+
+/**
+ * Fill a tensor by repeating the given values cyclically.
+ *
+ * @tparam PRC precision of the destination buffer
+ * @param data raw pointer to the destination buffer
+ * @param size number of elements to fill
+ * @param values values to repeat
+ */
+template<InferenceEngine::Precision::ePrecision PRC = InferenceEngine::Precision::FP32>
+static void fill_data_const(void *data, size_t size, const std::vector<float> &values) {
+ auto t_data = static_cast<typename InferenceEngine::PrecisionTrait<PRC>::value_type *>(data);
+ auto val_size = values.size();
+ for (size_t i = 0, j = 0; i < size; i++) {
+ t_data[i] = values[j++];
+ if (j == val_size) j = 0;
+ }
+}
+
+void fill_data_const(InferenceEngine::Blob::Ptr& blob, const std::vector<float> &val) {
+ auto prc = blob->getTensorDesc().getPrecision();
+ auto raw_data_ptr = blob->buffer().as<void*>();
+ auto raw_data_size = blob->size();
+
+ using InferenceEngine::Precision;
+ switch (prc) {
+ case Precision::FP32:
+ fill_data_const<Precision::FP32>(raw_data_ptr, raw_data_size, val);
+ break;
+ case Precision::I32:
+ fill_data_const<Precision::I32>(raw_data_ptr, raw_data_size, val);
+ break;
+ case Precision::U8:
+ fill_data_const<Precision::U8>(raw_data_ptr, raw_data_size, val);
+ break;
+ case Precision::I8:
+ fill_data_const<Precision::I8>(raw_data_ptr, raw_data_size, val);
+ break;
+ case Precision::U16:
+ fill_data_const<Precision::U16>(raw_data_ptr, raw_data_size, val);
+ break;
+ case Precision::I16:
+ fill_data_const<Precision::I16>(raw_data_ptr, raw_data_size, val);
+ break;
+ default:
+ THROW_IE_EXCEPTION << "Unsupported precision by fill_data_const() function";
+ }
+}
+
+void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val) {
+ fill_data_const(blob, std::vector<float> {val});
+}
+} // namespace CommonTestUtils
#include <ngraph/type/float16.hpp>
#include <ie_blob.h>
+#include <blob_factory.hpp>
#include <random>
namespace CommonTestUtils {
}
}
-static void fill_data_const(float *data, size_t size, float value) {
- for (size_t i = 0; i < size; i++) {
- data[i] = value;
- }
-}
+/**
+ * Fill the blob with data from the values blob. Broadcast semantics are supported,
+ * with shapes aligned through the last dimension.
+ *
+ * @param blob tensor to fill in
+ * @param values source tensor to be broadcast
+ */
+void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine::Blob::Ptr& values);
-static void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val) {
- fill_data_const(blob->buffer().as<float*>(), blob->size(), val);
-}
+/**
+ * Wrapper on top of fill_data_with_broadcast with simplified signature
+ *
+ * @param blob the destination blob to fill in
+ * @param axis axis along which the values are applied
+ * @param values data to broadcast
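+ *
+ * Example (assuming a 4D NCHW blob with 3 channels):
+ *     fill_data_with_broadcast(blob, 1, {0.f, 1.f, 2.f});  // each channel gets its own value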
+ */
+void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, size_t axis, std::vector<float> values);
+
+/**
+ * Make a view blob with a new shape. It reinterprets the original tensor data as a tensor with the new shape.
+ *
+ * NB! Limitation: the new blob does not take ownership of the data buffer. The original blob has to stay alive
+ * while the view is in use.
+ *
+ * @param blob original source tensor
+ * @param new_shape new shape for the view blob
+ * @return the new view blob
+ */
+InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &blob, InferenceEngine::SizeVector new_shape);
+
+/**
+ * Fill the blob with a single value for all elements.
+ *
+ * Similar to:
+ * fill_data_with_broadcast(blob, 0, {val});
+ *
+ * @param blob tensor to fill in
+ * @param val value to set into each element
+ */
+void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val);
static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) {
float center_h = (height - 1.0f) / 2;
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k);
}
-
template<>
void inline fill_data_random<InferenceEngine::Precision::FP16>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
fill_data_random_float<InferenceEngine::Precision::FP16>(blob, range, start_from, k);
}
}
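+// Blob-to-blob comparison: compares the two blobs element-wise according to the actual blob precision.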
+void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual) {
+ auto get_raw_buffer = [] (const InferenceEngine::Blob::Ptr &blob) {
+ auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
+ IE_ASSERT(memory);
+ const auto lockedMemory = memory->wmap();
+ return lockedMemory.as<const std::uint8_t *>();
+ };
+ const auto expectedBuffer = get_raw_buffer(expected);
+ const auto actualBuffer = get_raw_buffer(actual);
+
+ const auto &precision = actual->getTensorDesc().getPrecision();
+ const auto &size = actual->size();
+ switch (precision) {
+ case InferenceEngine::Precision::FP32:
+ Compare(reinterpret_cast<const float *>(expectedBuffer), reinterpret_cast<const float *>(actualBuffer),
+ size, threshold);
+ break;
+ case InferenceEngine::Precision::I32:
+ Compare(reinterpret_cast<const std::int32_t *>(expectedBuffer),
+ reinterpret_cast<const std::int32_t *>(actualBuffer), size, 0);
+ break;
+ default:
+ FAIL() << "Comparator for " << precision << " precision isn't supported";
+ }
+}
+
void LayerTestsCommon::ConfigurePlugin() {
if (!configuration.empty()) {
core->SetConfig(configuration, targetDevice);
virtual void Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual);
+ virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual);
+
virtual void SetRefMode(RefMode mode);
protected:
ROOT ${CMAKE_CURRENT_SOURCE_DIR}
INCLUDES
${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin
+ ${IE_MAIN_SOURCE_DIR}/src/transformations/include
OBJECT_FILES
${MKLDNN_SRC_OBJ}
LINK_LIBRARIES
unitTestUtils
mkldnn
+ inference_engine_transformations
ADD_CPPLINT
LABELS
CPU
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <gtest/gtest.h>
+
+#include <ngraph/ngraph.hpp>
+#include <ngraph_ops/fully_connected.hpp>
+
+#include <inference_engine.hpp>
+#include <details/ie_cnn_network_tools.h>
+#include <convert_function_to_cnn_network.hpp>
+#include <bf16transformer.h>
+
+using ngraph::Shape;
+using ngraph::element::Type;
+using namespace ngraph::op;
+using std::make_shared;
+using InferenceEngine::Precision;
+
+std::map<std::string, InferenceEngine::CNNLayerPtr> get_layer_collection(InferenceEngine::CNNNetwork net) {
+ IE_SUPPRESS_DEPRECATED_START
+ auto all_layers = InferenceEngine::details::CNNNetSortTopologically(net);
+
+ std::map<std::string, InferenceEngine::CNNLayerPtr> res;
+ for (auto &layer : all_layers) {
+ res[layer->name] = layer;
+ }
+ IE_SUPPRESS_DEPRECATED_END
+ return res;
+}
+
+enum TypeOfNet { NG, IE };
+InferenceEngine::CNNNetwork create_net(std::shared_ptr<ngraph::Function> &func, TypeOfNet type) {
+ InferenceEngine::CNNNetwork ng_net(func);
+ if (type == NG)
+ return ng_net;
+ else
+ return InferenceEngine::CNNNetwork {InferenceEngine::details::convertFunctionToICNNNetwork(func, ng_net)};
+}
+
+
+TEST(BF16TransformerTest, KeepMemoryPrecision) {
+ /*
+ * Suggested pattern
+ * _______ _____
+ * [_mem_r_] [_inp_]
+ * _|______|_
+ * [___mul____]
+ * __|__
+ * [_sig_]
+ * __|__
+ * [_fc1_]
+ * ___|____
+ * ___|___ __|__
+ * [_mem_w_] [_fc2_]
+ * __|__
+ * [_out_]
+ *
+ * Without special care about memory precision, mem_w takes the precision of the data
+ * between the fc1 and fc2 operations (BF16 when BF16 is enabled), while mem_r
+ * still keeps the original precision.
+ */
+ Shape shape = {3, 2};
+ Type type = ngraph::element::f32;
+ auto input = make_shared<Parameter>(type, shape);
+ auto mem_i = make_shared<Constant>(type, shape, 0);
+ auto mem_r = make_shared<ReadValue>(mem_i, "id");
+ mem_r->set_friendly_name("mem_r");
+
+ auto mul = make_shared<Multiply>(mem_r, input);
+ auto sig = make_shared<Sigmoid>(mul);
+
+ auto fc1_w = make_shared<Constant>(type, Shape{2, 2}, 1);
+ auto fc1_b = make_shared<Constant>(type, Shape{2}, 1);
+ auto fc1 = make_shared<FullyConnected>(sig, fc1_w, fc1_b, shape);
+
+ auto fc2_w = make_shared<Constant>(type, Shape{2, 2}, 1);
+ auto fc2_b = make_shared<Constant>(type, Shape{2}, 1);
+ auto fc2 = make_shared<FullyConnected>(fc1, fc2_w, fc2_b, shape);
+
+ auto mem_w = make_shared<Assign>(fc1, "id");
+ mem_w->set_friendly_name("mem_w");
+
+ // WA: ngraph limitation, control dependencies are required.
+ mem_w->add_control_dependency(mem_r);
+ fc2->add_control_dependency(mem_w);
+
+ auto function = make_shared<ngraph::Function>(
+ ngraph::NodeVector {fc2},
+ ngraph::ParameterVector {input});
+
+ auto net = create_net(function, IE);
+
+ // Apply tested BF16 transformation
+ MKLDNNPlugin::BF16Transformer transformer;
+ transformer.convertToBFloat16(net);
+
+ // Check precision
+ auto layers = get_layer_collection(net);
+ IE_SUPPRESS_DEPRECATED_START
+ Precision prc_mem_r = layers["mem_r"]->outData[0]->getPrecision();
+ Precision prc_mem_w = layers["mem_w"]->insData[0].lock()->getPrecision();
+ IE_SUPPRESS_DEPRECATED_END
+
+ ASSERT_EQ(prc_mem_r, Precision::BF16);
+ ASSERT_EQ(prc_mem_w, Precision::BF16);
+}
+
+TEST(BF16TransformerTest, DISABLED_KeepMemoryPrecisionWithGEMM) {
+ /* _______ _____
+ * [_mem_r_] [_inp_]
+ * _|______|_
+ * [___mul____]
+ * __|__
+ * [_sig_]
+ * __|____
+ * [_gemm1_]
+ * ___|____
+ * ___|___ __|____
+ * [_mem_w_] [_gemm2_]
+ * __|__
+ * [_out_]
+ *
+ * Same as KeepMemoryPrecision test with replacing FC -> GEMM
+ */
+ Shape shape = {3, 2};
+ Type type = ngraph::element::f32;
+ auto input = make_shared<Parameter>(type, shape);
+ auto mem_i = make_shared<Constant>(type, shape, 0);
+ auto mem_r = make_shared<ReadValue>(mem_i, "id");
+ mem_r->set_friendly_name("mem_r");
+
+ auto mul = make_shared<Multiply>(mem_r, input);
+ auto sig = make_shared<Sigmoid>(mul);
+
+ auto fc1_w = make_shared<Constant>(type, Shape{2, 2}, 1);
+ auto fc1 = make_shared<MatMul>(sig, fc1_w);
+
+ auto fc2_w = make_shared<Constant>(type, Shape{2, 2}, 1);
+ auto fc2 = make_shared<MatMul>(fc1, fc2_w);
+
+ auto mem_w = make_shared<Assign>(fc1, "id");
+ mem_w->set_friendly_name("mem_w");
+
+ // WA: ngraph limitation, control dependencies are required.
+ mem_w->add_control_dependency(mem_r);
+ fc2->add_control_dependency(mem_w);
+
+ auto function = make_shared<ngraph::Function>(
+ ngraph::NodeVector {fc2},
+ ngraph::ParameterVector {input});
+
+ auto net = create_net(function, IE);
+
+ // Apply tested BF16 transformation
+ MKLDNNPlugin::BF16Transformer transformer;
+ transformer.convertToBFloat16(net);
+
+ // Check precision
+ auto layers = get_layer_collection(net);
+ IE_SUPPRESS_DEPRECATED_START
+ Precision prc_mem_r = layers["mem_r"]->outData[0]->getPrecision();
+ Precision prc_mem_w = layers["mem_w"]->insData[0].lock()->getPrecision();
+ IE_SUPPRESS_DEPRECATED_END
+
+ ASSERT_EQ(prc_mem_r, Precision::BF16);
+ ASSERT_EQ(prc_mem_w, Precision::BF16);
+}
if (p.op != eltwise_test_params::Pow)
CommonTestUtils::fill_data_sine(inputBlob->buffer().as<float*>(), inputBlob->size(), 100, 10, 10);
else
- CommonTestUtils::fill_data_const(inputBlob->buffer().as<float*>(), inputBlob->size(), 2);
+ CommonTestUtils::fill_data_const(inputBlob, 2);
srcs_vec.push_back(inputBlob);
}
Blob::Ptr output_low_data = make_shared_blob<float>({Precision::FP32, { p.ic_const_blobs }, Layout::C});
output_low_data->allocate();
if (p.levels == 2) {
- CommonTestUtils::fill_data_const(output_low_data->buffer().as<float*>(), output_low_data->size(), low_val);
+ CommonTestUtils::fill_data_const(output_low_data, low_val);
} else {
CommonTestUtils::fill_data_sine(output_low_data->buffer().as<float*>(), output_low_data->size(), low_center, 2.f, 0.3f);
};
Blob::Ptr output_high_data = make_shared_blob<float>({Precision::FP32, {p.ic_const_blobs}, Layout::C});
output_high_data->allocate();
if (p.levels == 2) {
- CommonTestUtils::fill_data_const(output_high_data->buffer().as<float*>(), output_high_data->size(), high_val);
+ CommonTestUtils::fill_data_const(output_high_data, high_val);
} else {
CommonTestUtils::fill_data_sine(output_high_data->buffer().as<float*>(), output_high_data->size(), high_center, 2.f, 0.3f);
};
auto output_shape = get_input_partial_shape(0);
VariableInfo info = {output_shape, arg_t, m_variable_id};
- m_variable = std::make_shared<Variable>(info);
+ if (m_variable == nullptr)
+ m_variable = std::make_shared<Variable>(info);
+ else
+ m_variable->update(info);
set_output_type(0, arg_t, output_shape);
}