// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "net_pass.h"

#include "blob_factory.hpp"
#include "details/ie_cnn_network_tools.h"
#include "ie_layers_internal.hpp"
#include "graph_tools.hpp"

#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace InferenceEngine {
namespace NetPass {
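
/**
 * Variadic membership check: one_of(x, a, b, c) == (x == a || x == b || x == c).
 */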
template <typename T, typename P>
inline bool one_of(T val, P item) { return val == item; }
template <typename T, typename P, typename... Args>
inline bool one_of(T val, P item, Args... item_others) {
    return val == item || one_of(val, item_others...);
}

/************************************************************/
/****  TI Utils  ********************************************/
/************************************************************/
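
/**
 * Collect the complete set of input Data objects for a graph: the explicit
 * heads plus fake "holder" Data for layers with no inputs at all (e.g. const
 * placeholders), which are otherwise unreachable from the heads.
 */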
static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr> &heads) {
    CNNLayerSet inputLayers;
    std::unordered_set<CNNLayer*> allLayers;

    // Define all start layers
    for (const auto & data : heads) {
        auto &secondLayers = data->getInputTo();

        if (secondLayers.empty())
            continue;

        details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
            if (layer->insData.empty()) {
                inputLayers.insert(layer);
            }
        }, false);
    }

    std::vector<DataPtr> res = heads;
    // Add fake input data to point to layers which are not reachable
    // from the heads (like const placeholders)
    for (auto &starter : inputLayers) {
        DataPtr holder(new Data(starter->name + ":input_holder", starter->precision));
        holder->getInputTo()[starter->name] = starter;
        res.push_back(holder);
    }

    return res;
}

std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body &body) {
    std::vector<CNNLayerPtr> all_layers;

    auto all_input_layers = getAllInputs(body.inputs);
    CNNNetForestDFS(all_input_layers, [&](CNNLayerPtr current) {
        all_layers.push_back(current);
    }, false);
    std::reverse(all_layers.begin(), all_layers.end());
    return all_layers;
}
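
/**
 * Deep copy of a TI body. Layers are cloned via injectData, input Data
 * objects are recreated, and all producer/consumer links are remapped from
 * the old objects to their new counterparts. The optional suffix is appended
 * to every layer and Data name of the copy.
 */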
TensorIterator::Body CopyTIBody(const TensorIterator::Body &body, std::string suffix) {
    struct NoneStruct {};
    auto cp = [&](CNNLayerPtr lp) {
        return injectData<NoneStruct>(lp);
    };

    const auto all_orig = TIBodySortTopologically(body);
    auto num = all_orig.size();

    std::unordered_map<CNNLayer*, CNNLayerPtr> old2new_l;
    for (size_t i = 0; i < num; i++) {
        auto &orig = all_orig[i];
        old2new_l[orig.get()] = cp(orig);
    }

    std::unordered_map<Data*, DataPtr> old2new_d;
    for (auto &in : body.inputs) {
        auto new_data = std::make_shared<Data>(*in.get());
        for (auto &to : new_data->getInputTo())
            to.second = old2new_l[to.second.get()];

        old2new_d[in.get()] = new_data;
    }

    for (const auto &old : all_orig) {
        auto &new_one = old2new_l[old.get()];

        for (size_t i = 0; i != old->outData.size(); i++) {
            auto old_data = old->outData[i];
            auto new_data = new_one->outData[i];
            new_data->getCreatorLayer() = CNNLayerWeakPtr(new_one);
            old2new_d[old_data.get()] = new_data;

            for (auto &to : new_data->getInputTo())
                to.second = old2new_l[to.second.get()];
        }

        for (size_t i = 0; i != old->insData.size(); i++) {
            auto old_data = old->insData[i].lock();
            auto new_data = old2new_d.at(old_data.get());
            new_one->insData[i] = new_data;
        }
    }

    if (!suffix.empty()) {
        for (auto &kvp : old2new_l) {
            auto layer = kvp.second;
            auto old_name = layer->name;
            layer->name += suffix;
            for (auto &ins : layer->insData) {
                ins.lock()->getInputTo().erase(old_name);
                ins.lock()->getInputTo()[layer->name] = layer;
            }
        }
        for (auto &kvp : old2new_d) kvp.second->setName(kvp.second->getName() + suffix);
    }

    TensorIterator::Body res;
    for (auto &in : body.inputs)
        res.inputs.emplace_back(old2new_d[in.get()]);

    for (auto &out : body.outputs)
        res.outputs.emplace_back(old2new_d[out.get()]);

    // The graph is a set of shared_ptrs where a parent holds its children.
    // The res.inputs vector holds the heads of the graph, and every node
    // should be reachable by a directed search started from them. But const
    // placeholders have no inputs and cannot be reached that way, so we have
    // to hold them in some other way.
    //
    // Add one more Data object which has no counterpart in the original
    // network. It will hold all otherwise unreachable const placeholders.
    std::vector<CNNLayerPtr> to_hold;
    for (auto &kvp : old2new_l) {
        auto layer = kvp.second;
        if (layer->insData.empty())
            to_hold.emplace_back(layer);
    }
    if (!to_hold.empty()) {
        auto holder = DataPtr(new Data("const_holder", Precision::UNSPECIFIED));
        for (auto layer : to_hold) {
            holder->getInputTo()[layer->name] = layer;
        }
        res.inputs.emplace_back(holder);
    }
    return res;
}

/************************************************************/
/****  TI rule helpers  *************************************/
/************************************************************/
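
/**
 * Check that a port map rule iterates over the full range of the data tensor
 * along rule.axis. Negative start/end values count from the end of the axis:
 * for size == 10, the rule {start = 0, end = -1, stride = 1} gives
 * begin == 0 and end == 10, i.e. the full forward range.
 */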
inline bool is_full_ranged(const TensorIterator::PortMap &rule, const DataPtr &data) {
    if (!data)
        THROW_IE_EXCEPTION << "Internal error. data == nullptr";

    if (rule.axis == -1 || !one_of(rule.stride, 1, -1))
        return false;

    auto &shape = data->getDims();
    int size = shape[rule.axis];

    int begin = rule.start >= 0 ? rule.start : size + rule.start + 1;
    int end = rule.end >= 0 ? rule.end : size + rule.end + 1;

    return (rule.stride == 1)
        ? begin == 0 && end == size
        : begin == size && end == 0;
}

using RuleSet = std::vector<TensorIterator::PortMap>;
using RuleClassSet = std::tuple<RuleSet, RuleSet, RuleSet>;

/**
 * @brief Helper to split the port mapping rules into three groups:
 *
 *     first_class  - rules with an iteration component (axis != -1)
 *     second_class - rules without iteration and with no back-edge connected to the same port
 *     third_class  - rules without iteration but with a corresponding back-edge
 *
 * @param ti TensorIterator layer to analyze
 * @return tuple with the three classes of port map rules
 */
static RuleClassSet classifyInputRules(const TensorIterator &ti) {
    RuleSet first_class_rules, second_class_rules, third_class_rules;

    std::set<int> ports_with_backedge;
    for (const auto &back_edge : ti.back_edges) ports_with_backedge.insert(back_edge.to);

    for (const auto &rule : ti.input_port_map) {
        if (rule.axis != -1)
            first_class_rules.push_back(rule);

        else if (!ports_with_backedge.count(rule.to))
            second_class_rules.push_back(rule);

        else
            third_class_rules.push_back(rule);
    }
    return RuleClassSet {first_class_rules, second_class_rules, third_class_rules};
}

static RuleClassSet classifyOutputRules(const TensorIterator &ti) {
    RuleSet first_class_rules, second_class_rules, third_class_rules;

    std::set<int> ports_with_backedge;
    for (const auto &back_edge : ti.back_edges) ports_with_backedge.insert(back_edge.from);

    for (const auto &rule : ti.output_port_map) {
        if (rule.axis != -1)
            first_class_rules.push_back(rule);

        else if (!ports_with_backedge.count(rule.to))
            second_class_rules.push_back(rule);

        else
            third_class_rules.push_back(rule);
    }
    return RuleClassSet {first_class_rules, second_class_rules, third_class_rules};
}

/**
 * Merge the slave data connections into the master Data object.
 * All consumers of slave are relinked to master.
 *
 * @param master Data object to keep
 * @param slave Data object to merge into master
 */
void CombineData(DataPtr &master, DataPtr &slave) {
    for (auto &kvp : slave->getInputTo()) {
        auto &slave_layer = kvp.second;
        for (auto &slv_ins_wptr : slave_layer->insData) {
            auto slv_ins = slv_ins_wptr.lock();
            // Replace slave ptr with master
            if (slv_ins == slave) slv_ins_wptr = master;
        }
        master->getInputTo()[slave_layer->name] = slave_layer;
    }
}

/************************************************************/
/****  Converter Passes  ************************************/
/************************************************************/

static RNNSequenceLayer::CellType cell_type_from_name(const std::string &layer_type) {
    RNNSequenceLayer::CellType res;
    if (layer_type == "LSTMCell")
        res = RNNSequenceLayer::LSTM;
    else if (layer_type == "GRUCell")
        res = RNNSequenceLayer::GRU;
    else if (layer_type == "RNNCell")
        res = RNNSequenceLayer::RNN;
    else
        THROW_IE_EXCEPTION << "Unknown Cell type (" << layer_type << "). Expected LSTMCell|GRUCell|RNNCell";
    return res;
}

static std::string cell_name(RNNSequenceLayer::CellType type) {
    std::string res;
    switch (type) {
    case RNNSequenceLayer::LSTM:
        res = "LSTM";
        break;
    case RNNSequenceLayer::GRU:
    case RNNSequenceLayer::GRU_LBR:
        res = "GRU";
        break;
    case RNNSequenceLayer::RNN:
        res = "RNN";
        break;
    }
    return res;
}
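
/**
 * Try to replace a TensorIterator whose body matches the canonical
 *     Reshape (squeeze) -> [LSTM|GRU|RNN]Cell -> Reshape (unsqueeze)
 * pattern with a single [LSTM|GRU|RNN]Sequence layer. Returns true for
 * layers that are not a TensorIterator and on successful conversion,
 * false if the TI does not match the supported pattern.
 */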
template <typename N>
bool convertToRNNSeq(CNNLayerPtr cur, const N &net) {
    if (cur->type != "TensorIterator") return true;

    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
    IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";

    auto all_body_layers = TIBodySortTopologically(ti->body);

    // Check that the body is: squeeze -> rnn cell -> unsqueeze
    if (all_body_layers.size() != 3
        || all_body_layers[0]->type != "Reshape"
        || !one_of(all_body_layers[1]->type, "GRUCell", "RNNCell", "LSTMCell")
        || all_body_layers[2]->type != "Reshape")
        return false;

    auto rsp1 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[0]);
    auto cell = std::dynamic_pointer_cast<RNNCellBase>(all_body_layers[1]);
    auto rsp2 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[2]);

    IE_ASSERT(rsp1);
    IE_ASSERT(cell);
    IE_ASSERT(rsp2);

    int NS = (cell->cellType == RNNSequenceLayer::LSTM) ? 2 : 1;  // number of states

    IE_ASSERT(cell->insData.size() == NS + 1);  // {data, state1, [state2]}
    IE_ASSERT(cell->outData.size() == NS);      // {state1, [state2]}

    if (cell->insData[0].lock()->getCreatorLayer().lock() != rsp1 ||
        cell->outData[0]->getInputTo().begin()->second != rsp2)
        return false;

    // Check port mapping
    auto _indx_in = [&](const std::vector<DataPtr> &scope, const DataPtr &data) {
        int indx = std::find(scope.begin(), scope.end(), data) - scope.begin();
        return indx == static_cast<int>(scope.size()) ? -1 : indx;
    };

    int in_dt_idx = _indx_in(ti->body.inputs, rsp1->insData[0].lock());
    int in_hs_idx = _indx_in(ti->body.inputs, cell->insData[1].lock());
    int in_cs_idx = NS == 2 ? _indx_in(ti->body.inputs, cell->insData[2].lock()) : -1;

    int out_dt_idx = _indx_in(ti->body.outputs, rsp2->outData[0]);
    int out_hs_idx = _indx_in(ti->body.outputs, cell->outData[0]);
    int out_cs_idx = NS == 2 ? _indx_in(ti->body.outputs, cell->outData[1]) : -1;

    // Indexes should be [0,1,2] : sum == 3, or [0,1,-1] : sum == 0
    int sum = (NS - 1) * 3;
    if (in_hs_idx + in_cs_idx + in_dt_idx != sum || out_hs_idx + out_cs_idx + out_dt_idx != sum)
        return false;

    std::map<int, TensorIterator::PortMap> i2map, o2map, be2map;
    for (auto &m : ti->input_port_map) i2map[m.to] = m;
    for (auto &m : ti->output_port_map) o2map[m.to] = m;
    for (auto &m : ti->back_edges) be2map[m.to] = m;

    if (!one_of(i2map.size(), NS + 1, 1) ||
        !one_of(o2map.size(), NS + 1, 1) ||
        !one_of(be2map.size(), NS))
        return false;

    auto in_iter_rule = i2map[in_dt_idx];
    auto in_iter_data = ti->insData[in_iter_rule.from].lock();

    auto out_iter_rule = o2map[out_dt_idx];
    auto out_iter_data = ti->outData[out_iter_rule.from];

    // TI iterates only over the full range of the tensor
    if (!is_full_ranged(in_iter_rule, in_iter_data) ||
        !is_full_ranged(out_iter_rule, out_iter_data))
        return false;

    // Only the same axis and stride are supported for in/out data tensors
    if (in_iter_rule.axis != out_iter_rule.axis ||
        in_iter_rule.stride != out_iter_rule.stride)
        return false;

    // Only the first and second dims are supported as the sequence axis
    if (!one_of(in_iter_rule.axis, 0, 1))
        return false;

    bool no_init_state = i2map.size() == 1;
    bool no_last_state = o2map.size() == 1;

    if (!no_init_state && (i2map[in_hs_idx].axis != -1 || (NS == 2 && i2map[in_cs_idx].axis != -1)))
        return false;
    if (!no_last_state && (o2map[out_hs_idx].axis != -1 || (NS == 2 && o2map[out_cs_idx].axis != -1)))
        return false;

    // i_order/o_order hold the TI port indexes in the natural port order
    // of the new Sequence layer: {data, state1, [state2]}
    std::vector<int> i_order {i2map[in_dt_idx].from};
    if (!no_init_state)
        i_order.push_back(i2map[in_hs_idx].from);
    if (!no_init_state && NS == 2)
        i_order.push_back(i2map[in_cs_idx].from);

    std::vector<int> o_order {o2map[out_dt_idx].from};
    if (!no_last_state)
        o_order.push_back(o2map[out_hs_idx].from);
    if (!no_last_state && NS == 2)
        o_order.push_back(o2map[out_cs_idx].from);

    std::string name = cell->name + "_sequence";
    std::string type = cell_name(cell->cellType) + "Sequence";

    auto rnn = std::make_shared<RNNSequenceLayer>(LayerParams{ name, type, cell->precision });
    rnn->axis = in_iter_rule.axis;
    rnn->direction = in_iter_rule.stride == 1
        ? RNNSequenceLayer::FWD
        : RNNSequenceLayer::BWD;

    // Copy base RNN cell fields
    rnn->cellType = cell->cellType;
    rnn->blobs = cell->blobs;
    rnn->_weights = cell->_weights;
    rnn->_biases = cell->_biases;
    rnn->blobs["weights"] = rnn->_weights;
    rnn->blobs["biases"] = rnn->_biases;
    rnn->activations = cell->activations;
    rnn->activation_alpha = cell->activation_alpha;
    rnn->activation_beta = cell->activation_beta;
    rnn->hidden_size = cell->hidden_size;
    rnn->clip = cell->clip;

    for (int i : i_order) {
        auto in_data = ti->insData[i].lock();
        in_data->getInputTo().erase(ti->name);
        in_data->getInputTo()[rnn->name] = rnn;
        rnn->insData.push_back(in_data);
    }
    for (int i : o_order) {
        rnn->outData.push_back(ti->outData[i]);
        rnn->outData.back()->getCreatorLayer() = rnn;
    }

    return true;
}
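
/**
 * Unroll a TensorIterator into explicit per-iteration copies of its body.
 * Iterated inputs are fed through a Split, iterated outputs are gathered
 * with a Concat, back edges chain iteration i-1 into iteration i, and
 * non-iterated ports are connected directly via CombineData.
 */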
bool unrollTI(CNNLayerPtr cur, ICNNNetwork &net) {
    if (cur->type != "TensorIterator")
        return true;

    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
    IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";

    int num = getNumIteration(*ti);  // -1 means inconsistent TI
    if (num == -1) return false;  // TODO: better to throw exception

    const auto &body = ti->body;

    std::vector<TensorIterator::Body> body_list(num);
    for (int i = 0; i < num; i++) {
        // copy with additional suffix to each object name
        body_list[i] = CopyTIBody(body, ":" + std::to_string(i));

        auto holder = body_list[i].inputs.back();
        if (holder->getPrecision() == Precision::UNSPECIFIED) {
            for (auto kvp : holder->getInputTo())
                net.addLayer(kvp.second);
        }
    }

    RuleSet first_class, second_class, third_class;
    std::tie(first_class, second_class, third_class) = classifyInputRules(*ti);

    /** Clean links on TI */
    for (auto &ins : ti->insData)
        ins.lock()->getInputTo().erase(ti->name);
    for (auto &outs : ti->outData)
        outs->getCreatorLayer().reset();

    /** FIRST class: iterated inputs are split across the body copies */
    for (size_t i = 0; i < first_class.size(); i++) {
        auto &rule = first_class[i];
        auto in_data = ti->insData[rule.from].lock();

        std::string name = ti->name + ":in_split_" + std::to_string(i);
        auto split = std::make_shared<SplitLayer>(LayerParams{ name, "Split", cur->precision });
        split->_axis = rule.axis;
        split->outData.resize(num);
        split->insData.emplace_back(in_data);
        in_data->getInputTo()[split->name] = split;

        for (int j = 0; j < num; j++) {
            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
            auto &chunk = body_list[body_idx].inputs[rule.to];
            chunk->getCreatorLayer() = split;
            split->outData[j] = chunk;
        }
    }

    /** SECOND class: non-iterated inputs are shared by all body copies */
    for (const auto &rule : second_class) {
        auto in_data = ti->insData[rule.from].lock();

        for (int j = 0; j < num; j++) {
            auto &chunk = body_list[j].inputs[rule.to];
            CombineData(in_data, chunk);
        }
    }

    /** BACK EDGES: chain the output of iteration i-1 into the input of iteration i */
    for (const auto &rule : ti->back_edges) {
        for (int i = 1; i < num; i++) {
            auto &from_data = body_list[i - 1].outputs[rule.from];
            auto &to_data = body_list[i].inputs[rule.to];
            CombineData(from_data, to_data);
        }
    }

    /** THIRD class: initial value goes into the first body copy only */
    for (const auto &rule : third_class) {
        auto from_data = ti->insData[rule.from].lock();
        auto &to_data = body_list[0].inputs[rule.to];
        CombineData(from_data, to_data);
    }

    /** And the same actions for output connections */
    std::tie(first_class, second_class, third_class) = classifyOutputRules(*ti);

    /** FIRST class: iterated outputs are gathered with a Concat */
    for (size_t i = 0; i < first_class.size(); i++) {
        auto &rule = first_class[i];
        auto out_data = ti->outData[rule.from];

        std::string name = ti->name + ":out_concat_" + std::to_string(i);
        auto concat = std::make_shared<ConcatLayer>(LayerParams{ name, "Concat", cur->precision });
        concat->_axis = rule.axis;
        concat->insData.resize(num);
        concat->outData.emplace_back(out_data);
        out_data->getCreatorLayer() = concat;

        for (int j = 0; j < num; j++) {
            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
            auto &chunk = body_list[body_idx].outputs[rule.to];
            chunk->getInputTo()[concat->name] = concat;
            concat->insData[j] = chunk;
        }
    }

    /** SECOND class: non-iterated outputs are taken from every body copy */
    for (const auto &rule : second_class) {
        auto out_data = ti->outData[rule.from];

        for (int j = 0; j < num; j++) {
            auto &chunk = body_list[j].outputs[rule.to];
            CombineData(chunk, out_data);
        }
    }

    /** THIRD class: final state is taken from the last body copy */
    for (const auto &rule : third_class) {
        auto &from_data = ti->outData[rule.from];
        auto &to_data = body_list[num - 1].outputs[rule.to];

        auto parent = to_data->getCreatorLayer().lock();
        std::replace(parent->outData.begin(), parent->outData.end(), to_data, from_data);
        from_data->getCreatorLayer() = parent;

        CombineData(from_data, to_data);
    }
    return true;
}

/************************************************************/
/****  Builder helpers  *************************************/
/************************************************************/
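
/**
 * The _* helpers below build standalone layers of a given type with their
 * output Data objects already created and attached. _link/_link_with_clip
 * then wire such layers together, data-to-port.
 */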

static CNNLayerPtr _concat(std::string name, Precision prc, SizeVector dims, int num) {
    auto res = std::make_shared<ConcatLayer>(LayerParams{name, "Concat", prc});

    res->_axis = 1;

    res->insData.resize(num);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _split(std::string name, Precision prc, SizeVector dims, int num) {
    auto res = std::make_shared<SplitLayer>(LayerParams{name, "Split", prc});

    res->_axis = 1;
    res->params["axis"] = std::to_string(res->_axis);

    res->insData.resize(1);
    res->outData.resize(num);

    for (int i = 0; i < num; i++) {
        auto out_data = DataPtr(new Data(name + "_part_" + std::to_string(i),
                TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
        out_data->getCreatorLayer() = res;

        res->outData[i] = out_data;
    }
    return res;
}

static CNNLayerPtr _fc(std::string name, Precision prc, SizeVector dims, Blob::Ptr &W, Blob::Ptr &B) {
    auto res = std::make_shared<FullyConnectedLayer>(LayerParams{name, "FullyConnected", prc});

    res->_weights = W;
    res->_biases = B;
    res->_out_num = dims[1];
    res->blobs["weights"] = W;
    res->blobs["biases"] = B;
    res->params["out-size"] = std::to_string(dims[1]);

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _act(std::string name, Precision prc, SizeVector dims, std::string type) {
    auto res = std::make_shared<CNNLayer>(LayerParams{name, type, prc});

    res->params["type"] = type;

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _pwr(std::string name, Precision prc, SizeVector dims, float scale, float shift) {
    auto res = std::make_shared<PowerLayer>(LayerParams{name, "Power", prc});

    res->power = 1.0f;
    res->scale = scale;
    res->offset = shift;
    res->params["power"] = std::to_string(res->power);
    res->params["scale"] = std::to_string(res->scale);
    res->params["shift"] = std::to_string(res->offset);

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _eltw(std::string name, Precision prc, SizeVector dims, std::string type) {
    auto res = std::make_shared<EltwiseLayer>(LayerParams{name, "Eltwise", prc});

    res->params["operation"] = type;
    res->_operation = type == "sum" ? EltwiseLayer::Sum : EltwiseLayer::Prod;

    res->insData.resize(2);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static std::shared_ptr<ReshapeLayer> _resh(std::string name, Precision prc, SizeVector dims) {
    auto res = std::make_shared<ReshapeLayer>(LayerParams{name, "Reshape", prc});

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->getCreatorLayer() = res;

    res->outData[0] = out_data;
    return res;
}

static std::shared_ptr<RNNCellBase> _cell(std::string name, Precision prc, SizeVector data_dims,
                                          SizeVector state_dims, RNNSequenceLayer::CellType type) {
    std::shared_ptr<RNNCellBase> res;
    size_t NS = 1;
    switch (type) {
    case RNNSequenceLayer::LSTM:
        res = std::make_shared<LSTMCell>(LayerParams{name, "LSTMCell", prc}); NS = 2;
        break;
    case RNNSequenceLayer::GRU:
    case RNNSequenceLayer::GRU_LBR:
        res = std::make_shared<GRUCell>(LayerParams{name, "GRUCell", prc});
        break;
    case RNNSequenceLayer::RNN:
        res = std::make_shared<RNNCell>(LayerParams{name, "RNNCell", prc});
        break;
    }

    res->cellType = type;
    res->insData.resize(1 + NS);
    res->outData.resize(NS);

    auto out_data = DataPtr(new Data(name + ":out_data",
            TensorDesc { prc, data_dims, TensorDesc::getLayoutByDims(data_dims) }));
    out_data->getCreatorLayer() = res;
    res->outData[0] = out_data;

    for (size_t i = 0; i < NS; i++) {
        auto out_state = DataPtr(new Data(name + ":out_state_" + std::to_string(i),
                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));
        out_state->getCreatorLayer() = res;
        res->outData[i] = out_state;
    }
    return res;
}

static std::shared_ptr<TensorIterator> _ti(std::string name, Precision prc, size_t NS) {
    auto res = std::make_shared<TensorIterator>(LayerParams{name, "TensorIterator", prc});

    res->insData.resize(1 + NS);
    res->outData.resize(1 + NS);
    return res;
}

static void _link(CNNLayerPtr src, CNNLayerPtr dst, size_t src_port = 0, size_t dst_port = 0) {
    auto data = src->outData[src_port];
    data->getInputTo()[dst->name] = dst;
    dst->insData[dst_port] = data;
}

static void _link(DataPtr &data, CNNLayerPtr dst, size_t dst_port = 0) {
    data->getInputTo()[dst->name] = dst;
    dst->insData[dst_port] = data;
}

/** Link nodes, inserting a clamp between them if required (clip_val != 0.0) */
static void _link_with_clip(CNNLayerPtr src, CNNLayerPtr dst, const float clip_val,
                            size_t src_port = 0, size_t dst_port = 0) {
    if (clip_val == 0.0f) {
        _link(src, dst, src_port, dst_port);
    } else {
        auto clip_name = dst->name + "_clip";
        auto clip_prc = dst->precision;
        auto clip_shape = src->outData[src_port]->getTensorDesc().getDims();
        auto clip = _act(clip_name, clip_prc, clip_shape, "clamp");
        clip->params["min"] = std::to_string(-clip_val);
        clip->params["max"] = std::to_string(clip_val);

        _link(src, clip, src_port, 0);
        _link(clip, dst, 0, dst_port);
    }
}

static Blob::Ptr make_partial_copy(Blob::Ptr src, size_t off, size_t size) {
    auto res = make_plain_blob(src->getTensorDesc().getPrecision(), {size});
    res->allocate();

    size_t elem_size = src->getTensorDesc().getPrecision().size();
    auto src_ptr = src->buffer().as<uint8_t*>();
    auto dst_ptr = res->buffer().as<uint8_t*>();

    ie_memcpy(dst_ptr, res->byteSize(), src_ptr + off * elem_size, size * elem_size);

    return res;
}

static Blob::Ptr wrap_as_tensor(Blob::Ptr src, SizeVector dims) {
    auto res = make_blob_with_precision(
            TensorDesc { src->getTensorDesc().getPrecision(), dims, TensorDesc::getLayoutByDims(dims) },
            src->buffer());
    IE_ASSERT(src->size() == res->size());
    return res;
}
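
/**
 * Copy a sub-region of src into a new dense blob. region and offset are
 * given in elements and must match the rank of src; all shapes are padded
 * to rank 3 so a double loop plus a per-row memcpy covers every case.
 */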
static Blob::Ptr make_region_copy(Blob::Ptr src, SizeVector region, SizeVector offset) {
    IE_ASSERT(region.size() == offset.size());
    IE_ASSERT(region.size() == src->getTensorDesc().getDims().size());

    auto res = make_plain_blob(src->getTensorDesc().getPrecision(), region);
    res->allocate();

    size_t elem_size = src->getTensorDesc().getPrecision().size();
    auto src_ptr = src->buffer().as<uint8_t*>();
    auto dst_ptr = res->buffer().as<uint8_t*>();

    // Pad source dims, region and offset to rank 3
    auto &dd = src->getTensorDesc().getDims();
    SizeVector src_dims {1, 1, 1};
    std::copy(dd.begin(), dd.end(), src_dims.end() - dd.size());

    SizeVector dims {1, 1, 1};
    std::copy(region.begin(), region.end(), dims.end() - region.size());

    SizeVector off {0, 0, 0};
    std::copy(offset.begin(), offset.end(), off.end() - offset.size());

    const auto D1 = dims[0];
    const auto D2 = dims[1];
    const auto D3 = dims[2];
    const auto off1 = off[0];
    const auto off2 = off[1];
    const auto off3 = off[2];
    const auto str1 = src_dims[1] * src_dims[2];
    const auto str2 = src_dims[2];

    for (size_t d1 = 0; d1 < D1; d1++)
        for (size_t d2 = 0; d2 < D2; d2++) {
            auto off_src = (off1 + d1) * str1 + (off2 + d2) * str2 + off3;
            auto off_dst = d1 * D2 * D3 + d2 * D3;
            ie_memcpy(dst_ptr + off_dst * elem_size, res->byteSize() - off_dst * elem_size,
                      src_ptr + off_src * elem_size, D3 * elem_size);
        }

    return res;
}
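
/**
 * Expand an RNNCell into its explicit body:
 *     Ht = _f(Wi*[Xt, Ht-1] + Bi)
 * implemented as Concat -> FullyConnected -> (optional Clamp) -> Activation.
 */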
static bool unrollRNNCellBody(CNNLayerPtr cur) {
    if (cur->type != "RNNCell")
        return false;

    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to RNNCellBase object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto out_h_state = cell->outData[0];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->getInputTo().erase(cell->name);
    for (auto &outs : cell->outData)
        outs->getCreatorLayer().reset();

    auto concat = _concat(name + ":concat", prc, {N, D + S}, 2);
    auto fc = _fc(name + ":fc", prc, {N, S}, cell->_weights, cell->_biases);
    auto act = _act(name + ":act", prc, {N, S}, cell->activations[0]);

    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc);
    _link_with_clip(fc, act, cell->clip);

    act->outData[0] = out_h_state;
    out_h_state->getCreatorLayer() = act;
    return true;
}
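
/**
 * Expand an LSTMCell into its explicit body. One FullyConnected produces all
 * four gates at once (split order f, i, c, o):
 *     [ft, it, ct, ot] = W*[Xt, Ht-1] + B
 *     Ct = _f(ft) (.) Ct-1 + _f(it) (.) _g(ct)
 *     Ht = _f(ot) (.) _h(Ct)
 */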
static bool unrollLSTMCellBody(CNNLayerPtr cur) {
    if (cur->type != "LSTMCell")
        return false;

    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to RNNCellBase object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto in_c_state = cell->insData[2].lock();
    auto out_h_state = cell->outData[0];
    auto out_c_state = cell->outData[1];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];
    size_t G = 4;  // number of gates

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->getInputTo().erase(cell->name);
    for (auto &outs : cell->outData)
        outs->getCreatorLayer().reset();

    auto concat = _concat(name + ":concat", prc, {N, D + S}, 2);
    auto split = _split(name + ":split", prc, {N, S}, G);
    auto fc = _fc(name + ":fc", prc, {N, S * G}, cell->_weights, cell->_biases);

    const std::string _f = cell->activations[0], _g = cell->activations[1], _h = cell->activations[2];

    auto act_f = _act(name + ":act_f", prc, {N, S}, _f);
    auto act_i = _act(name + ":act_i", prc, {N, S}, _f);
    auto act_c = _act(name + ":act_c", prc, {N, S}, _g);
    auto act_o = _act(name + ":act_o", prc, {N, S}, _f);
    auto act_x = _act(name + ":act_x", prc, {N, S}, _h);

    auto mul_ic = _eltw(name + ":mul_ic", prc, {N, S}, "mul");
    auto mul_f  = _eltw(name + ":mul_f",  prc, {N, S}, "mul");
    auto sum    = _eltw(name + ":sum",    prc, {N, S}, "sum");
    auto mul    = _eltw(name + ":mul",    prc, {N, S}, "mul");

    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc);

    _link_with_clip(fc, split, cell->clip);

    _link(split, act_f, 0, 0);
    _link(split, act_i, 1, 0);
    _link(split, act_c, 2, 0);
    _link(split, act_o, 3, 0);

    _link(act_i, mul_ic, 0, 0);
    _link(act_c, mul_ic, 0, 1);

    _link(act_f, mul_f, 0, 0);
    _link(in_c_state, mul_f, 1);

    _link(mul_f, sum, 0, 0);
    _link(mul_ic, sum, 0, 1);

    _link(sum, act_x);

    _link(act_x, mul, 0, 0);
    _link(act_o, mul, 0, 1);

    mul->outData[0] = out_h_state;
    out_h_state->getCreatorLayer() = mul;

    CombineData(out_c_state, sum->outData[0]);
    sum->outData[0] = out_c_state;
    out_c_state->getCreatorLayer() = sum;
    return true;
}

static bool unrollGRUCellBody(CNNLayerPtr cur, bool linear_before_reset = false) {
    if (cur->type != "GRUCell")
        return false;

    auto cell = std::dynamic_pointer_cast<GRUCell>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to GRUCell object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto out_h_state = cell->outData[0];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];

    // Split the weights into UR and O gate parts. The original gate order is URO.
    size_t bG = linear_before_reset ? 4 : 3;
    auto orig_W = wrap_as_tensor(cell->_weights, {3, S, D + S});
    auto orig_B = wrap_as_tensor(cell->_biases, {bG, S});

    auto ur_W = make_region_copy(orig_W, {2, S, D + S}, {0, 0, 0});
    auto o_W  = make_region_copy(orig_W, {1, S, D + S}, {2, 0, 0});
    auto ur_B = make_region_copy(orig_B, {2, S}, {0, 0});
    auto o_B  = make_region_copy(orig_B, {1, S}, {2, 0});

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->getInputTo().erase(cell->name);
    for (auto &outs : cell->outData)
        outs->getCreatorLayer().reset();

    auto concat = _concat(name + ":concat", prc, {N, D + S}, 2);
    auto split = _split(name + ":split", prc, {N, S}, 2);
    auto fc_ur = _fc(name + ":fc_ur", prc, {N, S * 2}, ur_W, ur_B);

    const std::string _f = cell->activations[0], _g = cell->activations[1];

    auto act_ur = _act(name + ":act_ur", prc, {N, 2 * S}, _f);
    auto act_o = _act(name + ":act_o", prc, {N, S}, _g);

    auto mul_u = _eltw(name + ":mul_u", prc, {N, S}, "mul");
    auto mul_r = _eltw(name + ":mul_r", prc, {N, S}, "mul");

    auto pwr_m1 = _pwr(name + ":pwr", prc, {N, S}, -1.0f, 1.0f);

    auto mul = _eltw(name + ":mul", prc, {N, S}, "mul");
    auto sum = _eltw(name + ":sum", prc, {N, S}, "sum");

    /**
     * - zt = _f(Wz*[Xt, Ht-1] + Bz)
     * - rt = _f(Wr*[Xt, Ht-1] + Br)
     * - ht = _g(Wh*[Xt, (rt (.) Ht-1)] + Bh)                # default, when linear_before_reset = 0
     * - ht = _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr)))   # when linear_before_reset != 0
     * - Ht = (1 - zt) (.) ht + zt (.) Ht-1
     */
    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc_ur);
    _link_with_clip(fc_ur, act_ur, cell->clip);
    _link(act_ur, split);  // split[0] - zt, split[1] - rt

    if (linear_before_reset) {
        auto lbr_B = wrap_as_tensor(orig_B, {4, S});

        auto whw_W = make_region_copy(o_W, {1, S, D}, {0, 0, 0});
        auto whr_W = make_region_copy(o_W, {1, S, S}, {0, 0, D});
        auto whw_B = make_region_copy(lbr_B, {1, S}, {2, 0});
        auto whr_B = make_region_copy(lbr_B, {1, S}, {3, 0});

        auto fc_whr = _fc(name + ":fc_whr", prc, {N, S}, whr_W, whr_B);
        auto fc_whw = _fc(name + ":fc_whw", prc, {N, S}, whw_W, whw_B);
        auto sum_h = _eltw(name + ":sum_h", prc, {N, S}, "sum");

        _link(in_h_state, fc_whr);     // Whr*Ht-1 + Bhr
        _link(fc_whr, mul_r, 0);       //
        _link(split, mul_r, 1, 1);     // rt (.) (Whr*Ht-1 + Bhr)
        _link(in_data, fc_whw);        // Whw*Xt + Bhw
        _link(fc_whw, sum_h, 0, 0);    //
        _link(mul_r, sum_h, 0, 1);     // Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr))
        _link_with_clip(sum_h, act_o, cell->clip);  // _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr)))
    } else {
        auto fc_wh = _fc(name + ":fc_o", prc, {N, S}, o_W, o_B);
        auto concat_h = _concat(name + ":concat_h", prc, {N, D + S}, 2);

        _link(split, mul_r, 1, 0);     //
        _link(in_h_state, mul_r, 1);   // rt (.) Ht-1
        _link(in_data, concat_h, 0);   //
        _link(mul_r, concat_h, 0, 1);  // [Xt, (rt (.) Ht-1)]
        _link(concat_h, fc_wh);        // Wh*[Xt, (rt (.) Ht-1)] + Bh
        _link_with_clip(fc_wh, act_o, cell->clip);  // _g(Wh*[Xt, (rt (.) Ht-1)] + Bh)
    }

    _link(split, pwr_m1, 0, 0);   // 1 - zt
    _link(act_o, mul, 0, 0);      //
    _link(pwr_m1, mul, 0, 1);     // (1 - zt) (.) ht
    _link(split, mul_u, 0, 0);    //
    _link(in_h_state, mul_u, 1);  // zt (.) Ht-1
    _link(mul, sum, 0, 0);        //
    _link(mul_u, sum, 0, 1);      // (1 - zt) (.) ht + zt (.) Ht-1

    sum->outData[0] = out_h_state;
    out_h_state->getCreatorLayer() = sum;
    return true;
}

static bool unrollCell(CNNLayerPtr cur) {
    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    switch (cell->cellType) {
    case RNNCellBase::LSTM:    return unrollLSTMCellBody(cur);
    case RNNCellBase::GRU:     return unrollGRUCellBody(cur);
    case RNNCellBase::GRU_LBR: return unrollGRUCellBody(cur, true);
    case RNNCellBase::RNN:     return unrollRNNCellBody(cur);
    }
    return false;
}
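
/**
 * Replace an RNNSequence layer with a TensorIterator whose body is the
 * canonical Reshape (squeeze) -> *Cell -> Reshape (unsqueeze) sub-graph,
 * i.e. the inverse of convertToRNNSeq.
 */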
static bool unrollSeq(CNNLayerPtr cur) {
    if (!one_of(cur->type, "LSTMSequence", "GRUSequence", "RNNSequence"))
        return true;

    auto seq = std::dynamic_pointer_cast<RNNSequenceLayer>(cur);
    IE_ASSERT(seq) << "Cannot cast object with type ***Sequence to RNNSequenceLayer object";

    auto name = seq->name;

    auto in_data = seq->insData[0].lock();
    auto in_h_state = seq->insData[1].lock();
    auto out_data = seq->outData[0];

    auto in_d_dims = in_data->getTensorDesc().getDims();
    auto state_dims = in_h_state->getTensorDesc().getDims();
    auto out_d_dims = out_data->getTensorDesc().getDims();

    const int axis = seq->axis;
    const auto direct = seq->direction;
    const auto prc = seq->precision;

    /** Release links on the Seq layer */
    for (auto &ins : seq->insData)
        ins.lock()->getInputTo().erase(seq->name);
    for (auto &outs : seq->outData)
        outs->getCreatorLayer().reset();

    /** Body shapes: single-step slices of the sequence tensors */
    auto in_d_body_dims = in_d_dims;
    in_d_body_dims[axis] = 1;

    auto in_d_body_squeeze_dims = in_d_dims;
    in_d_body_squeeze_dims.erase(in_d_body_squeeze_dims.begin() + axis);

    auto out_d_body_dims = out_d_dims;
    out_d_body_dims[axis] = 1;

    auto out_d_body_squeeze_dims = out_d_dims;
    out_d_body_squeeze_dims.erase(out_d_body_squeeze_dims.begin() + axis);

    auto body_in_data = DataPtr(new Data(name + ":data_in",
            TensorDesc { prc, in_d_body_dims, TensorDesc::getLayoutByDims(in_d_body_dims) }));

    auto resh1 = _resh(name + ":resh1", prc, in_d_body_squeeze_dims);
    auto cell = _cell(name + ":cell", prc, out_d_body_squeeze_dims, state_dims, seq->cellType);
    auto resh2 = _resh(name + ":resh2", prc, out_d_body_dims);

    _link(body_in_data, resh1);
    _link(resh1, cell);
    _link(cell, resh2);

    cell->_weights = seq->_weights;
    cell->_biases = seq->_biases;
    cell->blobs["weights"] = cell->_weights;
    cell->blobs["biases"] = cell->_biases;
    cell->hidden_size = seq->hidden_size;
    cell->clip = seq->clip;
    cell->activations = seq->activations;
    cell->activation_alpha = seq->activation_alpha;
    cell->activation_beta = seq->activation_beta;

    const size_t NS = cell->outData.size();  // number of states

    auto ti = _ti(name + ":ti", prc, NS);
    _link(in_data, ti, 0);

    ti->outData[0] = out_data;
    out_data->getCreatorLayer() = ti;

    ti->body.inputs.push_back(body_in_data);
    ti->body.outputs.push_back(resh2->outData[0]);

    int start = direct == RNNSequenceLayer::FWD ? 0 : -1;
    int end   = direct == RNNSequenceLayer::FWD ? -1 : 0;
    int step  = direct == RNNSequenceLayer::FWD ? 1 : -1;
    ti->input_port_map.push_back({0, 0, axis, step, start, end, 1});
    ti->output_port_map.push_back({0, 0, axis, step, start, end, 1});

    for (size_t i = 0; i < NS; i++) {
        auto in_state = seq->insData[1 + i].lock();
        _link(in_state, ti, 1 + i);

        auto out_state = seq->outData[1 + i];
        ti->outData[1 + i] = out_state;
        out_state->getCreatorLayer() = ti;

        auto body_in_state = DataPtr(new Data(name + ":state_in_" + std::to_string(i),
                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));

        _link(body_in_state, cell, 1 + i);

        ti->body.inputs.push_back(body_in_state);
        ti->body.outputs.push_back(cell->outData[i]);

        const int ii = 1 + static_cast<int>(i);
        ti->input_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
        ti->output_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
        ti->back_edges.push_back({ii, ii, -1, 0, 0, 0, 0});
    }
    return true;
}

/************************************************************/
/****  Converter API  ***************************************/
/************************************************************/

template <typename N>
std::vector<CNNLayerPtr> TopolSort(const N &net);

template <>
std::vector<CNNLayerPtr> TopolSort(const ICNNNetwork &net) {
    return details::CNNNetSortTopologically(net);
}

template <>
std::vector<CNNLayerPtr> TopolSort(const TensorIterator::Body &net) {
    return TIBodySortTopologically(net);
}

template <typename N, typename T>
bool ApplyForAll(N &net, T action) {
    auto all_layers = TopolSort(net);
    bool sts = true;

    for (auto &layer : all_layers)
        sts &= action(layer, net);

    return sts;
}

template <typename N, typename T, typename P>
bool ApplyForAll_if(N &net, T action, P pred) {
    auto all_layers = TopolSort(net);
    bool sts = true;

    for (auto &layer : all_layers)
        if (pred(layer))
            sts &= action(layer);

    return sts;
}

bool CombineRNNSeq(ICNNNetwork &net) {
    return ApplyForAll(net, convertToRNNSeq<ICNNNetwork>);
}

bool CombineRNNSeq(TensorIterator::Body &net) {
    return ApplyForAll(net, convertToRNNSeq<TensorIterator::Body>);
}

bool UnrollTI(ICNNNetwork &net) {
    return ApplyForAll(net, unrollTI);
}
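
/**
 * Predicate-driven unrolling: every RNNSequence and *Cell layer for which
 * pred returns true is replaced with its explicit sub-graph representation.
 */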
template <typename NET>
bool UnrollRNN_if_impl(NET &net, const std::function<bool(const RNNCellBase&)> pred) {
    // Filter layers by RNN-specific type
    auto _seq_pred = [&](CNNLayerPtr layer) {
        auto rnn = std::dynamic_pointer_cast<RNNSequenceLayer>(layer);
        if (!rnn) return false;
        return pred(*rnn.get());
    };
    auto _cell_pred = [&](CNNLayerPtr layer) {
        auto rnn = std::dynamic_pointer_cast<RNNCellBase>(layer);
        if (!rnn || !one_of(rnn->type, "LSTMCell", "GRUCell", "RNNCell")) return false;
        return pred(*rnn.get());
    };

    bool res = true;
    res &= ApplyForAll_if(net, unrollSeq, _seq_pred);
    res &= ApplyForAll_if(net, unrollCell, _cell_pred);
    return res;
}

bool UnrollRNN_if(ICNNNetwork &net, const std::function<bool(const RNNCellBase&)> pred) {
    return UnrollRNN_if_impl(net, pred);
}

bool UnrollRNN_if(TensorIterator::Body &net, const std::function<bool(const RNNCellBase&)> pred) {
    return UnrollRNN_if_impl(net, pred);
}

}  // namespace NetPass
}  // namespace InferenceEngine