// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "net_pass.h"  // assumed: companion header declaring the NetPass API implemented below

#include "blob_factory.hpp"
#include "details/ie_cnn_network_tools.h"
#include "graph_tools.hpp"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace InferenceEngine {
namespace NetPass {

template <typename T, typename P>
inline bool one_of(T val, P item) { return val == item; }
template <typename T, typename P, typename... Args>
inline bool one_of(T val, P item, Args... item_others) {
    return val == item || one_of(val, item_others...);
}

/************************************************************/
/****  TI Utils  ********************************************/
/************************************************************/

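/**
 * Returns the entry Data objects of a subgraph: the given heads plus fake
 * "holder" Data objects for input-less layers (e.g. const placeholders)
 * which are reachable by DFS but not covered by the heads.
 */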
static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr> &heads) {
    CNNLayerSet inputLayers;
    std::unordered_set<CNNLayer*> allLayers;

    // Define all start layers
    for (const auto & data : heads) {
        auto &secondLayers = data->getInputTo();
        if (secondLayers.empty()) continue;

        details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
            if (layer->insData.empty()) {
                inputLayers.insert(layer);
            }
        }, false);
    }

    std::vector<DataPtr> res = heads;
    // Add fake input data to cover layers which are not reachable
    // from the heads (like const placeholders)
    for (auto &starter : inputLayers) {
        DataPtr holder(new Data(starter->name + ":input_holder", starter->precision));
        holder->inputTo[starter->name] = starter;
        res.push_back(holder);
    }

    return res;
}

static std::vector<CNNLayerPtr> SortTopologically(const TensorIterator::Body &body) {
    std::vector<CNNLayerPtr> all_layers;

    auto all_input_layers = getAllInputs(body.inputs);
    CNNNetForestDFS(all_input_layers, [&](CNNLayerPtr current) {
        all_layers.push_back(current);
    }, false);
    std::reverse(all_layers.begin(), all_layers.end());
    return all_layers;
}

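/**
 * Makes a deep copy of a TI body: clones every layer, duplicates the Data
 * objects and remaps all creator/consumer links onto the clones. An optional
 * suffix is appended to the names of the copied objects.
 */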
static TensorIterator::Body CopyTIBody(ICNNNetwork &net, const TensorIterator::Body &body, std::string suffix = "") {
    struct NoneStruct {};
    auto cp = [&](CNNLayerPtr lp) {
        return injectData<NoneStruct>(lp);
    };

    const auto all_orig = SortTopologically(body);
    auto num = all_orig.size();

    // Clone all layers
    std::unordered_map<CNNLayer*, CNNLayerPtr> old2new_l;
    for (int i = 0; i < num; i++) {
        auto &orig = all_orig[i];
        old2new_l[orig.get()] = cp(orig);
    }

    // Clone the input Data objects and remap their consumers to the new layers
    std::unordered_map<Data*, DataPtr> old2new_d;
    for (auto &in : body.inputs) {
        auto new_data = std::make_shared<Data>(*in.get());
        for (auto &to : new_data->getInputTo())
            to.second = old2new_l[to.second.get()];

        old2new_d[in.get()] = new_data;
    }

    // Remap producer/consumer links of the cloned layers
    for (const auto &old : all_orig) {
        auto &new_one = old2new_l[old.get()];

        for (int i = 0; i != old->outData.size(); i++) {
            auto old_data = old->outData[i];
            auto new_data = new_one->outData[i];
            new_data->getCreatorLayer() = CNNLayerWeakPtr(new_one);
            old2new_d[old_data.get()] = new_data;

            for (auto &to : new_data->getInputTo())
                to.second = old2new_l[to.second.get()];
        }
        for (int i = 0; i != old->insData.size(); i++) {
            auto old_data = old->insData[i].lock();
            auto new_data = old2new_d.at(old_data.get());
            new_one->insData[i] = new_data;
        }
    }

    if (!suffix.empty()) {
        for (auto &kvp : old2new_l) {
            auto layer = kvp.second;
            auto old_name = layer->name;
            layer->name += suffix;
            for (auto &ins : layer->insData) {
                ins.lock()->inputTo.erase(old_name);
                ins.lock()->inputTo[layer->name] = layer;
            }

            // And also hold the newly created layer in the parent network.
            // A TI body may contain isolated constant placeholder layers
            // which are not reachable from the body inputs.
            net.addLayer(layer);
        }
        for (auto &kvp : old2new_d) kvp.second->name += suffix;
    }

    TensorIterator::Body res;
    for (auto &in : body.inputs)
        res.inputs.emplace_back(old2new_d[in.get()]);

    for (auto &out : body.outputs)
        res.outputs.emplace_back(old2new_d[out.get()]);

    return res;
}

/************************************************************/
/****  TI rule helpers  *************************************/
/************************************************************/

inline bool is_full_ranged(const TensorIterator::PortMap& rule, const DataPtr &data) {
    if (!data)
        THROW_IE_EXCEPTION << "Internal error. data == nullptr";

    if (rule.axis == -1 || !one_of(rule.stride, 1, -1))
        return false;

    auto &shape = data->getDims();
    int size = shape[rule.axis];

    int begin = rule.start >= 0 ? rule.start : size + rule.start + 1;
    int end = rule.end >= 0 ? rule.end : size + rule.end + 1;

    return (rule.stride == 1)
            ? begin == 0 && end == size
            : begin == size && end == 0;
}

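/**
 * Deduces the iteration count of a TI from its iterated ports: the dimension
 * along the iteration axis divided by |stride|. Returns -1 if the ports
 * disagree with each other.
 */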
inline int get_num_iteration(const std::shared_ptr<TensorIterator> &ti) {
    int iter_num = 1;  // 1 means no iteration

    for (auto & rule : ti->input_port_map) {
        if (rule.axis == -1) continue;

        auto data = ti->insData[rule.from].lock();
        IE_ASSERT(data);

        auto shape = data->getDims();
        size_t size = shape[rule.axis];
        size_t step = std::abs(rule.stride);
        size_t cur_iter_size = size / step;

        if (iter_num == 1) {
            iter_num = cur_iter_size;
        } else if (iter_num != cur_iter_size) {
            return -1;  // TI is inconsistent
        }
    }

    for (auto & rule : ti->output_port_map) {
        if (rule.axis == -1) continue;

        auto data = ti->outData[rule.from];
        auto shape = data->getDims();

        size_t size = shape[rule.axis];
        size_t step = std::abs(rule.stride);
        size_t cur_iter_size = size / step;

        if (iter_num == 1) {
            iter_num = cur_iter_size;
        } else if (iter_num != cur_iter_size) {
            return -1;  // TI is inconsistent
        }
    }

    return iter_num;
}

using RuleSet = std::vector<TensorIterator::PortMap>;

std::tuple<RuleSet, RuleSet, RuleSet> ClassifyInRules(const std::shared_ptr<TensorIterator> &ti) {
    /*
     * first_class  - rules with an iteration component
     * second_class - rules without iteration and without a back edge to the same port
     * third_class  - rules without iteration but with a corresponding back edge
     */
    RuleSet first_class_rules, second_class_rules, third_class_rules;

    std::set<int> ports_with_backedge;
    for (const auto &back_edge : ti->back_edges) ports_with_backedge.insert(back_edge.to);

    for (const auto &rule : ti->input_port_map) {
        if (rule.axis != -1)
            first_class_rules.push_back(rule);
        else if (!ports_with_backedge.count(rule.to))
            second_class_rules.push_back(rule);
        else
            third_class_rules.push_back(rule);
    }
    return std::tuple<RuleSet, RuleSet, RuleSet> {first_class_rules, second_class_rules, third_class_rules};
}

std::tuple<RuleSet, RuleSet, RuleSet> ClassifyOutRules(const std::shared_ptr<TensorIterator> &ti) {
    /*
     * first_class  - rules with an iteration component
     * second_class - rules without iteration and without a back edge to the same port
     * third_class  - rules without iteration but with a corresponding back edge
     */
    RuleSet first_class_rules, second_class_rules, third_class_rules;

    std::set<int> ports_with_backedge;
    for (const auto &back_edge : ti->back_edges) ports_with_backedge.insert(back_edge.from);

    for (const auto &rule : ti->output_port_map) {
        if (rule.axis != -1)
            first_class_rules.push_back(rule);
        else if (!ports_with_backedge.count(rule.to))
            second_class_rules.push_back(rule);
        else
            third_class_rules.push_back(rule);
    }
    return std::tuple<RuleSet, RuleSet, RuleSet> {first_class_rules, second_class_rules, third_class_rules};
}

/**
 * Merge the consumers of slave data into master data
 */
void CombineData(DataPtr &master, DataPtr &slave) {
    for (auto &kvp : slave->inputTo) {
        auto &slave_layer = kvp.second;
        for (auto &slv_ins_wptr : slave_layer->insData) {
            auto slv_ins = slv_ins_wptr.lock();
            // Replace slave ptr with master
            if (slv_ins == slave) slv_ins_wptr = master;
        }
        master->inputTo[slave_layer->name] = slave_layer;
    }
}

/************************************************************/
/****  Converter Passes  ************************************/
/************************************************************/

static RNNSequenceLayer::CellType cell_type_from_name(std::string &layer_type) {
    RNNSequenceLayer::CellType res;
    if (layer_type == "LSTMCell")
        res = RNNSequenceLayer::LSTM;
    else if (layer_type == "GRUCell")
        res = RNNSequenceLayer::GRU;
    else if (layer_type == "RNNCell")
        res = RNNSequenceLayer::RNN;
    else
        THROW_IE_EXCEPTION << "Unknown Cell type (" << layer_type << "). Expected LSTMCell|GRUCell|RNNCell";
    return res;
}

static std::string cell_name(RNNSequenceLayer::CellType type) {
    std::string res;
    if (type == RNNSequenceLayer::LSTM)
        res = "LSTM";
    else if (type == RNNSequenceLayer::GRU)
        res = "GRU";
    else if (type == RNNSequenceLayer::RNN)
        res = "RNN";
    else
        THROW_IE_EXCEPTION << "Unknown Cell type (enum index: " << type << "). Expected LSTM|GRU|RNN";
    return res;
}

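/**
 * Tries to recognize a TensorIterator which implements an RNN/GRU/LSTM
 * sequence (body is squeeze -> cell -> unsqueeze, the data port iterates
 * over the full sequence axis, states travel through back edges) and
 * replaces it with a single ***Sequence layer. Returns false for a
 * TensorIterator that cannot be represented this way.
 */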
bool convertToRNNSeq(CNNLayerPtr cur, ICNNNetwork &net) {
    if (cur->type != "TensorIterator") return true;

    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
    IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";

    auto all_body_layers = SortTopologically(ti->body);

    // Check if body is: squeeze -> cell -> unsqueeze
    if (all_body_layers.size() != 3
        || all_body_layers[0]->type != "Reshape"
        || !one_of(all_body_layers[1]->type, "GRUCell", "RNNCell", "LSTMCell")
        || all_body_layers[2]->type != "Reshape")
        return false;

    auto rsp1 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[0]);
    auto cell = std::dynamic_pointer_cast<RNNCellBase>(all_body_layers[1]);
    auto rsp2 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[2]);

    auto cell_type = cell_type_from_name(all_body_layers[1]->type);

    int NS = cell_type == RNNSequenceLayer::LSTM ? 2 : 1;  // number of states

    IE_ASSERT(cell->insData.size() == NS + 1);  // {data, state1, [state2]}
    IE_ASSERT(cell->outData.size() == NS);      // {state1, [state2]}

    if (cell->insData[0].lock()->creatorLayer.lock() != rsp1 ||
        cell->outData[0]->inputTo.begin()->second != rsp2)
        return false;

    // Check port mapping
    auto _indx_in = [&] (const std::vector<DataPtr> &scope, const DataPtr &data) {
        int indx = std::find(scope.begin(), scope.end(), data) - scope.begin();
        return indx == scope.size() ? -1 : indx;
    };

    int in_dt_idx = _indx_in(ti->body.inputs, rsp1->insData[0].lock());
    int in_hs_idx = _indx_in(ti->body.inputs, cell->insData[1].lock());
    int in_cs_idx = NS == 2 ? _indx_in(ti->body.inputs, cell->insData[2].lock()) : -1;

    int out_dt_idx = _indx_in(ti->body.outputs, rsp2->outData[0]);
    int out_hs_idx = _indx_in(ti->body.outputs, cell->outData[0]);
    int out_cs_idx = NS == 2 ? _indx_in(ti->body.outputs, cell->outData[1]) : -1;

    // indexes should be [0,1,2] : sum == 3, or [0,1,-1] : sum == 0
    int sum = (NS - 1) * 3;
    if (in_hs_idx + in_cs_idx + in_dt_idx != sum || out_hs_idx + out_cs_idx + out_dt_idx != sum)
        return false;

    std::map<int, TensorIterator::PortMap> i2map, o2map, be2map;
    for (auto &m : ti->input_port_map) i2map[m.to] = m;
    for (auto &m : ti->output_port_map) o2map[m.to] = m;
    for (auto &m : ti->back_edges) be2map[m.to] = m;

    if (!one_of(i2map.size(), NS + 1, 1) ||
        !one_of(o2map.size(), NS + 1, 1) ||
        !one_of(be2map.size(), NS))  // one back edge per state
        return false;

    auto in_iter_rule = i2map[in_dt_idx];
    auto in_iter_data = ti->insData[in_iter_rule.from].lock();

    auto out_iter_rule = o2map[out_dt_idx];
    auto out_iter_data = ti->outData[out_iter_rule.from];

    // TI iterates only over the full range of the tensor
    if (!is_full_ranged(in_iter_rule, in_iter_data) ||
        !is_full_ranged(out_iter_rule, out_iter_data))
        return false;

    // only the same axis and stride are supported for in/out data tensors
    if (in_iter_rule.axis != out_iter_rule.axis ||
        in_iter_rule.stride != out_iter_rule.stride)
        return false;

    // only the first and second dims are supported for ***Sequence
    if (!one_of(in_iter_rule.axis, 0, 1))
        return false;

    bool no_init_state = i2map.size() == 1;
    bool no_last_state = o2map.size() == 1;

    if (!no_init_state && ( i2map[in_hs_idx].axis != -1 || (NS == 2 && i2map[in_cs_idx].axis != -1) ))
        return false;
    if (!no_last_state && ( o2map[out_hs_idx].axis != -1 || (NS == 2 && o2map[out_cs_idx].axis != -1) ))
        return false;

    std::vector<int> i_order {i2map[in_dt_idx].from};
    if (!no_init_state)
        i_order.push_back(i2map[in_hs_idx].from);
    if (!no_init_state && NS == 2)
        i_order.push_back(i2map[in_cs_idx].from);

    std::vector<int> o_order {o2map[out_dt_idx].from};
    if (!no_last_state)
        o_order.push_back(o2map[out_hs_idx].from);
    if (!no_last_state && NS == 2)
        o_order.push_back(o2map[out_cs_idx].from);

    // need to swap i/o ports if they are not in natural order
    std::string name = cell->name + "_sequence";
    auto rnn = std::make_shared<RNNSequenceLayer>(LayerParams{ name, cell_name(cell_type) + "Sequence", cell->precision});
    rnn->cellType = cell_type;
    rnn->axis = in_iter_rule.axis;
    rnn->direction = in_iter_rule.stride == 1
            ? RNNSequenceLayer::FWD
            : RNNSequenceLayer::BWD;

    // copy base RNN cell fields
    rnn->_weights = cell->_weights;
    rnn->_biases = cell->_biases;
    rnn->blobs = cell->blobs;
    rnn->activations = cell->activations;
    rnn->activation_alpha = cell->activation_alpha;
    rnn->activation_beta = cell->activation_beta;
    rnn->hidden_size = cell->hidden_size;
    rnn->clip = cell->clip;

    // Reconnect the sequence layer in place of the TI
    for (int i : i_order) {
        auto in_data = ti->insData[i].lock();
        in_data->inputTo.erase(ti->name);
        in_data->inputTo[rnn->name] = rnn;
        rnn->insData.push_back(in_data);
    }
    for (int i : o_order) {
        rnn->outData.push_back(ti->outData[i]);
        rnn->outData.back()->creatorLayer = rnn;
    }

    return true;
}

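/**
 * Unrolls a TensorIterator into `num` copies of its body. Iterated ports
 * (first class) get a Split/Concat, plain ports (second class) are shared
 * with every copy, back edges chain copy i-1 into copy i, and ports covered
 * by a back edge (third class) touch only the first or the last copy.
 */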
bool unrollTI(CNNLayerPtr cur, ICNNNetwork &net) {
    if (cur->type != "TensorIterator")
        return true;

    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
    IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";

    int num = get_num_iteration(ti);  // -1 means inconsistent TI
    if (num == -1) return false;  // TODO: better to throw exception

    const auto &body = ti->body;

    std::vector<TensorIterator::Body> body_list(num);
    for (int i = 0; i < num; i++) {
        // copy with additional suffix to each object name
        body_list[i] = CopyTIBody(net, body, ":" + std::to_string(i));
    }

    RuleSet first_class, second_class, third_class;
    std::tie(first_class, second_class, third_class) = ClassifyInRules(ti);

    /** Clean links on TI */
    for (auto &ins : ti->insData)
        ins.lock()->inputTo.erase(ti->name);
    for (auto &outs : ti->outData)
        outs->creatorLayer.reset();

    /** FIRST class: iterated inputs are split across the body copies */
    for (int i = 0; i < first_class.size(); i++) {
        auto &rule = first_class[i];
        auto in_data = ti->insData[rule.from].lock();

        std::string name = ti->name + ":in_split_" + std::to_string(i);
        auto split = std::make_shared<SplitLayer>(LayerParams{ name, "Split", cur->precision });
        split->_axis = rule.axis;
        split->outData.resize(num);
        split->insData.emplace_back(in_data);
        in_data->inputTo[split->name] = split;

        for (int j = 0; j < num; j++) {
            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
            auto &chunk = body_list[body_idx].inputs[rule.to];
            chunk->creatorLayer = split;
            split->outData[j] = chunk;
        }
    }

    /** SECOND class: plain inputs are shared by all body copies */
    for (const auto &rule : second_class) {
        auto in_data = ti->insData[rule.from].lock();

        for (int j = 0; j < num; j++) {
            auto &chunk = body_list[j].inputs[rule.to];
            CombineData(in_data, chunk);
        }
    }

    /** BACK EDGES: chain body copy i-1 into body copy i */
    for (const auto &rule : ti->back_edges) {
        for (int i = 1; i < num; i++) {
            auto &from_data = body_list[i-1].outputs[rule.from];
            auto &to_data = body_list[i].inputs[rule.to];
            CombineData(from_data, to_data);
        }
    }

    /** THIRD class: initial values feed only the first body copy */
    for (const auto &rule : third_class) {
        // first iteration
        auto from_data = ti->insData[rule.from].lock();
        auto &to_data = body_list[0].inputs[rule.to];
        CombineData(from_data, to_data);
    }

    /** And the same actions for output connections */
    std::tie(first_class, second_class, third_class) = ClassifyOutRules(ti);

    /** FIRST class: iterated outputs are concatenated from the body copies */
    for (int i = 0; i < first_class.size(); i++) {
        auto &rule = first_class[i];
        auto out_data = ti->outData[rule.from];

        std::string name = ti->name + ":out_concat_" + std::to_string(i);
        auto concat = std::make_shared<ConcatLayer>(LayerParams{ name, "Concat", cur->precision });
        concat->_axis = rule.axis;
        concat->insData.resize(num);
        concat->outData.emplace_back(out_data);
        out_data->creatorLayer = concat;

        for (int j = 0; j < num; j++) {
            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
            auto &chunk = body_list[body_idx].outputs[rule.to];
            chunk->inputTo[concat->name] = concat;
            concat->insData[j] = chunk;
        }
    }

    /** SECOND class: plain outputs are taken from every body copy */
    for (const auto &rule : second_class) {
        auto out_data = ti->outData[rule.from];

        for (int j = 0; j < num; j++) {
            auto &chunk = body_list[j].outputs[rule.to];
            CombineData(chunk, out_data);
        }
    }

    /** THIRD class: final state values are taken from the last body copy */
    for (const auto &rule : third_class) {
        // last iteration
        auto &from_data = ti->outData[rule.from];
        auto &to_data = body_list[num-1].outputs[rule.to];

        auto parent = to_data->creatorLayer.lock();
        std::replace(parent->outData.begin(), parent->outData.end(), to_data, from_data);
        from_data->creatorLayer = parent;

        CombineData(from_data, to_data);
    }
    return true;
}

/************************************************************/
/****  Builder helpers  *************************************/
/************************************************************/

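/*
 * The helpers below construct primitive layers together with their output
 * Data objects, so the resulting nodes only need to be linked into the graph.
 */
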
static CNNLayerPtr _concat(std::string name, Precision prc, SizeVector dims, int num) {
    auto res = std::make_shared<ConcatLayer>(LayerParams{name, "Concat", prc});
    res->_axis = 1;  // all call sites concatenate {N, X} tensors along the second dim

    res->insData.resize(num);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _split(std::string name, Precision prc, SizeVector dims, int num) {
    auto res = std::make_shared<SplitLayer>(LayerParams{name, "Split", prc});
    res->_axis = 1;
    res->params["axis"] = std::to_string(res->_axis);

    res->insData.resize(1);
    res->outData.resize(num);

    for (int i = 0; i < num; i++) {
        auto out_data = DataPtr(new Data(name + "_part_" + std::to_string(i),
                TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
        out_data->creatorLayer = res;

        res->outData[i] = out_data;
    }
    return res;
}

static CNNLayerPtr _fc(std::string name, Precision prc, SizeVector dims, Blob::Ptr &W, Blob::Ptr &B) {
    auto res = std::make_shared<FullyConnectedLayer>(LayerParams{name, "FullyConnected", prc});

    res->_weights = W;
    res->_biases = B;
    res->_out_num = dims[1];
    res->blobs["weights"] = W;
    res->blobs["biases"] = B;
    res->params["out-size"] = std::to_string(dims[1]);

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _act(std::string name, Precision prc, SizeVector dims, std::string type) {
    auto res = std::make_shared<CNNLayer>(LayerParams{name, "Activation", prc});

    res->params["type"] = type;

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _pwr(std::string name, Precision prc, SizeVector dims, float scale, float shift) {
    auto res = std::make_shared<PowerLayer>(LayerParams{name, "Power", prc});
    res->power = 1.0f;
    res->scale = scale;
    res->offset = shift;

    res->params["power"] = std::to_string(res->power);
    res->params["scale"] = std::to_string(res->scale);
    res->params["shift"] = std::to_string(res->offset);

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static CNNLayerPtr _eltw(std::string name, Precision prc, SizeVector dims, std::string type) {
    auto res = std::make_shared<EltwiseLayer>(LayerParams{name, "Eltwise", prc});

    res->params["operation"] = type;
    res->_operation = type == "sum" ? EltwiseLayer::Sum : EltwiseLayer::Prod;

    res->insData.resize(2);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static std::shared_ptr<ReshapeLayer> _resh(std::string name, Precision prc, SizeVector dims) {
    auto res = std::make_shared<ReshapeLayer>(LayerParams{name, "Reshape", prc});

    res->insData.resize(1);
    res->outData.resize(1);

    auto out_data = DataPtr(new Data(name,
            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
    out_data->creatorLayer = res;

    res->outData[0] = out_data;
    return res;
}

static std::shared_ptr<RNNCellBase> _cell(std::string name, Precision prc, SizeVector data_dims, SizeVector state_dims, RNNSequenceLayer::CellType type) {
    std::shared_ptr<RNNCellBase> res;
    size_t NS = 1;
    switch (type) {
        case RNNSequenceLayer::LSTM:
            res = std::make_shared<LSTMCell>(LayerParams{name, "LSTMCell", prc}); NS = 2;
            break;
        case RNNSequenceLayer::GRU:
        case RNNSequenceLayer::GRU_LBR:
            res = std::make_shared<GRUCell>(LayerParams{name, "GRUCell", prc});
            break;
        case RNNSequenceLayer::RNN:
            res = std::make_shared<RNNCell>(LayerParams{name, "RNNCell", prc});
            break;
    }

    res->cellType = type;
    res->insData.resize(1 + NS);
    res->outData.resize(NS);

    auto out_data = DataPtr(new Data(name + ":out_data",
            TensorDesc { prc, data_dims, TensorDesc::getLayoutByDims(data_dims) }));
    out_data->creatorLayer = res;
    res->outData[0] = out_data;

    // Extra state outputs (LSTM cell state); outData[0] already serves as Ht
    for (size_t i = 1; i < NS; i++) {
        auto out_state = DataPtr(new Data(name + ":out_state_" + std::to_string(i),
                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));
        out_state->creatorLayer = res;
        res->outData[i] = out_state;
    }

    return res;
}

static std::shared_ptr<TensorIterator> _ti(std::string name, Precision prc, size_t NS) {
    auto res = std::make_shared<TensorIterator>(LayerParams{name, "TensorIterator", prc});

    res->insData.resize(1 + NS);
    res->outData.resize(1 + NS);

    return res;
}

static void _link(CNNLayerPtr src, CNNLayerPtr dst, size_t src_port = 0, size_t dst_port = 0) {
    auto data = src->outData[src_port];
    data->inputTo[dst->name] = dst;
    dst->insData[dst_port] = data;
}

static void _link(DataPtr &data, CNNLayerPtr dst, size_t dst_port = 0) {
    data->inputTo[dst->name] = dst;
    dst->insData[dst_port] = data;
}

/** Link nodes with clipping data if required (clip_val != 0.0) */
static void _link_with_clip(CNNLayerPtr src, CNNLayerPtr dst, const float clip_val,
                            size_t src_port = 0, size_t dst_port = 0) {
    if (clip_val == 0.0f) {
        _link(src, dst, src_port, dst_port);
    } else {
        auto clip_name = dst->name + "_clip";
        auto clip_prc = dst->precision;
        auto clip_shape = src->outData[src_port]->getTensorDesc().getDims();
        auto clip = _act(clip_name, clip_prc, clip_shape, "clamp");
        clip->params["min"] = std::to_string(-clip_val);
        clip->params["max"] = std::to_string(clip_val);

        _link(src, clip, src_port, 0);
        _link(clip, dst, 0, dst_port);
    }
}

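/** Copies `size` elements of `src`, starting at element offset `off`, into a new plain blob */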
static Blob::Ptr make_partial_copy(Blob::Ptr src, size_t off, size_t size) {
    auto res = make_plain_blob(src->precision(), {size});
    res->allocate();

    size_t elem_size = src->precision().size();
    auto src_ptr = src->buffer().as<uint8_t*>();
    auto dst_ptr = res->buffer().as<uint8_t*>();

    ie_memcpy(dst_ptr, res->byteSize(), src_ptr + off * elem_size, size * elem_size);

    return res;
}

/** Wraps the memory of `src` into a blob with the given dims, without copying */
static Blob::Ptr wrap_as_tensor(Blob::Ptr src, SizeVector dims) {
    auto res = make_blob_with_precision(
            TensorDesc { src->precision(), dims, plain_layout(dims) },
            src->buffer());
    IE_ASSERT(src->size() == res->size());
    return res;
}

static Blob::Ptr make_region_copy(Blob::Ptr src, SizeVector region, SizeVector offset) {
    IE_ASSERT(region.size() == offset.size());
    IE_ASSERT(region.size() == src->dims().size());

    auto res = make_plain_blob(src->precision(), region);
    res->allocate();

    size_t elem_size = src->precision().size();
    auto src_ptr = src->buffer().as<uint8_t*>();
    auto dst_ptr = res->buffer().as<uint8_t*>();

    // Pad shapes and offsets to rank 3 so a generic 3D copy loop can be used
    auto &dd = src->getTensorDesc().getDims();
    SizeVector src_dims {1, 1, 1};
    std::copy(dd.begin(), dd.end(), src_dims.end() - dd.size());

    SizeVector dims {1, 1, 1};
    std::copy(region.begin(), region.end(), dims.end() - region.size());

    SizeVector off {0, 0, 0};
    std::copy(offset.begin(), offset.end(), off.end() - offset.size());

    const auto D1 = dims[0];
    const auto D2 = dims[1];
    const auto D3 = dims[2];
    const auto off1 = off[0];
    const auto off2 = off[1];
    const auto off3 = off[2];
    const auto str1 = src_dims[1]*src_dims[2];
    const auto str2 = src_dims[2];

    // Copy the region row by row; the innermost dim is contiguous
    for (size_t d1 = 0; d1 < D1; d1++)
    for (size_t d2 = 0; d2 < D2; d2++) {
        auto off_src = (off1 + d1)*str1 + (off2 + d2)*str2 + off3;
        auto off_dst = d1*D2*D3 + d2*D3;
        ie_memcpy(dst_ptr + off_dst * elem_size, res->byteSize() - off_dst * elem_size,
                  src_ptr + off_src * elem_size, D3 * elem_size);
    }

    return res;
}

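/**
 * Expands an RNNCell into primitive layers:
 *   Ht = _f(W * [Xt, Ht-1] + B)
 * implemented as Concat -> FullyConnected -> (optional Clamp) -> Activation.
 */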
static bool unrollRNNCellBody(CNNLayerPtr cur) {
    if (cur->type != "RNNCell")
        return true;

    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to RNNCellBase object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto out_h_state = cell->outData[0];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->inputTo.erase(cell->name);
    for (auto &outs : cell->outData)
        outs->creatorLayer.reset();

    // operations
    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
    auto fc = _fc(name + ":fc", prc, {N, S}, cell->_weights, cell->_biases);
    auto act = _act(name + ":act", prc, {N, S}, cell->activations[0]);

    // Connect
    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc);
    _link_with_clip(fc, act, cell->clip);

    // Output
    act->outData[0] = out_h_state;
    out_h_state->creatorLayer = act;

    return true;
}

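/**
 * Expands an LSTMCell into primitive layers. With gate order f, i, c, o:
 *   [ft, it, ct, ot] = W * [Xt, Ht-1] + B
 *   Ct = _f(ft) (.) Ct-1 + _f(it) (.) _g(ct)
 *   Ht = _f(ot) (.) _h(Ct)
 */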
static bool unrollLSTMCellBody(CNNLayerPtr cur) {
    if (cur->type != "LSTMCell")
        return true;

    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to RNNCellBase object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto in_c_state = cell->insData[2].lock();
    auto out_h_state = cell->outData[0];
    auto out_c_state = cell->outData[1];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];
    size_t G = 4;  // number of gates: f, i, c, o

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->inputTo.erase(cell->name);
    for (auto &outs : cell->outData)
        outs->creatorLayer.reset();

    // operations
    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
    auto split = _split(name + ":split", prc, {N, S}, G);
    auto fc = _fc(name + ":fc", prc, {N, S*G}, cell->_weights, cell->_biases);

    const std::string _f = cell->activations[0], _g = cell->activations[1], _h = cell->activations[2];

    auto act_f = _act(name + ":act_f", prc, {N, S}, _f);
    auto act_i = _act(name + ":act_i", prc, {N, S}, _f);
    auto act_c = _act(name + ":act_c", prc, {N, S}, _g);
    auto act_o = _act(name + ":act_o", prc, {N, S}, _f);
    auto act_x = _act(name + ":act_x", prc, {N, S}, _h);

    auto mul_ic = _eltw(name + ":mul_ic", prc, {N, S}, "mul");
    auto mul_f  = _eltw(name + ":mul_f",  prc, {N, S}, "mul");
    auto sum    = _eltw(name + ":sum",    prc, {N, S}, "sum");
    auto mul    = _eltw(name + ":mul",    prc, {N, S}, "mul");

    // Connect
    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc);

    _link_with_clip(fc, split, cell->clip);

    _link(split, act_f, 0, 0);
    _link(split, act_i, 1, 0);
    _link(split, act_c, 2, 0);
    _link(split, act_o, 3, 0);

    _link(act_i, mul_ic, 0, 0);
    _link(act_c, mul_ic, 0, 1);

    _link(act_f, mul_f, 0, 0);
    _link(in_c_state, mul_f, 1);

    _link(mul_f, sum, 0, 0);
    _link(mul_ic, sum, 0, 1);

    _link(sum, act_x);

    _link(act_x, mul, 0, 0);
    _link(act_o, mul, 0, 1);

    // Output
    mul->outData[0] = out_h_state;
    out_h_state->creatorLayer = mul;

    CombineData(out_c_state, sum->outData[0]);
    sum->outData[0] = out_c_state;
    out_c_state->creatorLayer = sum;

    return true;
}

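/**
 * Expands a GRUCell into primitive layers. The weights are stored with gate
 * order U, R, O, so the update/reset part is sliced away from the output
 * gate part, which is applied either after (default) or before the reset
 * multiplication (linear_before_reset). See the formulas in the body.
 */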
static bool unrollGRUCellBody(CNNLayerPtr cur, bool linear_before_reset = false) {
    if (cur->type != "GRUCell")
        return true;

    auto cell = std::dynamic_pointer_cast<GRUCell>(cur);
    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to GRUCell object";

    auto name = cell->name;

    auto in_data = cell->insData[0].lock();
    auto in_h_state = cell->insData[1].lock();
    auto out_h_state = cell->outData[0];

    auto d_dims = in_data->getTensorDesc().getDims();
    auto s_dims = in_h_state->getTensorDesc().getDims();

    size_t N = d_dims[0];
    size_t D = d_dims[1];
    size_t S = s_dims[1];

    // Split the weights into the UR and O gate parts. The original gate order is URO
    size_t bG = linear_before_reset ? 4 : 3;
    auto orig_W = wrap_as_tensor(cell->_weights, {3, S, D+S});
    auto orig_B = wrap_as_tensor(cell->_biases, {bG, S});

    auto ur_W = make_region_copy(orig_W, {2, S, D+S}, {0, 0, 0});
    auto o_W  = make_region_copy(orig_W, {1, S, D+S}, {2, 0, 0});
    auto ur_B = make_region_copy(orig_B, {2, S}, {0, 0});
    auto o_B  = make_region_copy(orig_B, {1, S}, {2, 0});

    auto prc = cell->precision;

    /** Release links on the cell */
    for (auto &ins : cell->insData)
        ins.lock()->inputTo.erase(cell->name);
    for (auto &outs : cell->outData)
        outs->creatorLayer.reset();

    // operations
    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
    auto split = _split(name + ":split", prc, {N, S}, 2);
    auto fc_ur = _fc(name + ":fc_ur", prc, {N, S*2}, ur_W, ur_B);

    const std::string _f = cell->activations[0], _g = cell->activations[1];

    auto act_ur = _act(name + ":act_ur", prc, {N, 2*S}, _f);
    auto act_o = _act(name + ":act_o", prc, {N, S}, _g);

    auto mul_u = _eltw(name + ":mul_u", prc, {N, S}, "mul");
    auto mul_r = _eltw(name + ":mul_r", prc, {N, S}, "mul");

    auto pwr_m1 = _pwr(name + ":pwr", prc, {N, S}, -1.0, 1.0);

    auto mul = _eltw(name + ":mul", prc, {N, S}, "mul");
    auto sum = _eltw(name + ":sum", prc, {N, S}, "sum");

    /**
     * ([a, b] - concatenation, (.) - element-wise multiplication)
     * - zt = _f(Wz*[Xt, Ht-1] + Bz)
     * - rt = _f(Wr*[Xt, Ht-1] + Br)
     * - ht = _g(Wh*[Xt, (rt (.) Ht-1)] + Bh)                 # default, when linear_before_reset = 0
     * - ht = _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr)))    # when linear_before_reset != 0
     * - Ht = (1 - zt) (.) ht + zt (.) Ht-1
     */
    _link(in_data, concat, 0);
    _link(in_h_state, concat, 1);
    _link(concat, fc_ur);
    _link_with_clip(fc_ur, act_ur, cell->clip);
    _link(act_ur, split);  // split[0] - zt, split[1] - rt

    if (linear_before_reset) {
        auto lbr_B = wrap_as_tensor(orig_B, {4, S});

        auto whw_W = make_region_copy(o_W, {1, S, D}, {0, 0, 0});
        auto whr_W = make_region_copy(o_W, {1, S, S}, {0, 0, D});
        auto whw_B = make_region_copy(lbr_B, {1, S}, {2, 0});
        auto whr_B = make_region_copy(lbr_B, {1, S}, {3, 0});

        auto fc_whr = _fc(name + ":fc_whr", prc, {N, S}, whr_W, whr_B);
        auto fc_whw = _fc(name + ":fc_whw", prc, {N, S}, whw_W, whw_B);
        auto sum_h = _eltw(name + ":sum_h", prc, {N, S}, "sum");

        _link(in_h_state, fc_whr);                  // Whr*Ht-1 + Bhr
        _link(fc_whr, mul_r, 0);                    //
        _link(split, mul_r, 1, 1);                  // rt (.) (Whr*Ht-1 + Bhr)
        _link(in_data, fc_whw);                     // Whw*Xt + Bhw
        _link(fc_whw, sum_h, 0, 0);                 //
        _link(mul_r, sum_h, 0, 1);                  // Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr))
        _link_with_clip(sum_h, act_o, cell->clip);  // _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr)))
    } else {
        auto fc_wh = _fc(name + ":fc_o", prc, {N, S}, o_W, o_B);
        auto concat_h = _concat(name + ":concat_h", prc, {N, D+S}, 2);

        _link(split, mul_r, 1, 0);                  //
        _link(in_h_state, mul_r, 1);                // rt (.) Ht-1
        _link(in_data, concat_h, 0);                //
        _link(mul_r, concat_h, 0, 1);               // [Xt, (rt (.) Ht-1)]
        _link(concat_h, fc_wh);                     // Wh*[Xt, (rt (.) Ht-1)] + Bh
        _link_with_clip(fc_wh, act_o, cell->clip);  // _g(Wh*[Xt, (rt (.) Ht-1)] + Bh)
    }

    _link(split, pwr_m1, 0, 0);   // 1 - zt
    _link(act_o, mul, 0, 0);      //
    _link(pwr_m1, mul, 0, 1);     // (1 - zt) (.) ht
    _link(split, mul_u, 0, 0);    //
    _link(in_h_state, mul_u, 1);  // zt (.) Ht-1
    _link(mul, sum, 0, 0);        //
    _link(mul_u, sum, 0, 1);      // (1 - zt) (.) ht + zt (.) Ht-1

    // Output
    sum->outData[0] = out_h_state;
    out_h_state->creatorLayer = sum;

    return true;
}

static bool unrollCell(CNNLayerPtr cur, ICNNNetwork &net) {
    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
    switch (cell->cellType) {
        case RNNCellBase::LSTM:    return unrollLSTMCellBody(cur);
        case RNNCellBase::GRU:     return unrollGRUCellBody(cur);
        case RNNCellBase::GRU_LBR: return unrollGRUCellBody(cur, true);
        case RNNCellBase::RNN:     return unrollRNNCellBody(cur);
    }
    return false;
}

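/**
 * Replaces an RNN/GRU/LSTM ***Sequence layer with an equivalent TensorIterator
 * whose body is squeeze -> cell -> unsqueeze (the inverse of convertToRNNSeq).
 * The data port iterates along the sequence axis; every state port travels
 * through a back edge. The resulting TI can then be unrolled by unrollTI.
 */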
static bool unrollSeq(CNNLayerPtr cur, ICNNNetwork &net) {
    if (!one_of(cur->type, "LSTMSequence", "GRUSequence", "RNNSequence"))
        return true;

    auto seq = std::dynamic_pointer_cast<RNNSequenceLayer>(cur);
    IE_ASSERT(seq) << "Cannot cast object with type ***Sequence to RNNSequenceLayer object";

    auto name = seq->name;

    auto in_data = seq->insData[0].lock();
    auto in_h_state = seq->insData[1].lock();
    auto out_data = seq->outData[0];

    auto in_d_dims = in_data->getTensorDesc().getDims();
    auto state_dims = in_h_state->getTensorDesc().getDims();
    auto out_d_dims = out_data->getTensorDesc().getDims();

    const int axis = seq->axis;
    const auto direct = seq->direction;
    const auto prc = seq->precision;

    /** Release links on Seq */
    for (auto &ins : seq->insData)
        ins.lock()->inputTo.erase(seq->name);
    for (auto &outs : seq->outData)
        outs->creatorLayer.reset();

    /** Body subgraph: squeeze -> cell -> unsqueeze */
    auto in_d_body_dims = in_d_dims;
    in_d_body_dims[axis] = 1;

    auto in_d_body_squeeze_dims = in_d_dims;
    in_d_body_squeeze_dims.erase(in_d_body_squeeze_dims.begin() + axis);

    auto out_d_body_dims = out_d_dims;
    out_d_body_dims[axis] = 1;

    auto out_d_body_squeeze_dims = out_d_dims;
    out_d_body_squeeze_dims.erase(out_d_body_squeeze_dims.begin() + axis);

    auto body_in_data = DataPtr(new Data(name + ":data_in",
            TensorDesc { prc, in_d_body_dims, TensorDesc::getLayoutByDims(in_d_body_dims) }));

    auto resh1 = _resh(name + ":resh1", prc, in_d_body_squeeze_dims);
    auto cell  = _cell(name + ":cell", prc, out_d_body_squeeze_dims, state_dims, seq->cellType);
    auto resh2 = _resh(name + ":resh2", prc, out_d_body_dims);

    _link(body_in_data, resh1);
    _link(resh1, cell);
    _link(cell, resh2);

    cell->_weights = seq->_weights;
    cell->_biases = seq->_biases;
    cell->hidden_size = seq->hidden_size;
    cell->clip = seq->clip;
    cell->activations = seq->activations;
    cell->activation_alpha = seq->activation_alpha;
    cell->activation_beta = seq->activation_beta;

    const size_t NS = cell->outData.size();  // number of states

    /** TI layer */
    auto ti = _ti(name + ":ti", prc, NS);
    _link(in_data, ti, 0);

    ti->outData[0] = out_data;
    out_data->creatorLayer = ti;

    ti->body.inputs.push_back(body_in_data);
    ti->body.outputs.push_back(resh2->outData[0]);

    int start = direct == RNNSequenceLayer::FWD ? 0 : -1;
    int end   = direct == RNNSequenceLayer::FWD ? -1 : 0;
    int step  = direct == RNNSequenceLayer::FWD ? 1 : -1;
    ti->input_port_map.push_back({0, 0, axis, step, start, end, 1});
    ti->output_port_map.push_back({0, 0, axis, step, start, end, 1});

    for (size_t i = 0; i < NS; i++) {
        auto in_state = seq->insData[1 + i].lock();
        _link(in_state, ti, 1 + i);

        auto out_state = seq->outData[1 + i];
        ti->outData[1 + i] = out_state;
        out_state->creatorLayer = ti;

        auto body_in_state = DataPtr(new Data(name + ":state_in_" + std::to_string(i),
                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));

        _link(body_in_state, cell, 1 + i);

        ti->body.inputs.push_back(body_in_state);
        ti->body.outputs.push_back(cell->outData[i]);

        const int ii = 1 + static_cast<int>(i);
        ti->input_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
        ti->output_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
        ti->back_edges.push_back({ii, ii, -1, 0, 0, 0, 0});
    }

    return true;
}

/************************************************************/
/****  Converter API  ***************************************/
/************************************************************/

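/** Applies `action` to every layer of the network (in topological order) and accumulates the status */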
template <typename T>
bool ApplyForAll(ICNNNetwork &net, T action) {
    auto all_layers = details::CNNNetSortTopologically(net);
    bool sts = true;

    for (auto &layer : all_layers)
        sts &= action(layer, net);

    return sts;
}

template <typename T, typename P>
bool ApplyForAll_if(ICNNNetwork &net, T action, P pred) {
    auto all_layers = details::CNNNetSortTopologically(net);
    bool sts = true;

    for (auto &layer : all_layers)
        if (pred(layer))
            sts &= action(layer, net);

    return sts;
}

bool CombineRNNSeq(ICNNNetwork &net) {
    return ApplyForAll(net, convertToRNNSeq);
}

bool UnrollTI(ICNNNetwork &net) {
    return ApplyForAll(net, unrollTI);
}

bool UnrollRNN_if(ICNNNetwork &net, const std::function<bool(const RNNCellBase&)> pred) {
    // Filter layers by RNN specific type
    auto _seq_pred = [&] (CNNLayerPtr layer) {
        auto rnn = std::dynamic_pointer_cast<RNNSequenceLayer>(layer);
        if (!rnn) return false;
        return pred(*rnn.get());
    };
    auto _cell_pred = [&] (CNNLayerPtr layer) {
        auto rnn = std::dynamic_pointer_cast<RNNCellBase>(layer);
        if (!rnn || !one_of(rnn->type, "LSTMCell", "GRUCell", "RNNCell")) return false;
        return pred(*rnn.get());
    };

    bool res = true;
    res &= ApplyForAll_if(net, unrollSeq, _seq_pred);
    res &= ApplyForAll_if(net, unrollCell, _cell_pred);
    return res;
}

}  // namespace NetPass
}  // namespace InferenceEngine