Remove obsoleted v0::Broadcast and BroadcastLike operators (#2779)
platform/upstream/dldt.git / ngraph/test/runtime/interpreter/int_executable.cpp
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "int_executable.hpp"
#include "backend_manager.hpp"
#include "ngraph/chrome_trace.hpp"
#include "ngraph/except.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "ngraph/ops.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/util.hpp"
#include "pass/fused_op_decomposition.hpp"
#include "pass/liveness.hpp"
#include "pass/opset0_downgrade.hpp"
#include "pass/opset1_downgrade.hpp"

using namespace std;
using namespace ngraph;

NGRAPH_SUPPRESS_DEPRECATED_START

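// Map a node's type_info to the interpreter's OP_TYPEID enum; nodes without an entry in the
// generated table map to OP_TYPEID::UnknownOp.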
runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
{
    const NodeTypeInfo& type_info = node.get_type_info();
    // This expands the op list in opset_int_tbl.hpp into a list of enumerations that look like:
    // {Abs::type_info, OP_TYPEID::Abs},
    // {Acos::type_info, OP_TYPEID::Acos},
    // ...
    static const map<NodeTypeInfo, OP_TYPEID> type_info_map{
#define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::ID_SUFFIX(NAME)},
#include "opset_int_tbl.hpp"
#undef NGRAPH_OP
    };
    OP_TYPEID rc = OP_TYPEID::UnknownOp;

    auto it = type_info_map.find(type_info);
    if (it != type_info_map.end())
    {
        rc = it->second;
    }
    return rc;
}

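// Clone the function and lower it to ops the interpreter can execute: decompose fused ops,
// downgrade opset1 ops to opset0, then decompose any fused ops produced by the downgrade.
// Ops accepted by is_supported are left intact rather than decomposed.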
runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
                                                   bool enable_performance_collection)
    : m_is_compiled{true}
    , m_performance_counters_enabled{enable_performance_collection}
{
    m_function = clone_function(*function);
    auto is_supported = [](const Node& node) {
        bool retval = false;
        switch (INTExecutable::get_typeid(node))
        {
        case OP_TYPEID::Clamp:
        case OP_TYPEID::MatMul:
        case OP_TYPEID::NormalizeL2:
        case OP_TYPEID::PRelu:
        case OP_TYPEID::Squeeze:
        case OP_TYPEID::Unsqueeze: retval = true; break;
        default: break;
        }
        return retval;
    };
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
    pass_manager.register_pass<pass::Opset1Downgrade>();
    pass_manager.register_pass<pass::Opset0Downgrade>();
    // Need to decompose any v0 fused ops, which were produced by the downgrade pass
    pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
    pass_manager.run_passes(m_function);
    for (auto node : m_function->get_ordered_ops())
    {
        m_nodes.push_back(node);
    }
    set_parameters_and_results(*m_function);
}

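// Execute the compiled function: wrap the input and output tensors as HostTensors, walk the
// ops in topological order, and evaluate each one, falling back to the type-dispatched
// generate_calls() when a node does not implement evaluate().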
bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
                                               const vector<shared_ptr<runtime::Tensor>>& inputs)
{
    event::Duration d1("call", "Interpreter");

    // convert inputs to HostTensor
    vector<shared_ptr<HostTensor>> func_inputs;
    for (auto tensor : inputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_inputs.push_back(host_tensor);
    }
    if (m_nan_check_enabled)
    {
        perform_nan_check(func_inputs);
    }

    // convert outputs to HostTensor
    vector<shared_ptr<HostTensor>> func_outputs;
    for (auto tensor : outputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_outputs.push_back(host_tensor);
    }

    // map function params -> HostTensor
    unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
    size_t input_count = 0;
    for (auto param : get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            descriptor::Tensor* tensor = &param->output(i).get_tensor();
            tensor_map.insert({tensor, func_inputs[input_count++]});
        }
    }

    // map function outputs -> HostTensor
    for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
    {
        auto output = get_results()[output_count];
        if (!is_type<op::Result>(output))
        {
            throw ngraph_error("One of function's outputs isn't op::Result");
        }
        descriptor::Tensor* tensor = &output->get_output_tensor(0);
        tensor_map.insert({tensor, func_outputs[output_count]});
    }

    // for each ordered op in the graph
    for (auto op : m_nodes)
    {
        event::Duration d2(op->description(), "Interpreter");
        if (op::is_parameter(op))
        {
            continue;
        }

        // get op inputs from map
        vector<shared_ptr<HostTensor>> op_inputs;
        for (auto input : op->inputs())
        {
            descriptor::Tensor* tensor = &input.get_tensor();
            op_inputs.push_back(tensor_map.at(tensor));
        }

        // get op outputs from map or create
        vector<shared_ptr<HostTensor>> op_outputs;
        for (size_t i = 0; i < op->get_output_size(); ++i)
        {
            descriptor::Tensor* tensor = &op->output(i).get_tensor();
            shared_ptr<HostTensor> host_tensor;
            auto it = tensor_map.find(tensor);
            if (it == tensor_map.end())
            {
                host_tensor = make_shared<HostTensor>(op->output(i));
                tensor_map.insert({tensor, host_tensor});
            }
            else
            {
                host_tensor = it->second;
            }
            op_outputs.push_back(host_tensor);
        }

        // get op type
        element::Type type;
        if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::PriorBox>(op))
        {
            type = op->get_input_element_type(0);
        }
        else if (is_type<op::Equal>(op) || is_type<op::Greater>(op) || is_type<op::GreaterEq>(op) ||
                 is_type<op::Less>(op) || is_type<op::LessEq>(op) || is_type<op::NotEqual>(op))
        {
            // Get the type of the second input, not the first
            // All BinaryElementwiseComparison ops have the same type for inputs
            // Select has bool for first input and the type we are interested in for the second
            type = op->get_input_element_type(1);
        }
        else if (is_type<op::TopK>(op))
        {
            type = op->get_output_element_type(1);
        }
        else
        {
            type = op->get_output_element_type(0);
        }

        if (m_performance_counters_enabled)
        {
            m_timer_map[op].start();
        }
        if (!op->evaluate(op_outputs, op_inputs))
        {
            generate_calls(type, *op.get(), op_outputs, op_inputs);
        }
        if (m_performance_counters_enabled)
        {
            m_timer_map[op].stop();
        }
        if (m_nan_check_enabled)
        {
            perform_nan_check(op_outputs, op.get());
        }
    }

    return true;
}

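// Dispatch to the templated op_engine instantiation that matches the element type chosen in
// call() above; unsupported element types are reported as errors.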
void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
                                                         const Node& op,
                                                         const vector<shared_ptr<HostTensor>>& out,
                                                         const vector<shared_ptr<HostTensor>>& in)
{
    stringstream ss;
    switch (type)
    {
    case element::Type_t::boolean: op_engine<char>(op, out, in); break;
    case element::Type_t::f32: op_engine<float>(op, out, in); break;
    case element::Type_t::f64: op_engine<double>(op, out, in); break;
    case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
    case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
    case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
    case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
    case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
    case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
    case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
    case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
    case element::Type_t::undefined:
    case element::Type_t::dynamic:
    case element::Type_t::u1:
    case element::Type_t::bf16:
    case element::Type_t::f16:
        ss << "unsupported element type " << type << " op " << op.get_name();
        throw ngraph_error(ss.str());
    }
}

void runtime::interpreter::INTExecutable::set_nan_check(bool enable)
{
    m_nan_check_enabled = enable;
}

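// Report the per-op timing data collected during call() when performance counters are enabled.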
vector<runtime::PerformanceCounter>
    runtime::interpreter::INTExecutable::get_performance_data() const
{
    vector<runtime::PerformanceCounter> rc;
    for (const auto& p : m_timer_map)
    {
        rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
    }
    return rc;
}

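// Scan f32/f64 tensors for NaN values and raise an error naming either the producing op or,
// for function inputs, the 1-based argument number.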
void runtime::interpreter::INTExecutable::perform_nan_check(
    const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
{
    size_t arg_number = 1;
    for (const shared_ptr<HostTensor>& tensor : tensors)
    {
        const element::Type& type = tensor->get_element_type();
        if (type == element::f32)
        {
            const float* data = tensor->get_data_ptr<float>();
            for (size_t i = 0; i < tensor->get_element_count(); i++)
            {
                if (std::isnan(data[i]))
                {
                    if (op)
                    {
                        throw runtime_error("nan found in op '" + op->get_name() + "' output");
                    }
                    else
                    {
                        throw runtime_error("nan found in function's input tensor number " +
                                            to_string(arg_number));
                    }
                }
            }
        }
        else if (type == element::f64)
        {
            const double* data = tensor->get_data_ptr<double>();
            for (size_t i = 0; i < tensor->get_element_count(); i++)
            {
                if (std::isnan(data[i]))
                {
                    if (op)
                    {
                        throw runtime_error("nan found in op '" + op->get_name() + "' output");
                    }
                    else
                    {
                        throw runtime_error("nan found in function's input tensor number " +
                                            to_string(arg_number));
                    }
                }
            }
        }
        arg_number++;
    }
}

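// Bounds-checked accessors for the function's parameters and results.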
shared_ptr<ngraph::op::Parameter>
    runtime::interpreter::INTExecutable::get_parameter(size_t index) const
{
    const ParameterVector& parameters = get_parameters();
    NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
    return parameters[index];
}

shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
{
    const ResultVector& results = get_results();
    NGRAPH_CHECK(index < results.size(), "create_tensor for output out of bounds");
    return results[index];
}
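
// Tensor factory helpers used by the Executable interface: allocate host tensors that match a
// given function input or output; the pipeline_depth overloads create one tensor per stage.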
shared_ptr<runtime::Tensor>
    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
{
    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
    return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
}

shared_ptr<runtime::Tensor>
    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
{
    shared_ptr<op::Result> result = get_result(output_index);
    return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
}

vector<shared_ptr<runtime::Tensor>>
    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
                                                             size_t pipeline_depth)
{
    vector<shared_ptr<runtime::HostTensor>> tensors;
    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        tensors.push_back(
            make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape()));
    }
    vector<shared_ptr<runtime::Tensor>> result_tensors;
    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
    {
        result_tensors.push_back(tensor);
    }
    return result_tensors;
}

vector<shared_ptr<runtime::Tensor>>
    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
                                                              size_t pipeline_depth)
{
    vector<shared_ptr<runtime::HostTensor>> tensors;
    shared_ptr<op::Result> result = get_result(output_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        tensors.push_back(
            make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape()));
    }
    vector<shared_ptr<runtime::Tensor>> result_tensors;
    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
    {
        result_tensors.push_back(tensor);
    }
    return result_tensors;
}