Move downgrade passes to pass folder (#1675)
platform/upstream/dldt.git: ngraph/test/runtime/interpreter/int_executable.cpp
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "int_executable.hpp"
#include "backend_manager.hpp"
#include "ngraph/chrome_trace.hpp"
#include "ngraph/cpio.hpp"
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "ngraph/ops.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/util.hpp"
#include "pass/fused_op_decomposition.hpp"
#include "pass/like_replacement.hpp"
#include "pass/liveness.hpp"
#include "pass/opset0_downgrade.hpp"
#include "pass/opset1_downgrade.hpp"

using namespace std;
using namespace ngraph;

NGRAPH_SUPPRESS_DEPRECATED_START

using descriptor::layout::DenseTensorLayout;

runtime::interpreter::OP_TYPEID runtime::interpreter::INTExecutable::get_typeid(const Node& node)
{
    const NodeTypeInfo& type_info = node.get_type_info();
    // This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
    // {Abs::type_info, OP_TYPEID::Abs},
    // {Acos::type_info, OP_TYPEID::Acos},
    // ...
    static const map<NodeTypeInfo, OP_TYPEID> type_info_map{
#define NGRAPH_OP(NAME, NAMESPACE) {NAMESPACE::NAME::type_info, OP_TYPEID::ID_SUFFIX(NAME)},
#include "opset_int_tbl.hpp"
#undef NGRAPH_OP
    };
    OP_TYPEID rc = OP_TYPEID::UnknownOp;

    auto it = type_info_map.find(type_info);
    if (it != type_info_map.end())
    {
        rc = it->second;
    }
    return rc;
}

runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& function,
                                                   bool enable_performance_collection)
    : m_is_compiled{true}
    , m_performance_counters_enabled{enable_performance_collection}
{
    m_function = clone_function(*function);
    auto is_supported = [](const Node& node) {
        bool retval = false;
        switch (INTExecutable::get_typeid(node))
        {
        case OP_TYPEID::Clamp:
        case OP_TYPEID::MatMul:
        case OP_TYPEID::Squeeze:
        case OP_TYPEID::PRelu:
        case OP_TYPEID::Unsqueeze: retval = true; break;
        default: break;
        }
        return retval;
    };
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::LikeReplacement>();
    pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
    pass_manager.register_pass<pass::Opset1Downgrade>();
    pass_manager.register_pass<pass::Opset0Downgrade>();
    // Need to decompose any v0 fused ops, which were produced by the downgrade pass
    pass_manager.register_pass<pass::FusedOpDecomposition>(is_supported);
    pass_manager.run_passes(m_function);
    for (auto node : m_function->get_ordered_ops())
    {
        m_nodes.push_back(node);
    }
    set_parameters_and_results(*m_function);
}

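// Typical construction path (a minimal sketch, not part of this file): callers do not
// normally invoke the INTExecutable constructor directly; they go through the backend
// API, which forwards the cloned function here via compile():
//
//     auto backend = runtime::Backend::create("INTERPRETER");
//     std::shared_ptr<runtime::Executable> exec = backend->compile(f); // f: shared_ptr<Function>
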
bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
                                               const vector<shared_ptr<runtime::Tensor>>& inputs)
{
    event::Duration d1("call", "Interpreter");

    // convert inputs to HostTensor
    vector<shared_ptr<HostTensor>> func_inputs;
    for (auto tensor : inputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_inputs.push_back(host_tensor);
    }
    if (m_nan_check_enabled)
    {
        perform_nan_check(func_inputs);
    }

    // convert outputs to HostTensor
    vector<shared_ptr<HostTensor>> func_outputs;
    for (auto tensor : outputs)
    {
        auto host_tensor = static_pointer_cast<runtime::HostTensor>(tensor);
        func_outputs.push_back(host_tensor);
    }

    // map function params -> HostTensor
    unordered_map<descriptor::Tensor*, shared_ptr<HostTensor>> tensor_map;
    size_t input_count = 0;
    for (auto param : get_parameters())
    {
        for (size_t i = 0; i < param->get_output_size(); ++i)
        {
            descriptor::Tensor* tensor = &param->output(i).get_tensor();
            tensor_map.insert({tensor, func_inputs[input_count++]});
        }
    }

    // map function outputs -> HostTensor
    for (size_t output_count = 0; output_count < get_results().size(); ++output_count)
    {
        auto output = get_results()[output_count];
        if (!is_type<op::Result>(output))
        {
            throw ngraph_error("One of function's outputs isn't op::Result");
        }
        descriptor::Tensor* tensor = &output->get_output_tensor(0);
        tensor_map.insert({tensor, func_outputs[output_count]});
    }

    // for each ordered op in the graph
    for (auto op : m_nodes)
    {
        event::Duration d2(op->description(), "Interpreter");
        if (op::is_parameter(op))
        {
            continue;
        }

        // get op inputs from map
        vector<shared_ptr<HostTensor>> op_inputs;
        for (auto input : op->inputs())
        {
            descriptor::Tensor* tensor = &input.get_tensor();
            op_inputs.push_back(tensor_map.at(tensor));
        }

        // get op outputs from map or create
        vector<shared_ptr<HostTensor>> op_outputs;
        for (size_t i = 0; i < op->get_output_size(); ++i)
        {
            descriptor::Tensor* tensor = &op->output(i).get_tensor();
            shared_ptr<HostTensor> host_tensor;
            auto it = tensor_map.find(tensor);
            if (it == tensor_map.end())
            {
                host_tensor = make_shared<HostTensor>(op->output(i));
                tensor_map.insert({tensor, host_tensor});
            }
            else
            {
                host_tensor = it->second;
            }
            op_outputs.push_back(host_tensor);
        }

        // get op type
        element::Type type;
        if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::Dequantize>(op))
        {
            type = op->get_input_element_type(0);
        }
        else if (is_type<op::Equal>(op) || is_type<op::Greater>(op) || is_type<op::GreaterEq>(op) ||
                 is_type<op::Less>(op) || is_type<op::LessEq>(op) || is_type<op::NotEqual>(op))
        {
            // Comparison ops produce a boolean output, so dispatch on an input type instead.
            // All BinaryElementwiseComparison ops have the same element type on both inputs,
            // so input 1 serves as well as input 0 here.
            type = op->get_input_element_type(1);
        }
        else if (is_type<op::TopK>(op))
        {
            type = op->get_output_element_type(1);
        }
        else
        {
            type = op->get_output_element_type(0);
        }

        if (m_performance_counters_enabled)
        {
            m_timer_map[op].start();
        }
        if (!op->evaluate(op_outputs, op_inputs))
        {
            generate_calls(type, *op.get(), op_outputs, op_inputs);
        }
        if (m_performance_counters_enabled)
        {
            m_timer_map[op].stop();
        }
        if (m_nan_check_enabled)
        {
            perform_nan_check(op_outputs, op.get());
        }
    }

    return true;
}

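// Example end-to-end invocation (a minimal sketch; assumes a Function `f` with one f32
// input and one f32 output of Shape{2, 2}, created elsewhere, and that Tensor::write
// takes a pointer plus a byte count):
//
//     auto backend = runtime::Backend::create("INTERPRETER");
//     auto exec = backend->compile(f);
//     auto in = backend->create_tensor(element::f32, Shape{2, 2});
//     auto out = backend->create_tensor(element::f32, Shape{2, 2});
//     std::vector<float> data{1, 2, 3, 4};
//     in->write(data.data(), data.size() * sizeof(float));
//     exec->call({out}, {in}); // outputs first, inputs second
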
void runtime::interpreter::INTExecutable::generate_calls(const element::Type& type,
                                                         const Node& op,
                                                         const vector<shared_ptr<HostTensor>>& out,
                                                         const vector<shared_ptr<HostTensor>>& in)
{
    stringstream ss;
    switch (type)
    {
    case element::Type_t::boolean: op_engine<char>(op, out, in); break;
    case element::Type_t::f32: op_engine<float>(op, out, in); break;
    case element::Type_t::f64: op_engine<double>(op, out, in); break;
    case element::Type_t::i8: op_engine<int8_t>(op, out, in); break;
    case element::Type_t::i16: op_engine<int16_t>(op, out, in); break;
    case element::Type_t::i32: op_engine<int32_t>(op, out, in); break;
    case element::Type_t::i64: op_engine<int64_t>(op, out, in); break;
    case element::Type_t::u8: op_engine<uint8_t>(op, out, in); break;
    case element::Type_t::u16: op_engine<uint16_t>(op, out, in); break;
    case element::Type_t::u32: op_engine<uint32_t>(op, out, in); break;
    case element::Type_t::u64: op_engine<uint64_t>(op, out, in); break;
    case element::Type_t::undefined:
    case element::Type_t::dynamic:
    case element::Type_t::u1:
    case element::Type_t::bf16:
    case element::Type_t::f16:
        ss << "unsupported element type " << type << " op " << op.get_name();
        throw ngraph_error(ss.str());
    }
}

void runtime::interpreter::INTExecutable::set_nan_check(bool enable)
{
    m_nan_check_enabled = enable;
}

vector<runtime::PerformanceCounter>
    runtime::interpreter::INTExecutable::get_performance_data() const
{
    vector<runtime::PerformanceCounter> rc;
    for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
    {
        rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
    }
    return rc;
}

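// Reading the counters (a sketch; the PerformanceCounter accessor names below are
// assumed from this runtime's interface and may differ):
//
//     auto exec = backend->compile(f, /*enable_performance_data=*/true);
//     exec->call({out}, {in});
//     for (const runtime::PerformanceCounter& pc : exec->get_performance_data())
//     {
//         std::cout << pc.get_node()->get_name() << ": " << pc.total_microseconds()
//                   << "us over " << pc.call_count() << " call(s)\n";
//     }
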
void runtime::interpreter::INTExecutable::perform_nan_check(
    const vector<shared_ptr<HostTensor>>& tensors, const Node* op)
{
    size_t arg_number = 1;
    for (const shared_ptr<HostTensor>& tensor : tensors)
    {
        const element::Type& type = tensor->get_element_type();
        if (type == element::f32)
        {
            const float* data = tensor->get_data_ptr<float>();
            for (size_t i = 0; i < tensor->get_element_count(); i++)
            {
                if (std::isnan(data[i]))
                {
                    if (op)
                    {
                        throw runtime_error("nan found in op '" + op->get_name() + "' output");
                    }
                    else
                    {
                        throw runtime_error("nan found in function's input tensor number " +
                                            to_string(arg_number));
                    }
                }
            }
        }
        else if (type == element::f64)
        {
            const double* data = tensor->get_data_ptr<double>();
            for (size_t i = 0; i < tensor->get_element_count(); i++)
            {
                if (std::isnan(data[i]))
                {
                    if (op)
                    {
                        throw runtime_error("nan found in op '" + op->get_name() + "' output");
                    }
                    else
                    {
                        throw runtime_error("nan found in function's input tensor number " +
                                            to_string(arg_number));
                    }
                }
            }
        }
        arg_number++;
    }
}

shared_ptr<ngraph::op::Parameter>
    runtime::interpreter::INTExecutable::get_parameter(size_t index) const
{
    const ParameterVector& parameters = get_parameters();
    NGRAPH_CHECK(index < parameters.size(), "create_tensor for input out of bounds");
    return parameters[index];
}

shared_ptr<ngraph::op::Result> runtime::interpreter::INTExecutable::get_result(size_t index) const
{
    const ResultVector& results = get_results();
    NGRAPH_CHECK(index < results.size(), "create_tensor for output out of bounds");
    return results[index];
}

shared_ptr<runtime::Tensor>
    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index)
{
    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
    return make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape());
}

shared_ptr<runtime::Tensor>
    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index)
{
    shared_ptr<op::Result> result = get_result(output_index);
    return make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape());
}

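// Usage of the per-index tensor factories (a minimal sketch; assumes `exec` holds a
// compiled INTExecutable whose input/output 0 are f32, that `in_data`/`out_data` are
// correctly sized float vectors, and that Tensor::write/read take a pointer plus byte count):
//
//     auto input = exec->create_input_tensor(0);
//     auto output = exec->create_output_tensor(0);
//     input->write(in_data.data(), in_data.size() * sizeof(float));
//     exec->call({output}, {input});
//     output->read(out_data.data(), out_data.size() * sizeof(float));
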
vector<shared_ptr<runtime::Tensor>>
    runtime::interpreter::INTExecutable::create_input_tensor(size_t input_index,
                                                             size_t pipeline_depth)
{
    vector<shared_ptr<runtime::HostTensor>> tensors;
    shared_ptr<op::Parameter> parameter = get_parameter(input_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        tensors.push_back(
            make_shared<runtime::HostTensor>(parameter->get_element_type(), parameter->get_shape()));
    }
    vector<shared_ptr<runtime::Tensor>> result_tensors;
    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
    {
        result_tensors.push_back(tensor);
    }
    return result_tensors;
}

vector<shared_ptr<runtime::Tensor>>
    runtime::interpreter::INTExecutable::create_output_tensor(size_t output_index,
                                                              size_t pipeline_depth)
{
    vector<shared_ptr<runtime::HostTensor>> tensors;
    shared_ptr<op::Result> result = get_result(output_index);
    for (size_t i = 0; i < pipeline_depth; i++)
    {
        tensors.push_back(
            make_shared<runtime::HostTensor>(result->get_element_type(), result->get_shape()));
    }
    vector<shared_ptr<runtime::Tensor>> result_tensors;
    for (const shared_ptr<runtime::HostTensor>& tensor : tensors)
    {
        result_tensors.push_back(tensor);
    }
    return result_tensors;
}
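
// Pipelined variant (a sketch): with pipeline_depth == 2 each factory returns two
// independent host tensors per index, so a caller can fill slot 1 while the executable
// consumes slot 0 (simple double buffering):
//
//     auto ins = exec->create_input_tensor(0, 2);   // ins[0], ins[1]
//     auto outs = exec->create_output_tensor(0, 2); // outs[0], outs[1]
//     for (size_t i = 0; i < iterations; ++i)
//     {
//         size_t slot = i % 2;
//         // fill ins[slot], then:
//         exec->call({outs[slot]}, {ins[slot]});
//     }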