Move downgrade passes to pass folder (#1675)
[platform/upstream/dldt.git] / ngraph / test / runtime / dynamic / dynamic_backend.cpp
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "dynamic_backend.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/range.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/transpose.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/specialize_function.hpp"
#include "ngraph/util.hpp"
#include "pass/dyn_elimination.hpp"
#include "pass/opset0_downgrade.hpp"
#include "pass/opset1_downgrade.hpp"
#include "pass/shape_relevance.hpp"

using namespace std;
using namespace ngraph;

runtime::dynamic::DynamicBackend::DynamicBackend(shared_ptr<runtime::Backend> wrapped_backend)
    : m_wrapped_backend(std::move(wrapped_backend))
{
}

shared_ptr<runtime::Tensor> runtime::dynamic::DynamicBackend::create_tensor()
{
    return m_wrapped_backend->create_tensor();
}

shared_ptr<runtime::Tensor>
    runtime::dynamic::DynamicBackend::create_tensor(const element::Type& type, const Shape& shape)
{
    return m_wrapped_backend->create_tensor(type, shape);
}

shared_ptr<runtime::Tensor> runtime::dynamic::DynamicBackend::create_tensor(
    const element::Type& type, const Shape& shape, void* memory_pointer)
{
    return m_wrapped_backend->create_tensor(type, shape, memory_pointer);
}

std::shared_ptr<runtime::Tensor>
    runtime::dynamic::DynamicBackend::create_dynamic_tensor(const element::Type& type,
                                                            const PartialShape& shape)
{
    return make_shared<DynamicTensor>(type, shape, m_wrapped_backend);
}

shared_ptr<runtime::Executable>
    runtime::dynamic::DynamicBackend::compile(shared_ptr<Function> function,
                                              bool enable_performance_collection)
{
    return make_shared<runtime::dynamic::DynamicExecutable>(
        function, m_wrapped_backend, enable_performance_collection);
}

runtime::dynamic::DynamicExecutable::DynamicExecutable(shared_ptr<Function> wrapped_function,
                                                       shared_ptr<runtime::Backend> wrapped_backend,
                                                       bool enable_performance_collection)
    : m_wrapped_function(wrapped_function)
    , m_wrapped_backend(wrapped_backend)
    , m_enable_performance_collection(enable_performance_collection)
{
    pass::Manager passes;
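    // ShapeRelevance marks the parameters whose runtime values (not just their
    // shapes) feed shape computations; call() consults is_relevant_to_shapes()
    // both when building the cache key and when pinning values for
    // specialize_function.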
    passes.register_pass<pass::ShapeRelevance>();
    passes.run_passes(m_wrapped_function);

    set_parameters_and_results(*wrapped_function);
}

// Due to clang++-3.9 bugs, this needs to be a non-static separate function from
// count_dyn_nodes.
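// It flags the ops that the ConstantFolding/DynElimination rounds in
// DynamicExecutable::call are expected to replace with statically shaped
// equivalents.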
bool is_dynamic_op(const std::shared_ptr<Node>& op)
{
    return is_type<op::Transpose>(op) || is_type<op::v1::Reshape>(op) || is_type<op::Range>(op) ||
           is_type<op::v1::ConvolutionBackpropData>(op) || is_type<op::v3::Broadcast>(op);
}

// Helper for a vile hack in DynamicExecutable::call. See body of that function for details.
static size_t count_dyn_nodes(const shared_ptr<ngraph::Function>& f)
{
    size_t count = 0;
    for (auto op : f->get_ops())
    {
        if (is_dynamic_op(op))
        {
            count++;
        }
    }
    return count;
}

bool runtime::dynamic::DynamicExecutable::call(
    const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
    const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
    // Look up a cached executable if one exists. Executables are cached on:
    // (1) the shapes of all inputs;
    // (2) the values of shape-relevant input tensors.

    std::vector<int> merged_input_shapes;
    std::ostringstream key;
    size_t loop_count = 0;
    for (auto& input : inputs)
    {
        if (m_wrapped_function->get_parameters()[loop_count]->is_relevant_to_shapes())
        {
            // Cache on the values of shape-relevant inputs.
            int size = input->get_size_in_bytes() / (input->get_element_type().bitwidth() / 8);
            std::vector<int64_t> data(size);
            input->read(data.data(), input->get_size_in_bytes());
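            // Note: the bytes read above are reinterpreted as int64_t below,
            // so this keying scheme assumes shape-relevant inputs carry 64-bit
            // integer data (an observation; nothing here enforces it).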
            for (int i = 0; i < input->get_element_count(); i++)
            {
                merged_input_shapes.emplace_back(data[i]);
            }
        }
        else
        {
            // Cache on the shapes of all remaining inputs.
            for (int i = 0; i < input->get_shape().size(); i++)
            {
                merged_input_shapes.emplace_back(input->get_shape()[i]);
            }
        }
        // -1 is the separator: if the shape of input 1 is {2, 2, 3, 3} and the
        // shape of input 2 is {4, 5}, the key is 2, 2, 3, 3, -1, 4, 5, -1.
        merged_input_shapes.emplace_back(-1);
        loop_count++;
    }

    std::copy(merged_input_shapes.begin(),
              merged_input_shapes.end(),
              std::ostream_iterator<int>(key, ", "));

    if (m_lru->is_cached(merged_input_shapes))
    {
        std::vector<std::shared_ptr<runtime::Tensor>> wrapped_outputs;

        std::shared_ptr<Function> clone = m_lru->get_cloned_function(merged_input_shapes);
        const ResultVector& results = clone->get_results();
        for (auto& result : results)
        {
            NGRAPH_CHECK(result->get_output_partial_shape(0).is_static(),
                         "Shape staticization failed for result node ",
                         *result);
        }
        NGRAPH_CHECK(results.size() == outputs.size());

        for (size_t i = 0; i < outputs.size(); i++)
        {
            if (auto dynamic_tensor =
                    std::dynamic_pointer_cast<runtime::dynamic::DynamicTensor>(outputs[i]))
            {
                dynamic_tensor->make_storage(results[i]->get_output_element_type(0),
                                             results[i]->get_output_shape(0));
                wrapped_outputs.push_back(dynamic_tensor->get_wrapped_tensor());
            }
            else
            {
                wrapped_outputs.push_back(outputs[i]);
            }
        }

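        // Note: unlike the non-cached path below, the raw `inputs` are passed
        // through here without unwrapping any DynamicTensor instances.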
        return m_lru->get_cached_entry(merged_input_shapes)->call(wrapped_outputs, inputs);
    }
    else
    {
        NGRAPH_CHECK(m_wrapped_function->get_parameters().size() == inputs.size());

        std::vector<std::shared_ptr<runtime::Tensor>> wrapped_inputs;
        std::vector<element::Type> arg_element_types;
        std::vector<PartialShape> arg_shapes;

        std::shared_ptr<Function> clone;
        {
            // We'll use AlignedBuffers to back the base pointers, storing them
            // in this vector for RAII purposes.
            std::vector<AlignedBuffer> arg_buffers;
            arg_buffers.reserve(inputs.size());
            std::vector<void*> arg_value_base_pointers(inputs.size());

            size_t i = 0;

            for (auto& input : inputs)
            {
                if (m_wrapped_function->get_parameters()[i]->is_relevant_to_shapes())
                {
                    // TODO(amprocte): Move has_storage() to runtime::Tensor?
                    if (auto dynamic_tensor =
                            std::dynamic_pointer_cast<runtime::dynamic::DynamicTensor>(input))
                    {
                        NGRAPH_CHECK(dynamic_tensor->has_storage());
                    }

                    arg_buffers.emplace_back(input->get_size_in_bytes(), /*alignment=*/64);
                    arg_value_base_pointers[i] = arg_buffers.back().get_ptr();

                    // TODO(amprocte): For host-resident tensors we should be able to skip the
                    // read, but no API for that yet.
                    input->read(arg_value_base_pointers[i], input->get_size_in_bytes());
                }
                else
                {
                    arg_value_base_pointers[i] = nullptr;
                }

                if (auto dynamic_tensor =
                        std::dynamic_pointer_cast<runtime::dynamic::DynamicTensor>(input))
                {
                    NGRAPH_CHECK(dynamic_tensor->has_storage());
                    arg_element_types.push_back(
                        dynamic_tensor->get_wrapped_tensor()->get_element_type());
                    arg_shapes.push_back(dynamic_tensor->get_wrapped_tensor()->get_shape());
                    wrapped_inputs.push_back(dynamic_tensor->get_wrapped_tensor());
                }
                else
                {
                    arg_element_types.push_back(input->get_element_type());
                    arg_shapes.push_back(input->get_shape());
                    wrapped_inputs.push_back(input);
                }

                i++;
            }

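            // specialize_function clones the graph with each parameter pinned
            // to the observed element type and shape; where a non-null value
            // base pointer is supplied, the parameter is replaced by a
            // Constant backed by those bytes.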
            clone = specialize_function(
                m_wrapped_function, arg_element_types, arg_shapes, arg_value_base_pointers);
        }

        pass::Manager passes;
        // Opset1Downgrade should be moved below DynElimination
        // once ConstantFolding for v3 ops is ready.
        passes.register_pass<pass::Opset1Downgrade>();
        passes.register_pass<pass::ConstantFolding>();
        passes.register_pass<pass::DynElimination>();
        passes.register_pass<pass::Opset0Downgrade>(); // Converts dynamic v1 variants to v0 ops
        passes.set_per_pass_validation(false);

        // FIXME(amprocte): Vile, temporary hack: we need to do repeated rounds of
        // ConstantFolding/DynElimination until everything that DynElimination is supposed to
        // eliminate has actually been eliminated. We could do this by monitoring the return
        // values of the passes (keep iterating until both CF and DE report no changes), but
        // that did not seem to work, so here we are. Probably a better fix is to somehow
        // combine the matchers in CF and DE into one pass.
        size_t num_dyn_nodes_last_pass = std::numeric_limits<size_t>::max();

        while (num_dyn_nodes_last_pass != 0)
        {
            passes.run_passes(clone);
            auto num_dyn_nodes_this_pass = count_dyn_nodes(clone);

            NGRAPH_CHECK(num_dyn_nodes_this_pass < num_dyn_nodes_last_pass,
                         "Could not eliminate all Dyn nodes (",
                         num_dyn_nodes_this_pass,
                         " remaining)");

            num_dyn_nodes_last_pass = num_dyn_nodes_this_pass;
        }

        pass::Manager pass_val;
        pass_val.register_pass<pass::Validate>();
        pass_val.run_passes(clone);

        std::vector<std::shared_ptr<runtime::Tensor>> wrapped_outputs;

        const ResultVector& results = clone->get_results();
        for (auto& result : results)
        {
            NGRAPH_CHECK(result->get_output_partial_shape(0).is_static(),
                         "Shape staticization failed for result node ",
                         *result);
        }
        NGRAPH_CHECK(results.size() == outputs.size());

        for (size_t i = 0; i < outputs.size(); i++)
        {
            if (auto dynamic_tensor =
                    std::dynamic_pointer_cast<runtime::dynamic::DynamicTensor>(outputs[i]))
            {
                dynamic_tensor->make_storage(results[i]->get_output_element_type(0),
                                             results[i]->get_output_shape(0));
                wrapped_outputs.push_back(dynamic_tensor->get_wrapped_tensor());
            }
            else
            {
                wrapped_outputs.push_back(outputs[i]);
            }
        }

        auto compiled_executable =
            m_wrapped_backend->compile(clone, m_enable_performance_collection);
        // Put the compiled executable in the cache.
        m_lru->add_entry(merged_input_shapes, compiled_executable, clone);
        auto result = compiled_executable->call(wrapped_outputs, wrapped_inputs);

        return result;
    }
}

runtime::dynamic::DynamicTensor::DynamicTensor(
    const element::Type& element_type,
    const PartialShape& shape,
    const std::shared_ptr<runtime::Backend>& wrapped_backend)
    : Tensor(make_shared<descriptor::Tensor>(element_type, shape, "wrapped_dynamic"))
    , m_wrapped_tensor(nullptr)
    , m_wrapped_backend(wrapped_backend)
{
}

Strides runtime::dynamic::DynamicTensor::get_strides() const
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "asked for strides of a dynamic tensor with no allocated storage");
    return ngraph::row_major_strides(m_wrapped_tensor->get_shape());
}

size_t runtime::dynamic::DynamicTensor::get_size_in_bytes() const
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "asked for size in bytes of a dynamic tensor with no allocated storage");
    return get_element_count() * get_element_type().size();
}

size_t runtime::dynamic::DynamicTensor::get_element_count() const
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "asked for element count of a dynamic tensor with no allocated storage");
    return shape_size(m_wrapped_tensor->get_shape());
}

const element::Type& runtime::dynamic::DynamicTensor::get_element_type() const
{
    if (m_wrapped_tensor == nullptr)
    {
        return m_descriptor->get_element_type();
    }
    else
    {
        return m_wrapped_tensor->get_element_type();
    }
}

const ngraph::Shape& runtime::dynamic::DynamicTensor::get_shape() const
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "asked for shape of a dynamic tensor with no allocated storage");
    return m_wrapped_tensor->get_shape();
}

void runtime::dynamic::DynamicTensor::write(const void* p, size_t n)
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "tried to write to a dynamic tensor with no allocated storage");
    m_wrapped_tensor->write(p, n);
}

void runtime::dynamic::DynamicTensor::read(void* p, size_t n) const
{
    NGRAPH_CHECK(m_wrapped_tensor != nullptr,
                 "tried to read from a dynamic tensor with no allocated storage");
    m_wrapped_tensor->read(p, n);
}

bool runtime::dynamic::DynamicTensor::has_storage() const
{
    return m_wrapped_tensor != nullptr;
}

void runtime::dynamic::DynamicTensor::release_storage()
{
    m_wrapped_tensor = nullptr;
}

void runtime::dynamic::DynamicTensor::make_storage(const element::Type& element_type,
                                                   const Shape& shape)
{
    NGRAPH_CHECK(element_type.is_static(), "make_storage requires a static element type");
    NGRAPH_CHECK(get_element_type().is_dynamic() || get_element_type() == element_type,
                 "tried to make storage with element type ",
                 element_type,
                 " which is incompatible with dynamic tensor element_type ",
                 get_element_type());
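    // PartialShape::relaxes(s) holds when this tensor's declared (possibly
    // dynamic) shape can legally be refined to the concrete shape s.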
    NGRAPH_CHECK(get_partial_shape().relaxes(shape),
                 "tried to make storage with shape ",
                 shape,
                 " which is incompatible with dynamic tensor shape ",
                 get_partial_shape());
    m_wrapped_tensor = m_wrapped_backend->create_tensor(element_type, shape);
}

const std::shared_ptr<ngraph::runtime::Tensor>&
    runtime::dynamic::DynamicTensor::get_wrapped_tensor() const
{
    return m_wrapped_tensor;
}
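
// A minimal usage sketch of the wrapper above, assuming a registered
// "INTERPRETER" backend and a Function `f` built with a dynamic parameter
// shape (both are assumptions; any wrapped backend that supports the ops
// would do):
//
//     auto base = runtime::Backend::create("INTERPRETER");
//     auto backend = make_shared<runtime::dynamic::DynamicBackend>(base);
//     auto exec = backend->compile(f, /*enable_performance_collection=*/false);
//
//     auto t_in = backend->create_tensor(element::f32, Shape{2, 3});
//     auto t_out =
//         backend->create_dynamic_tensor(element::f32, PartialShape::dynamic());
//
//     exec->call({t_out}, {t_in});
//     // After the call, t_out owns static storage: t_out->get_shape() is concrete.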