Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / graph_optimizer / prep_opt_depthwise_sep_post.cpp
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18
19 #include "pass_manager.h"
20 #include "program_helpers.h"
21
22
23 template <typename T>
24 void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre(program_impl& p, T& node)
25 {
26     if (!node.get_depthwise_sep_opt())
27         return;
28
29     if (node.get_groups() > 1) {
30         if (node.get_groups() >= 16) {
31             node.set_groups(1);  // use one kernel
32         }
33         return; // no concatenations requered
34     }
35
36     const auto& split = node.get_primitive()->split();
37
38     auto dependency_offset = node.get_primitive()->input.size();
39     //concatenate weights
40     {
41         //if weights were optimized it is needed to use the sizes after optimization
42         auto target_layout = program_helpers::get_weights_layout(node.get_dependency(dependency_offset), split);
43         program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
44         dependency_offset++;
45     }
46
47     //concatenate biases
48     if (node.get_primitive()->bias.size() != 0)
49     {
50         const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
51         auto target_layout = layout(bias_layout.data_type, cldnn::format::bfyx, { 1, 1, bias_layout.size.spatial[0] * split, 1 });
52         program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
53         dependency_offset++;
54     }
55
56     if (node.template is_type<convolution>())
57     {
58         auto& prim_node = node.template as<convolution>();
59         const auto& prim = prim_node.get_primitive();
60
61         // concatenate weights quantization factors
62         if (prim->weights_quantization_factors.size() != 0)
63         {
64             const auto& weights_quantization_layout = node.get_dependency(dependency_offset).get_output_layout();
65             auto target_layout = layout(weights_quantization_layout.data_type, cldnn::format::bfyx, { 1, 1, weights_quantization_layout.size.batch[0] * split, 1 });
66             program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
67             dependency_offset++;
68         }
69         // concatenate output callibration factors
70         if (prim->output_calibration_factors.size() != 0)
71         {
72             const auto& output_callibration_layout = node.get_dependency(dependency_offset).get_output_layout();
73             auto target_layout = layout(output_callibration_layout.data_type, cldnn::format::bfyx, { 1, 1, output_callibration_layout.size.batch[0] * split, 1 });
74             program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
75             dependency_offset++;
76         }
77     }
78
79     if (node.get_primitive())
80         //override node split, as only one kernel will be executed
81         node.set_split(1);
82 }
83 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<convolution_node>(program_impl& p, convolution_node& node);
84 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<deconvolution_node>(program_impl& p, deconvolution_node& node);
85
86 void prep_opt_depthwise_sep_post::run(program_impl& p)
87 {
88     //depthwise separated convolution/deconvolution optimization
89     for (auto& prim : p.get_processing_order())
90     {
91         if (prim->type() == convolution::type_id())
92         {
93             optimize_depthwise_sep_pre(p, prim->as<convolution>());
94         }
95         else if (prim->type() == deconvolution::type_id())
96         {
97             optimize_depthwise_sep_pre(p, prim->as<deconvolution>());
98         }
99     }
100 }