2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include "pass_manager.h"
20 #include "program_helpers.h"
// Merges the per-split buffers of a node flagged for the depthwise-separable
// optimization (weights, optional bias and, for convolutions, optional
// quantization/calibration factors) into single contiguous buffers so the
// primitive can be launched as one kernel instead of `split` kernels.
// NOTE(review): the embedded original line numbers are non-contiguous, so
// parts of this definition (braces, the template header, early returns)
// appear to have been elided from this chunk — confirm against the full file.
24 void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre(program_impl& p, T& node)
// Only nodes previously marked for this optimization are processed.
26 if (!node.get_depthwise_sep_opt())
// Grouped path: presumably the kernel handles >=16 groups natively, so the
// split buffers never need concatenation there — TODO confirm threshold origin.
29 if (node.get_groups() > 1) {
30 if (node.get_groups() >= 16) {
31 node.set_groups(1); // use one kernel
33 return; // no concatenations required
// Number of per-split buffer sets attached as extra dependencies.
36 const auto& split = node.get_primitive()->split();
// Extra dependencies (weights, bias, ...) start right after the real inputs.
38 auto dependency_offset = node.get_primitive()->input.size();
// Merge the `split` weight buffers. If weights were optimized earlier,
// the post-optimization sizes must be used, hence the layout query.
42 auto target_layout = program_helpers::get_weights_layout(node.get_dependency(dependency_offset), split);
43 program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
// Merge the bias buffers (when present); biases are concatenated along
// the spatial X axis of a bfyx layout.
48 if (node.get_primitive()->bias.size() != 0)
50 const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
51 auto target_layout = layout(bias_layout.data_type, cldnn::format::bfyx, { 1, 1, bias_layout.size.spatial[0] * split, 1 });
52 program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
// Convolution-only extras: quantization and calibration factor buffers.
// `template` disambiguators are required because T is a dependent type.
56 if (node.template is_type<convolution>())
58 auto& prim_node = node.template as<convolution>();
59 const auto& prim = prim_node.get_primitive();
61 // concatenate weights quantization factors (batch axis carries the split)
62 if (prim->weights_quantization_factors.size() != 0)
64 const auto& weights_quantization_layout = node.get_dependency(dependency_offset).get_output_layout();
65 auto target_layout = layout(weights_quantization_layout.data_type, cldnn::format::bfyx, { 1, 1, weights_quantization_layout.size.batch[0] * split, 1 });
66 program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
69 // concatenate output calibration factors
70 if (prim->output_calibration_factors.size() != 0)
72 const auto& output_callibration_layout = node.get_dependency(dependency_offset).get_output_layout();
73 auto target_layout = layout(output_callibration_layout.data_type, cldnn::format::bfyx, { 1, 1, output_callibration_layout.size.batch[0] * split, 1 });
74 program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
// NOTE(review): the body of this condition is missing from this chunk;
// per the comment below it presumably resets the node's split to 1 — verify.
79 if (node.get_primitive())
80 // override node split, as only one kernel will be executed
// Explicit instantiations: the pass only ever runs this helper for
// convolution and deconvolution nodes (see run() below), so only these
// two specializations need to be emitted in this translation unit.
83 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<convolution_node>(program_impl& p, convolution_node& node);
84 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<deconvolution_node>(program_impl& p, deconvolution_node& node);
// Pass entry point: walks every primitive in processing order and applies
// the depthwise-separable pre-optimization to convolution and deconvolution
// nodes. NOTE(review): braces appear elided from this chunk (embedded line
// numbers are non-contiguous) — confirm loop/branch bodies against the full file.
86 void prep_opt_depthwise_sep_post::run(program_impl& p)
88 // depthwise separated convolution/deconvolution optimization
89 for (auto& prim : p.get_processing_order())
// Dispatch on the concrete node type so the correct template
// instantiation (convolution_node / deconvolution_node) is selected.
91 if (prim->type() == convolution::type_id())
93 optimize_depthwise_sep_pre(p, prim->as<convolution>());
95 else if (prim->type() == deconvolution::type_id())
97 optimize_depthwise_sep_pre(p, prim->as<deconvolution>());