Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / graph_optimizer / prep_opt_depthwise_sep_post.cpp
/*
// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18
19 #include "pass_manager.h"
20 #include "program_helpers.h"
21
22
23 template <typename T>
24 void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre(program_impl& p, T& node)
25 {
26     if (!node.get_depthwise_sep_opt())
27         return;
28
29     if (node.get_groups() > 1) {
30         if (node.get_groups() >= 16) {
31             node.set_groups(1);  // use one kernel
32         }
33         return; // no concatenations requered
34     }
35
36     const auto& split = node.get_primitive()->split();
37
38     auto dependency_offset = node.get_primitive()->input.size();
39     //concatenate weights
40     {
41         //if weights were optimized it is needed to use the sizes after optimization
42         auto target_layout = program_helpers::get_weights_layout(node.get_dependency(dependency_offset), split);
43         program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
44         dependency_offset++;
45     }
46
47     //concatenate biases
48     if (node.get_primitive()->bias.size() != 0)
49     {
50         const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
51         auto target_layout = layout(bias_layout.data_type, cldnn::format::bfyx, { 1, 1, bias_layout.size.spatial[0] * split, 1 });
52         program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
53         dependency_offset++;
54     }
55
56     if (node.template is_type<convolution>())
57     {
58         auto& prim_node = node.template as<convolution>();
59         const auto& prim = prim_node.get_primitive();
60
61         // concatenate weights quantization factors
62         if (prim->weights_quantization_factors.size() != 0)
63         {
64             const auto& weights_quantization_layout = node.get_dependency(dependency_offset).get_output_layout();
65             auto target_layout = layout(weights_quantization_layout.data_type, cldnn::format::bfyx, { 1, 1, weights_quantization_layout.size.batch[0] * split, 1 });
66             program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
67             dependency_offset++;
68         }
69         // concatenate output callibration factors
70         if (prim->output_calibration_factors.size() != 0)
71         {
72             const auto& output_callibration_layout = node.get_dependency(dependency_offset).get_output_layout();
73             auto target_layout = layout(output_callibration_layout.data_type, cldnn::format::bfyx, { 1, 1, output_callibration_layout.size.batch[0] * split, 1 });
74             program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
75             dependency_offset++;
76         }
77     }
78
79     if (node.get_primitive())
80         //override node split, as only one kernel will be executed
81         node.set_split(1);
82 }
83 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<convolution_node>(program_impl& p, convolution_node& node);
84 template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<deconvolution_node>(program_impl& p, deconvolution_node& node);
85
86 void prep_opt_depthwise_sep_post::run(program_impl& p)
87 {
88     //depthwise separated convolution/deconvolution optimization
89     for (auto& prim : p.get_processing_order())
90     {
91         if (prim->type() == convolution::type_id())
92         {
93             optimize_depthwise_sep_pre(p, prim->as<convolution>());
94         }
95         else if (prim->type() == deconvolution::type_id())
96         {
97             optimize_depthwise_sep_pre(p, prim->as<deconvolution>());
98         }
99     }
100 }