inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_depthwise_sep_opt.cpp

   1 /*
   2 // Copyright (c) 2018 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include "pass_manager.h"
  20 #include "program_helpers.h"
  21
  22
  23 template <typename T>
  24 void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre(T& node)
  25 {
  26     if (node.get_groups() == 1) {
  27         //enable optimization only when IFM / split <= 8 (otherwise scheduling multiple opt kernels is better) and split >= 16
  28         if (!(node.get_dependency(0).get_output_layout().size.feature[0] / node.get_primitive()->split() <= 8) ||
  29             !(node.get_primitive()->split() >= 16))
  30             return;
  31
  32         //make sure the weights and biases are data type and
  33         //are not reused in other primitives as they will be overriden with concatenated ones
  34         for (size_t i = 1; i < node.get_dependencies().size(); i++)
  35         {
  36             auto& weights_or_biases = node.get_dependency(i);
  37             if (weights_or_biases.get_users().size() > 1 || weights_or_biases.type() != data::type_id())
  38                 return;
  39         }
  40     }
  41     else {
  42         //enable optimization only when IFM / groups <= 8 (otherwise scheduling multiple opt kernels is better) and groups >= 16
  43         if (!(node.get_dependency(0).get_output_layout().size.feature[0] / node.get_groups() <= 8) ||
  44             !(node.get_groups() >= 16))
  45             return;
  46     }
  47
  48     node.set_depthwise_sep_opt(true);
  49 }
  50
  51 template void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre<convolution_node>(convolution_node& node);
  52 template void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre<deconvolution_node>(deconvolution_node& node);
  53
  54 void prepare_depthwise_sep_opt::run(program_impl& p)
  55 {
  56     //depthiwise separated convolution/deconvolution optimization
  57     for (auto& prim : p.get_processing_order())
  58     {
  59         if (prim->type() == convolution::type_id())
  60         {
  61             optimize_depthwise_sep_pre(prim->as<convolution>());
  62         }
  63         else if (prim->type() == deconvolution::type_id())
  64         {
  65             optimize_depthwise_sep_pre(prim->as<deconvolution>());
  66         }
  67     }
  68 }
  69
  70