2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
20 #include "api/CPP/proposal.hpp"
21 #include "api/CPP/roi_pooling.hpp"
22 #include "api/CPP/reorg_yolo.hpp"
23 #include "api/CPP/eltwise.hpp"
24 #include "upsampling_inst.h"
25 #include "pass_manager.h"
26 #include "program_node.h"
27 #include "layout_optimizer.h"
28 #include "program_impl.h"
29 #include "program_helpers.h"
31 using namespace cldnn;
33 // TODO: remove the friendship relation between this pass and program_impl
35 reorder_inputs::reorder_inputs(layout_optimizer& lo_ref) : base_pass("reorder_inputs"), _lo(lo_ref) {}
37 void reorder_inputs::run(program_impl& p) {
41 void reorder_inputs::run(program_impl& p, layout_optimizer& lo)
43 //first pass to set layout optimization_attributes for topology
44 for (auto& node : p.get_processing_order())
47 if (prim.type() == cldnn::convolution::type_id())
49 if (prim.as<convolution>().get_primitive()->split() > 1)
50 lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::splitted_convolution, 1);
53 //list of layers that do not support yxfb or perform worse than bfyx
54 if (prim.type() == cldnn::detection_output::type_id() || prim.type() == cldnn::proposal::type_id() ||
55 prim.type() == cldnn::roi_pooling::type_id() || prim.type() == cldnn::deconvolution::type_id() ||
56 prim.type() == cldnn::upsampling::type_id() || prim.type() == cldnn::reorg_yolo::type_id())
57 lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_only_layer, 1);
60 const auto reorder_input = [&p, &lo](typed_program_node<convolution>& conv_node)
62 auto conv_prim = conv_node.get_primitive();
63 auto& input_node = conv_node.get_dependency(0);
64 auto&& weights_layout = conv_node.weights(0).get_output_layout();
65 auto&& input_layout = input_node.get_output_layout();
67 std::shared_ptr<reorder> new_input = nullptr;
69 if (input_node.type() == reorder::type_id()) //convolution's input is a reorder
71 auto reorder_prim = input_node.as<reorder>().typed_desc();
72 auto& reorder_input = input_node.get_dependency(0);
73 auto reorder_layout = input_node.get_output_layout();
74 reorder_layout.data_type = *reorder_prim->output_data_type;
75 new_input = lo.get_reorder(
78 layout_optimizer::data_type::input,
80 weights_layout).first;
82 auto reorder_removed = false;
83 if (new_input && new_input->output_format != format::winograd_2x3_s1_data && new_input->output_format != format::bf8_xy16 && new_input->output_format != format::byxf) //output format is not optimal
85 auto reorder_input_layout = reorder_input.get_output_layout();
87 auto opt_layout = layout(*new_input->output_data_type, new_input->output_format, reorder_input_layout.size);
88 if (reorder_input_layout == opt_layout) //reorder 'breaks' optimal format
90 if (reorder_prim->subtract_per_feature.empty() &&
91 reorder_prim->mean.empty() &&
92 !reorder_prim->output_padding) //just plain reorder
94 conv_node.replace_dependency(0, reorder_input);
95 if (input_node.get_users().size() == 0 && !input_node.is_output())
97 reorder_removed = p.extract_and_remove(input_node);
101 else //change reorder's output layout
103 reorder_prim->output_format = opt_layout.format;
104 reorder_prim->output_data_type = opt_layout.data_type;
108 else //current reorder gives bad output, simply change it
110 reorder_prim->output_format = opt_layout.format;
111 reorder_prim->output_data_type = opt_layout.data_type;
116 if (!reorder_removed)
117 input_node.recalc_output_layout();
119 conv_node.recalc_output_layout();
123 new_input = lo.get_reorder(
124 input_node.get_output_layout(),
126 layout_optimizer::data_type::input,
128 weights_layout).first;
131 if (new_input && new_input->output_format == format::winograd_2x3_s1_data)
133 auto lower_size = (conv_prim->input_offset.negate() + input_layout.size);
135 tensor upper_input_padding = tensor{ 0 };
136 upper_input_padding.spatial[0] = (2 - (lower_size.spatial[0] % 2)) % 2; //winograd conv requires input's x to be in form 4 + 2n, with restriction that x >= 3, we can shortage it to x % 2 == 0
137 upper_input_padding.spatial[1] = (8 - ((lower_size.spatial[1] - 2) % 8)) % 8; //for y, y - 2 % 8 == 0 must hold
139 p.apply_needed_padding(conv_node, input_node, padding{ conv_prim->input_offset.negate().sizes(), upper_input_padding.sizes() });
141 auto winograd_output = std::make_shared<reorder>("_winograd_" + conv_node.id(), conv_node.id(), input_layout.format,
142 input_layout.data_type, std::vector<float>{}, cldnn_reorder_mean_mode::mean_subtract, conv_node.output_layout.data_padding);
143 conv_node.output_layout.data_padding = padding{};
144 program_node& back_node = p.get_or_create(winograd_output);
145 p.get_processing_order().insert_next(&conv_node, &back_node);
147 auto bias_term = conv_node.bias_term();
148 //create additional eltwise node after reorder to compute bias
151 auto& bias_node = conv_node.get_dependency(2);
152 std::vector<primitive_id> inputs = { back_node.id(), bias_node.id() };
153 auto winograd_output_biases = std::make_shared<eltwise>(back_node.id() + "_bias", inputs,
154 cldnn::eltwise_mode::sum, conv_prim->with_activation, conv_prim->activation_negative_slope,
155 back_node.get_output_layout().data_padding);
156 back_node.get_output_layout().data_padding = padding{};
157 auto& back_bias_node = p.get_or_create(winograd_output_biases);
158 p.get_processing_order().insert_next(&back_node, &back_bias_node);
159 p.replace_all_usages(back_node, back_bias_node);
160 p.add_connection(back_node, back_bias_node);
161 p.add_connection(bias_node, back_bias_node);
162 conv_node.invalidate_users();
163 p.replace_all_usages(conv_node, back_bias_node);
166 if (conv_prim->with_activation)
168 conv_node.typed_desc()->with_activation = false;
170 back_node.set_fused_activation(activation_relu_negative_slope, cldnn_activation_additional_params_t{ conv_prim->activation_negative_slope });
175 conv_node.invalidate_users();
176 p.replace_all_usages(conv_node, back_node);
178 p.add_connection(conv_node, back_node);
180 auto& r_node = p.get_or_create(new_input);
181 r_node.as<reorder>().set_input_offset(conv_prim->input_offset);
185 p.swap_names(conv_node, back_node);
186 if (conv_node.is_output())
188 conv_node.set_output(false);
189 back_node.set_output(true);
190 for (auto& output : p.get_outputs())
192 if (output == &conv_node)
202 conv_node.remove_dependency(2);
203 auto& back_bias_node = *(p.nodes_map.find(back_node.id() + "_bias")->second);
204 p.swap_names(conv_node, back_bias_node);
205 if (conv_node.is_output())
207 conv_node.set_output(false);
208 back_bias_node.set_output(true);
209 for (auto& output : p.get_outputs())
211 if (output == &conv_node)
213 output = &back_bias_node;
221 if (new_input && (new_input->output_format == format::bf8_xy16 || new_input->output_format == format::byxf))
223 auto conv1x1_output = std::make_shared<reorder>("_conv1x1_reorder_back_" + conv_node.id(), conv_node.id(), input_layout.format, input_layout.data_type);
224 auto& back_node = p.get_or_create(conv1x1_output);
225 p.get_processing_order().insert_next(&conv_node, &back_node);
226 conv_node.invalidate_users();
227 p.replace_all_usages(conv_node, back_node);
228 p.add_connection(conv_node, back_node);
233 auto& r_node = p.get_or_create(new_input);
234 p.add_intermediate(r_node, conv_node, 0, r_node.get_dependencies().empty());
235 conv_node.recalc_output_layout();
239 const auto reorder_input_detection_output = [&p, &lo](typed_program_node<detection_output>& detection_output_node)
241 auto detection_output_prim = detection_output_node.get_primitive();
243 for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++)
245 auto& input = detection_output_node.get_dependency(i);
246 std::shared_ptr<reorder> new_input = lo.get_reorder(
247 input.get_output_layout(),
249 layout_optimizer::data_type::input,
250 detection_output_node,
251 layout{ data_types::f32, format::bfyx, tensor{} }).first;
255 p.add_intermediate(new_input, detection_output_node, i);
260 for (auto& prim : p.get_processing_order())
262 //there's an assumption that only convolution will take data/input_layout as input
263 //exception to that rule would be a convolution which takes a reorder as input - see reoder_input above
264 program_helpers::do_for_types<convolution, detection_output>(*prim,
265 reorder_input, //case for convolution
266 reorder_input_detection_output //case for detection-output