inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include "fused_conv_eltwise_inst.h"
  19 #include "primitive_type_base.h"
  20 #include "sliding_window_utils.h"
  21 #include "error_handler.h"
  22 #include "json_object.h"
  23
  24 namespace cldnn
  25 {
  26 primitive_type_id fused_conv_eltwise_type_id()
  27 {
  28     static primitive_type_base<fused_conv_eltwise> instance;
  29     return &instance;
  30 }
  31
  32 layout fused_conv_eltwise_inst::calc_output_layout(fused_conv_eltwise_node const& node)
  33 {
  34     assert((bool)node.get_primitive()->output_data_type == false
  35            && "Output data type forcing is not supported for "
  36               "fused_conv_eltwise_node!");
  37     auto desc = node.get_primitive();
  38
  39     auto input_layout = node.input().get_output_layout();
  40     auto weights_layout = node.weights(0).get_output_layout(); //weights are stored after inputs
  41
  42     auto input_offset = desc->conv.input_offset;
  43     auto stride = desc->conv.stride;
  44     auto dilation = desc->conv.dilation;
  45     auto split = desc->conv.weights.size();
  46
  47     // compute how many outputs in rows and columns will be generate by filter.
  48     // outp <= (input_size - (2*input_offset) - kernel_size)/ stride
  49     auto filter_size = weights_layout.size;
  50
  51     // TODO: Consider moving general parameter verification to arguments constructor.
  52     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial X", stride.spatial[0], "value", 0, "Stride spatial X must be positive (>= 1)");
  53     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial Y", stride.spatial[1], "value", 0, "Stride spatial Y must be positive (>= 1)");
  54     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial X", dilation.spatial[0], "value", 0, "Dilatation patial X must be positive (>= 1)");
  55     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial Y", dilation.spatial[1], "value", 0, "Dilatation spatial Y must be positive (>= 1)");
  56     CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial X", 2 * input_offset.spatial[0], "input layout spatial X", input_layout.size.spatial[0], "There is no input data to process");
  57     CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial Y", 2 * input_offset.spatial[1], "input layout spatial Y", input_layout.size.spatial[1], "There is no input data to process");
  58     CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset feature", input_offset.feature[0], "", 0, "Input offset in feature is not supported");
  59     CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset batch", input_offset.batch[0], "", 0, "Input offset in batch is not supported");
  60
  61     // TODO: FCN and SSD used offset larger than convolution size. does it make sense to support it? do we support it on the ref kernels?
  62 //     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial X", -input_offset.spatial[0], "input window size spatial X", filter_size.spatial[0], "First convolution is outside of image. please reduce input offset X");
  63 //     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial Y", -input_offset.spatial[1], "input window size spatial Y", filter_size.spatial[1], "First convolution is outside of image. please reduce input offset Y");
  64
  65     if (input_layout.format == format::winograd_2x3_s1_weights || input_layout.format == format::winograd_2x3_s1_fused_weights ||
  66         input_layout.format == format::winograd_6x3_s1_fused_weights || input_layout.format == format::image_2d_weights_winograd_6x3_s1_fbxyb || input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
  67         CLDNN_ERROR_MESSAGE(node.id(), "Input for convolution should not be in windograd weights format - it is reserved for weights only");
  68
  69     if (input_layout.format == format::winograd_2x3_s1_data)
  70     {
  71         CLDNN_ERROR_NOT_EQUAL(node.id(), "convolution split", split, "expected value", 1, "Convolution with winograd input only supports split == 1");
  72         CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial X", stride.spatial[0], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
  73         CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial Y", stride.spatial[1], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
  74         CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial X", dilation.spatial[0], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
  75         CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial Y", dilation.spatial[1], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
  76         if (input_layout.size.feature[0] % 32 != 0)
  77             CLDNN_ERROR_MESSAGE(node.id(), "Input for winograd 2x3 convolution should have features count divisable by 32");
  78         if (weights_layout.size.batch[0] % 32 != 0)
  79             CLDNN_ERROR_MESSAGE(node.id(), "Number of filters (OFM) for winograd 2x3 convolution should be divisable by 32");
  80
  81         if (node.get_primitive()->conv.with_activation)
  82             CLDNN_ERROR_MESSAGE(node.id(), "Winograd 2x3 convolution should not have activation fused - activation should be performed at transformation from winograd domain stage");
  83
  84         CLDNN_ERROR_LESS_THAN(node.id(), "input width", input_layout.size.spatial[0], "filter width", 3, "Convolution input is smaller than weights");
  85         CLDNN_ERROR_LESS_THAN(node.id(), "input height", input_layout.size.spatial[1], "filter height", 3, "Convolution input is smaller than weights");
  86
  87         constexpr tensor::value_type filter_height = 3; //by definition of format::winograd_2x3_s1_data (our assumption)
  88         constexpr tensor::value_type winograd_filter_height = filter_height; //for this format, winograd filter is considered to be a set of 1d filters so its height should remain the same as original filter's
  89
  90         return layout{ input_layout.data_type, input_layout.format, tensor{ input_layout.size.batch[0], weights_layout.size.batch[0], input_layout.size.spatial[0], input_layout.size.spatial[1] - winograd_filter_height + 1 }, input_layout.data_padding };
  91     }
  92
  93     // get output feature map from weights. It should be the same as number of biases. Will be verifed in convolution::create()
  94     auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(split);
  95
  96     if (desc->conv.with_output_size)
  97     {
  98         CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial X", desc->conv.output_size.spatial[0], "value", 0, "must be positive(>= 1)");
  99         CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial Y", desc->conv.output_size.spatial[1], "value", 0, "must be positive(>= 1)");
 100
 101         tensor output_size(input_layout.size.batch[0], number_of_features,
 102                            desc->conv.output_size.spatial[0], desc->conv.output_size.spatial[1]);
 103         return { input_layout.data_type, input_layout.format, output_size };
 104     }
 105
 106     auto output_range = calc_sliding_window_output_range<swor_mode::all>(
 107         input_layout.size, filter_size, input_offset, stride, dilation, true, 1);
 108
 109     tensor output_size(input_layout.size.batch[0], number_of_features,
 110                        output_range.spatial[0], output_range.spatial[1]);
 111
 112
 113     // due to performance reason for using fs_bs_yx_bsv4_fsv32 first convolution have 3 features, so first conv layer will take byxf and return fs_bs_yx_bsv4_fsv32
 114     if (input_layout.data_type == data_types::i8 && input_layout.format == format::byx8_f4 && input_layout.size.batch[0] % 4 == 0 && input_layout.size.feature[0] == 3)
 115     {
 116         return layout{ input_layout.data_type, cldnn::format::fs_bs_yx_bsv4_fsv32, output_size };
 117     }
 118
 119     return { input_layout.data_type, input_layout.format, output_size };
 120 }
 121
 122 std::string fused_conv_eltwise_inst::to_string(fused_conv_eltwise_node const& node)
 123 {
 124     auto desc       = node.get_primitive();
 125     auto strd       = desc->conv.stride;
 126     auto split      = node.get_split();
 127     auto dilation   = desc->conv.dilation;
 128     auto node_info  = node.desc_to_json();
 129     auto activation = desc->conv.with_activation ? " true" : "false";
 130
 131     std::stringstream primitive_description;
 132
 133     json_composite conv_info;
 134     conv_info.add("stride", strd.to_string());
 135     conv_info.add("input offset", desc->conv.input_offset.to_string());
 136     conv_info.add("split", split);
 137     conv_info.add("dilation", dilation.to_string());
 138     conv_info.add("with activation", activation);
 139     conv_info.add("slope", desc->conv.activation_negative_slope);
 140     if (desc->conv.with_output_size)
 141     {
 142         json_composite ud_out_size_info;
 143         ud_out_size_info.add("size", desc->conv.output_size.to_string());
 144         conv_info.add("with user defined output size", ud_out_size_info);
 145     }
 146
 147     node_info->add("convolution info", conv_info);
 148     node_info->dump(primitive_description);
 149
 150     return primitive_description.str();
 151 }
 152
 153 fused_conv_eltwise_inst::typed_primitive_inst(network_impl& network, fused_conv_eltwise_node const& node)
 154     : parent(network, node)
 155 {
 156     auto stride = argument.conv.stride;
 157
 158     auto input_inst = node.input().get_output_layout();
 159     auto output_inst = node.get_output_layout();
 160     auto output_size = output_inst.size;
 161
 162     CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismatch");
 163     CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismatch");
 164
 165     auto split = node.get_split();
 166     for (decltype(split) j = 0; j < split; j++)
 167     {
 168         auto filter_inst = node.weights(j).get_output_layout(); //convolution filter
 169         if (bias_term())
 170         {
 171             auto bias_inst = node.bias(j).get_output_layout();
 172             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias batch[0]", bias_inst.size.batch[0], "expected size of batch", 1, "Biases isn't 1D vector.");
 173             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias feature[0]", bias_inst.size.feature[0], "expected size of feature", 1, "Biases isn't 1D vector.");
 174             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[1]", bias_inst.size.spatial[1], "expected size of spatial[1]", 1, "Biases isn't 1D vector.");
 175
 176             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismatch");
 177         }
 178
 179         auto input_offset = argument.conv.input_offset;
 180
 181         CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismatch");
 182         CLDNN_ERROR_NOT_EQUAL(node.id(), "Convolution padding mode", node.get_output_layout().data_padding.filling_value(), "padding value", 0.0f, "Unknown padding mode.");
 183         CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismatch");
 184         CLDNN_ERROR_NOT_EQUAL(node.id(), "Output feature size", output_size.feature.size(), "expected feature size", 1, "Only one-dimensional features are supported");
 185         CLDNN_ERROR_NOT_EQUAL(node.id(), "Output batch size", output_size.batch.size(), "expected output size", 1, "Only one-dimensional batch size are supported");
 186         CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights spatial size", filter_inst.size.spatial.size(), "expected weights spatial size", 2, "Weights have to have 2 dimensions in spatial domain.");
 187         CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismatch");
 188         if (filter_inst.format == format::bf_lyx_yx) // local convolution
 189         {
 190             auto local = filter_inst.size.local;
 191             CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local x dimension", local[0], "output x dimension", output_inst.size.spatial[0], "Weights/output dims mismatch");
 192             CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local y dimension", local[1], "output y dimension", output_inst.size.spatial[1], "Weights/output dims mismatch");
 193         }
 194     }
 195 }
 196 }