model-optimizer/mo/ops/convolution.py
1 """
2  Copyright (c) 2018-2019 Intel Corporation
3
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 """

import logging as log

import numpy as np

from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \
    tf_window_op_pad_infer
from mo.front.extractor import spatial_getter
from mo.front.onnx.extractors.utils import get_backend_pad
from mo.graph.graph import Node, Graph
from mo.ops.op import Op, PermuteAttrs
from mo.utils.error import Error


class Convolution(Op):
    op = 'Convolution'

    def __init__(self, graph: Graph, attrs: dict):
        super().__init__(graph, {
            'kind': 'op',
            'type': __class__.op,
            'op': __class__.op,
            'infer': __class__.infer,
            'multiplication_transparent': True,
            'multiplication_transparent_ports': [(0, 0), (1, 0)],
            'in_ports_count': 3,
            'out_ports_count': 1,
        }, attrs)

    def backend_attrs(self):
        return [
           'auto_pad',
           'group',
           ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))),
           ('dilations', lambda node: ','.join(map(str, node['dilation'][node.spatial_dims]))),
           ('kernel', lambda node: ','.join(map(str, node['kernel_spatial']))),

           ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))),
           ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))),
           'output',
           'pad_value',
           'mode',
           'input',
        ]

    def backend_attrs_v2(self):
        return [
            spatial_getter('stride-x', 'stride', 1),
            spatial_getter('stride-y', 'stride', 0),

            ('kernel-x', lambda node: node.kernel_spatial[1]),
            ('kernel-y', lambda node: node.kernel_spatial[0]),

            spatial_getter('dilation-x', 'dilation', 0),
            spatial_getter('dilation-y', 'dilation', 1),
            spatial_getter('pad-x', 'pad', 1, lambda x: x[0]),
            spatial_getter('pad-y', 'pad', 0, lambda x: x[0]),
            spatial_getter('pad-r', 'pad', 1, lambda x: x[1]),
            spatial_getter('pad-b', 'pad', 0, lambda x: x[1]),

            'auto_pad',
            'output',
            'group',
        ]


    @staticmethod
    def calc_convolution(input_spatial_shape, stride_spatial_shape, pad_spatial_shape, kernel_extent):
        ''' Calculates output shape for Convolution.
            Verified to be applicable for both Caffe and ONNX.
        '''
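        # Worked example: for input spatial size 10, total padding 2, kernel extent 3 and stride 2
        # this evaluates to (10 + 2 - 3) / 2 + 1 = 5.5; int64_array() in the caller is assumed to
        # truncate the float result, giving an output spatial size of 5.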
        spatial_val_wo_stride = input_spatial_shape + pad_spatial_shape - kernel_extent
        float_spatial_val_wo_stride = float_array(spatial_val_wo_stride)
        return float_spatial_val_wo_stride / stride_spatial_shape + 1

    @staticmethod
    def calc_deconvolution(node, input_spatial_shape, pad_spatial_shape, kernel_extent):
        ''' Calculates output shape for Deconvolution.
            Verified to be applicable for both Caffe and ONNX with explicitly defined pads.
            If pads are not specified for ONNX operator, this function is not applicable.
        '''
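        # Worked example: with stride 2, input spatial size 5, kernel extent 3 and total padding 2
        # the formula below gives 2 * (5 - 1) + 3 - 2 = 9.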
        shape = node.stride[node.spatial_dims] * (input_spatial_shape - 1) + kernel_extent - pad_spatial_shape
        return shape

    @staticmethod
    def infer(node: Node):
102         """
103         Infers shape of convolution node as it is done in ONNX.
104         It is very similar to one that Caffe does, but slightly different.
105         We made a complete fork of this function because they are supposed to be
106         supported differently by different people.
107         Args:
108             node: graph convolution node
109         """
        input_shape = node.in_node(0).shape
        if input_shape is None:
            return

        # bias_term cannot be deduced earlier for frameworks that represent
        # convolution weights/biases as regular inputs, so the number of inputs
        # is checked here to restore the correct value of bias_term and keep
        # the rest of the code unchanged. It will be used after we merge the
        # per-framework convolution infer functions into a single one.
        if not node.has_valid('bias_term'):
            node['bias_term'] = len(node.in_nodes()) == 3
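        # Illustrative note: with inputs (data, weights, bias) there are three input nodes, so
        # bias_term becomes True; with only (data, weights) it becomes False.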

        # In case of Caffe we have to calculate the input index for weights because
        # a Caffe convolution can have more than one input
        weights_index = len(node.in_nodes()) - 2
        if not node.bias_term:
            weights_index = len(node.in_nodes()) - 1

        # Reshape the weights kernel to its original shape
        # In case of the Caffe or MXNet frameworks, the weights values have no structured shape like OIHW,
        # so we have to reshape the weights to the normal shape
        # For this case, the Convolution node should have the attribute reshape_kernel = True
        if node.has_valid('reshape_kernel') and node.reshape_kernel:
            if not (node.has_valid('output') and node.has_valid('channel_dims') and node.has_valid(
                    'group') and node.has_valid('kernel_spatial')):
                log.error('Cannot reshape kernel because not all required attrs are set for {} node'.format(node.id))
                return
            # layout for Convolution weights is OIHW
            kernel_shape = np.array([node.output, input_shape[node.channel_dims].item() / node.group,
                                    *[node.kernel_spatial[i] for i in range(len(node.kernel_spatial))]], dtype=np.int64)
            if node.type == 'Deconvolution':  # layout for Deconvolution weights is IOHW
                kernel_shape[[0, 1]] = kernel_shape[[1, 0]]
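            # Illustrative example: with output=64, 3 input channels, group=1 and a 3x3 kernel the
            # computed kernel_shape is [64, 3, 3, 3] (OIHW); for Deconvolution the first two entries
            # are swapped, giving [3, 64, 3, 3] (IOHW).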

            if np.prod(kernel_shape) != np.prod(node.in_node(weights_index).value.shape):
                log.error("Size of weights {} does not match kernel shape: {}\n".format(np.prod(node.in_node(weights_index).value.shape), kernel_shape) +
                          "    A possible reason is a wrong number of channels in the input shape\n")
                raise Error("Cannot reshape weights to kernel shape")

            node.in_node(weights_index).shape = np.array(kernel_shape)
            node.in_node(weights_index).value = np.reshape(node.in_node(weights_index).value, kernel_shape)
            node.reshape_kernel = False

        # Pass the weights shape to the node attribute kernel_shape
        kernel_shape = node.in_node(weights_index).shape
        node['kernel_shape'] = kernel_shape
        # Calculate kernel_spatial_idx and spatial_dims if they are not specified
        # It is necessary for ONNX because the convolution can be 1D/2D/3D
        if not node.has_valid('kernel_spatial_idx'):
            node['kernel_spatial_idx'] = np.delete([x for x in range(len(kernel_shape))], (node.input_feature_channel, node.output_feature_channel))

        if not node.has_valid('spatial_dims'):
            node['spatial_dims'] = np.delete([x for x in range(len(input_shape))], (node.channel_dims[0], node.batch_dims[0]))

        node['kernel_spatial'] = kernel_shape[node.kernel_spatial_idx]
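        # Illustrative example: for OIHW weights of shape [64, 3, 5, 5] with output_feature_channel=0
        # and input_feature_channel=1, kernel_spatial_idx is [2, 3] and kernel_spatial is [5, 5].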

        if not node.has_valid('output'):
            # restore the number of output feature maps from the second argument, which is the weights
            if node.type in ['Convolution', 'Deconvolution']:
                node['output'] = kernel_shape[node.output_feature_channel]
            else:
                raise Error(
                    'Convolution infer function was called for a node {} with unsupported type {}',
                    node.soft_get('name'),
                    node.type
                )

        # Set default values for dilation, strides and pads if not set
        if not node.has_valid('dilation'):
            node['dilation'] = np.full([len(input_shape)], 1, dtype=np.int64)
        if not node.has_valid('stride'):
            node['stride'] = np.full([len(input_shape)], 1, dtype=np.int64)
        if not node.has_valid('pad'):
            node['pad'] = np.array([[0, 0]] * len(input_shape), dtype=np.int64)
        node['pad_spatial_shape'] = node.pad[node.spatial_dims]

        if not node.has_valid('output_padding'):
            node['output_padding'] = np.full([len(input_shape)], 0, dtype=np.int64)

        input_spatial_shape = input_shape[node.spatial_dims]
        stride_spatial_shape = node.stride[node.spatial_dims]

        kernel_extent = node.dilation[node.spatial_dims] * (node.kernel_spatial - 1) + 1
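        # Illustrative example: a 3-element kernel dimension with dilation 2 covers
        # 2 * (3 - 1) + 1 = 5 input positions.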
        # TensorFlow always has the auto_pad attribute, which can be either valid or same_upper
        # In ONNX the auto_pad attribute is deprecated but appears in some models (could be valid, same_upper or same_lower)
        # Caffe does not use the auto_pad attribute
        if node.has_valid('auto_pad') and not node.has_valid('output_spatial_shape'):
            node['pad_spatial_shape'], node['output_spatial_shape'] = tf_window_op_pad_infer(input_spatial_shape,
                                                                                             kernel_extent,
                                                                                             stride_spatial_shape,
                                                                                             node.auto_pad,
                                                                                             node.type == 'Deconvolution')

            pad = np.zeros((len(input_shape), 2), dtype=np.int64)
            pad[node.spatial_dims] = node.pad_spatial_shape
            node.pad = pad
        else:
            pad_spatial_shape = np.add.reduce(node.pad_spatial_shape, axis=1)
            if node.type == 'Convolution':
                float_spatial = Convolution.calc_convolution(input_spatial_shape, stride_spatial_shape,
                                                             pad_spatial_shape,
                                                             kernel_extent)
                node['output_spatial_shape'] = int64_array(float_spatial)
            elif node.type == 'Deconvolution':
                # If output_spatial_shape is given, we calculate the spatial pads
                if node.has_valid('output_spatial_shape'):
                    if node.has_valid('get_pad'):
                        node['pad'] = node.get_pad(node, input_shape, kernel_shape)
                    else:
                        log.debug('Can\'t calculate paddings due to missing lambda get_pad in {} node'.format(node.id))
                        return
                else:
                    output_padding = node.output_padding[node.spatial_dims] if node.has_valid('output_padding') else None
                    if output_padding is not None and any(output_padding):
                        pad_spatial_shape -= output_padding
                        for dim in range(len(pad_spatial_shape)):
                            node.pad_spatial_shape[dim][1] -= pad_spatial_shape[dim]
                        node.pad[node.spatial_dims] = node.pad_spatial_shape
                        node['output_padding'] = None

                    float_spatial = Convolution.calc_deconvolution(node, input_spatial_shape, pad_spatial_shape,
                                                                   kernel_extent)
                    node['output_spatial_shape'] = int64_array(float_spatial)
            else:
                return

        # For cases when the group attribute wasn't set in the extractor, we should specify the get_group attribute;
        # this attribute should store lambda node: ... (check the TF convolution extractor)
        if node.has_valid('get_group'):
            node['group'] = node.get_group(node)
        output_shape = np.full_like(input_shape, -1, dtype=np.int64)
        output_shape[node.batch_dims] = input_shape[node.batch_dims]  # pylint: disable=unsupported-assignment-operation
        output_shape[node.spatial_dims] = node.output_spatial_shape  # pylint: disable=unsupported-assignment-operation

        # For cases when the output attribute wasn't set in the extractor, we should specify the get_output_feature_dim attribute;
        # this attribute should store lambda node: ... (check the TF convolution extractor)
        if node.has_valid('get_output_feature_dim'):
            node['output'] = node.get_output_feature_dim(node)
        output_shape[node.channel_dims] = node.output  # pylint: disable=unsupported-assignment-operation
        node['output_shape'] = output_shape
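        # Illustrative example: for an NCHW input of shape [1, 3, 224, 224] with output=64 and
        # output_spatial_shape=[112, 112], the assembled output_shape is [1, 64, 112, 112].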

        for n in node.out_nodes():
            node.out_node(n).shape = output_shape

        mark_input_bins(node)
        assign_dims_to_weights(node.in_node(weights_index), node.kernel_spatial_idx, node.input_feature_channel,
                               node.output_feature_channel, len(kernel_shape))

        PermuteAttrs.create_permute_attrs(node, attrs=[('pad', 'input:0'),
                                                       ('stride', 'input:0'),
                                                       ('dilation', 'input:0'),
                                                       ('output_shape', 'input:0'),
                                                       ('batch_dims', 'input:0'),
                                                       ('channel_dims', 'input:0'),
                                                       ('spatial_dims', 'input:0'),

                                                       ('kernel_shape', 'input:{}'.format(weights_index)),
                                                       ('kernel_spatial_idx', 'input:{}'.format(weights_index)),
                                                       ('input_feature_channel', 'input:{}'.format(weights_index)),
                                                       ('output_feature_channel', 'input:{}'.format(weights_index)),
                                                       ])

        PermuteAttrs.set_permutation(node.in_node(weights_index), node,
                                     node.get_weights_permute if node.has_valid('get_weights_permute') else None)