model-optimizer/mo/ops/convolution.py
1 """
2  Copyright (c) 2018-2019 Intel Corporation
3
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 """

import logging as log

import numpy as np

from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \
    tf_window_op_pad_infer
from mo.front.extractor import spatial_getter
from mo.front.onnx.extractors.utils import get_backend_pad
from mo.graph.graph import Node, Graph
from mo.ops.op import Op, PermuteAttrs
from mo.utils.error import Error


class Convolution(Op):
    op = 'Convolution'

    def __init__(self, graph: Graph, attrs: dict):
        super().__init__(graph, {
            'kind': 'op',
            'type': __class__.op,
            'op': __class__.op,
            'infer': __class__.infer,
            'multiplication_transparent': True,
            'multiplication_transparent_ports': [(0, 0), (1, 0)],
            'in_ports_count': 3,
            'out_ports_count': 1,
        }, attrs)

    def backend_attrs(self):
        return [
           'auto_pad',
           'group',
           ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))),
           ('dilations', lambda node: ','.join(map(str, node['dilation'][node.spatial_dims]))),
           ('kernel', lambda node: ','.join(map(str, node['kernel_spatial']))),

           ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))),
           ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))),
           'output',
           'pad_value',
           'mode',
           'input',
        ]

    def backend_attrs_v2(self):
        return [
            spatial_getter('stride-x', 'stride', 1),
            spatial_getter('stride-y', 'stride', 0),

            ('kernel-x', lambda node: node.kernel_spatial[1]),
            ('kernel-y', lambda node: node.kernel_spatial[0]),

            spatial_getter('dilation-x', 'dilation', 0),
            spatial_getter('dilation-y', 'dilation', 1),
            spatial_getter('pad-x', 'pad', 1, lambda x: x[0]),
            spatial_getter('pad-y', 'pad', 0, lambda x: x[0]),
            spatial_getter('pad-r', 'pad', 1, lambda x: x[1]),
            spatial_getter('pad-b', 'pad', 0, lambda x: x[1]),

            'auto_pad',
            'output',
            'group',
        ]


    @staticmethod
    def calc_convolution(input_spatial_shape, stride_spatial_shape, pad_spatial_shape, kernel_extent):
        ''' Calculates output shape for Convolution.
            Verified to be applicable for both Caffe and ONNX.
        '''
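        # Worked example: for input spatial size 10, total padding 2, kernel extent 3 and stride 2
        # this evaluates to (10 + 2 - 3) / 2 + 1 = 5.5; int64_array() in the caller is assumed to
        # truncate the float result, giving an output spatial size of 5.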
        spatial_val_wo_stride = input_spatial_shape + pad_spatial_shape - kernel_extent
        float_spatial_val_wo_stride = float_array(spatial_val_wo_stride)
        return float_spatial_val_wo_stride / stride_spatial_shape + 1

    @staticmethod
    def calc_deconvolution(node, input_spatial_shape, pad_spatial_shape, kernel_extent):
        ''' Calculates output shape for Deconvolution.
            Verified to be applicable for both Caffe and ONNX with explicitly defined pads.
            If pads are not specified for ONNX operator, this function is not applicable.
        '''
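        # Worked example: with stride 2, input spatial size 5, kernel extent 3 and total padding 2
        # the formula below gives 2 * (5 - 1) + 3 - 2 = 9.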
        shape = node.stride[node.spatial_dims] * (input_spatial_shape - 1) + kernel_extent - pad_spatial_shape
        return shape

    @staticmethod
    def infer(node: Node):
102         """
103         Infers shape of convolution node as it is done in ONNX.
104         It is very similar to one that Caffe does, but slightly different.
105         We made a complete fork of this function because they are supposed to be
106         supported differently by different people.
107         Args:
108             node: graph convolution node
109         """
        input_shape = node.in_node(0).shape
        if input_shape is None:
            return

        # bias_term cannot be deduced earlier for frameworks that represent
        # convolution weights/biases as regular inputs, so the number of inputs
        # is checked here to restore the correct value of bias_term and keep
        # the rest of the code unchanged. It will be used after we merge the
        # per-framework convolution infer functions into a single one.
        if not node.has_valid('bias_term'):
            node['bias_term'] = len(node.in_nodes()) == 3
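        # Illustrative note: with inputs (data, weights, bias) there are three input nodes, so
        # bias_term becomes True; with only (data, weights) it becomes False.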

        # In case of Caffe we have to calculate the input index for weights because
        # a Caffe convolution can have more than one input
        weights_index = len(node.in_nodes()) - 2
        if not node.bias_term:
            weights_index = len(node.in_nodes()) - 1

        # Reshape the weights kernel to its original shape
        # In case of the Caffe or MXNet frameworks, the weights values have no structured shape like OIHW,
        # so we have to reshape the weights to the normal shape
        # For this case, the Convolution node should have the attribute reshape_kernel = True
        if node.has_valid('reshape_kernel') and node.reshape_kernel:
            if not (node.has_valid('output') and node.has_valid('channel_dims') and node.has_valid(
                    'group') and node.has_valid('kernel_spatial')):
                log.error('Cannot reshape kernel because not all required attrs are set for {} node'.format(node.id))
                return
            # layout for Convolution weights is OIHW
            kernel_shape = np.array([node.output, input_shape[node.channel_dims].item() / node.group,
                                    *[node.kernel_spatial[i] for i in range(len(node.kernel_spatial))]], dtype=np.int64)
            if node.type == 'Deconvolution':  # layout for Deconvolution weights is IOHW
                kernel_shape[[0, 1]] = kernel_shape[[1, 0]]
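            # Illustrative example: with output=64, 3 input channels, group=1 and a 3x3 kernel the
            # computed kernel_shape is [64, 3, 3, 3] (OIHW); for Deconvolution the first two entries
            # are swapped, giving [3, 64, 3, 3] (IOHW).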

            if np.prod(kernel_shape) != np.prod(node.in_node(weights_index).value.shape):
                log.error("Size of weights {} does not match kernel shape: {}\n".format(np.prod(node.in_node(weights_index).value.shape), kernel_shape) +
                          "    A possible reason is a wrong number of channels in the input shape\n")
                raise Error("Cannot reshape weights to kernel shape")

            node.in_node(weights_index).shape = np.array(kernel_shape)
            node.in_node(weights_index).value = np.reshape(node.in_node(weights_index).value, kernel_shape)
            node.reshape_kernel = False

        # Pass the weights shape to the node attribute kernel_shape
        kernel_shape = node.in_node(weights_index).shape
        node['kernel_shape'] = kernel_shape
        # Calculate kernel_spatial_idx and spatial_dims if they are not specified
        # It is necessary for ONNX because the convolution can be 1D/2D/3D
        if not node.has_valid('kernel_spatial_idx'):
            node['kernel_spatial_idx'] = np.delete([x for x in range(len(kernel_shape))], (node.input_feature_channel, node.output_feature_channel))

        if not node.has_valid('spatial_dims'):
            node['spatial_dims'] = np.delete([x for x in range(len(input_shape))], (node.channel_dims[0], node.batch_dims[0]))

        node['kernel_spatial'] = kernel_shape[node.kernel_spatial_idx]
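        # Illustrative example: for OIHW weights of shape [64, 3, 5, 5] with output_feature_channel=0
        # and input_feature_channel=1, kernel_spatial_idx is [2, 3] and kernel_spatial is [5, 5].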

        if not node.has_valid('output'):
            # restore the number of output feature maps from the second argument, which is the weights
            if node.type in ['Convolution', 'Deconvolution']:
                node['output'] = kernel_shape[node.output_feature_channel]
            else:
                raise Error(
                    'Convolution infer function was called for a node {} with unsupported type {}',
                    node.soft_get('name'),
                    node.type
                )

        # Set default values for dilation, strides and pads if not set
        if not node.has_valid('dilation'):
            node['dilation'] = np.full([len(input_shape)], 1, dtype=np.int64)
        if not node.has_valid('stride'):
            node['stride'] = np.full([len(input_shape)], 1, dtype=np.int64)
        if not node.has_valid('pad'):
            node['pad'] = np.array([[0, 0]] * len(input_shape), dtype=np.int64)
        node['pad_spatial_shape'] = node.pad[node.spatial_dims]

        if not node.has_valid('output_padding'):
            node['output_padding'] = np.full([len(input_shape)], 0, dtype=np.int64)

        input_spatial_shape = input_shape[node.spatial_dims]
        stride_spatial_shape = node.stride[node.spatial_dims]

        kernel_extent = node.dilation[node.spatial_dims] * (node.kernel_spatial - 1) + 1
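        # Illustrative example: a 3-element kernel dimension with dilation 2 covers
        # 2 * (3 - 1) + 1 = 5 input positions.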
        # TensorFlow always has the auto_pad attribute, which can be either valid or same_upper
        # In ONNX the auto_pad attribute is deprecated but appears in some models (could be valid, same_upper or same_lower)
        # Caffe does not use the auto_pad attribute
        if node.has_valid('auto_pad') and not node.has_valid('output_spatial_shape'):
            node['pad_spatial_shape'], node['output_spatial_shape'] = tf_window_op_pad_infer(input_spatial_shape,
                                                                                             kernel_extent,
                                                                                             stride_spatial_shape,
                                                                                             node.auto_pad,
                                                                                             node.type == 'Deconvolution')

            pad = np.zeros((len(input_shape), 2), dtype=np.int64)
            pad[node.spatial_dims] = node.pad_spatial_shape
            node.pad = pad
        else:
            pad_spatial_shape = np.add.reduce(node.pad_spatial_shape, axis=1)
            if node.type == 'Convolution':
                float_spatial = Convolution.calc_convolution(input_spatial_shape, stride_spatial_shape,
                                                             pad_spatial_shape,
                                                             kernel_extent)
                node['output_spatial_shape'] = int64_array(float_spatial)
            elif node.type == 'Deconvolution':
                # If output_spatial_shape is given, we calculate the spatial pads
                if node.has_valid('output_spatial_shape'):
                    if node.has_valid('get_pad'):
                        node['pad'] = node.get_pad(node, input_shape, kernel_shape)
                    else:
                        log.debug('Can\'t calculate paddings due to missing lambda get_pad in {} node'.format(node.id))
                        return
                else:
                    output_padding = node.output_padding[node.spatial_dims] if node.has_valid('output_padding') else None
                    if output_padding is not None and any(output_padding):
                        pad_spatial_shape -= output_padding
                        for dim in range(len(pad_spatial_shape)):
                            node.pad_spatial_shape[dim][1] -= pad_spatial_shape[dim]
                        node.pad[node.spatial_dims] = node.pad_spatial_shape
                        node['output_padding'] = None

                    float_spatial = Convolution.calc_deconvolution(node, input_spatial_shape, pad_spatial_shape,
                                                                   kernel_extent)
                    node['output_spatial_shape'] = int64_array(float_spatial)
            else:
                return

        # For cases when the group attribute wasn't set in the extractor, we should specify the get_group attribute;
        # this attribute should store lambda node: ... (check the TF convolution extractor)
        if node.has_valid('get_group'):
            node['group'] = node.get_group(node)
        output_shape = np.full_like(input_shape, -1, dtype=np.int64)
        output_shape[node.batch_dims] = input_shape[node.batch_dims]  # pylint: disable=unsupported-assignment-operation
        output_shape[node.spatial_dims] = node.output_spatial_shape  # pylint: disable=unsupported-assignment-operation

        # For cases when the output attribute wasn't set in the extractor, we should specify the get_output_feature_dim attribute;
        # this attribute should store lambda node: ... (check the TF convolution extractor)
        if node.has_valid('get_output_feature_dim'):
            node['output'] = node.get_output_feature_dim(node)
        output_shape[node.channel_dims] = node.output  # pylint: disable=unsupported-assignment-operation
        node['output_shape'] = output_shape
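        # Illustrative example: for an NCHW input of shape [1, 3, 224, 224] with output=64 and
        # output_spatial_shape=[112, 112], the assembled output_shape is [1, 64, 112, 112].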

        for n in node.out_nodes():
            node.out_node(n).shape = output_shape

        mark_input_bins(node)
        assign_dims_to_weights(node.in_node(weights_index), node.kernel_spatial_idx, node.input_feature_channel,
                               node.output_feature_channel, len(kernel_shape))

        PermuteAttrs.create_permute_attrs(node, attrs=[('pad', 'input:0'),
                                                       ('stride', 'input:0'),
                                                       ('dilation', 'input:0'),
                                                       ('output_shape', 'input:0'),
                                                       ('batch_dims', 'input:0'),
                                                       ('channel_dims', 'input:0'),
                                                       ('spatial_dims', 'input:0'),

                                                       ('kernel_shape', 'input:{}'.format(weights_index)),
                                                       ('kernel_spatial_idx', 'input:{}'.format(weights_index)),
                                                       ('input_feature_channel', 'input:{}'.format(weights_index)),
                                                       ('output_feature_channel', 'input:{}'.format(weights_index)),
                                                       ])

        PermuteAttrs.set_permutation(node.in_node(weights_index), node,
                                     node.get_weights_permute if node.has_valid('get_weights_permute') else None)