Imported Upstream version 1.25.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / ops / DepthwiseConvolutionLayer.cc
1 /*
2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "DepthwiseConvolutionLayer.h"
18
19 #include "cker/PortableTensorUtils.h"
20 #include <cker/operation/DepthwiseConv.h>
21
22 namespace onert
23 {
24 namespace backend
25 {
26 namespace cpu
27 {
28 namespace ops
29 {
30
31 void DepthwiseConvolutionLayer::convFloat32()
32 {
33   float output_activation_min = 0, output_activation_max = 0;
34   CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
35
36   nnfw::cker::DepthwiseConvParams op_params;
37   op_params.stride_width = _strideWidth;
38   op_params.stride_height = _strideHeight;
39   op_params.dilation_width_factor = _dilationWidth;
40   op_params.dilation_height_factor = _dilationHeight;
41   op_params.padding_values.width = _paddingLeft;
42   op_params.padding_values.height = _paddingTop;
43   op_params.depth_multiplier = _multiplier;
44   op_params.float_activation_min = output_activation_min;
45   op_params.float_activation_max = output_activation_max;
46
47   nnfw::cker::DepthwiseConv<float, float>(
48     op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
49     getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
50     getBuffer<float>(_output), _external_context->ruy_context());
51 }
52
53 void DepthwiseConvolutionLayer::convQ8uPerTensor()
54 {
55   int32_t output_activation_min = 0;
56   int32_t output_activation_max = 0;
57   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
58                                     &output_activation_max);
59
60   double real_multiplier = 0.0;
61   int32_t output_multiplier = 0;
62   int32_t output_shift = 0;
63   GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
64   QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
65
66   nnfw::cker::DepthwiseConvParams op_params;
67   op_params.stride_width = _strideWidth;
68   op_params.stride_height = _strideHeight;
69   op_params.dilation_width_factor = _dilationWidth;
70   op_params.dilation_height_factor = _dilationHeight;
71   op_params.padding_values.width = _paddingLeft;
72   op_params.padding_values.height = _paddingTop;
73   op_params.depth_multiplier = _multiplier;
74   op_params.input_offset = -_input->data_zero_point();
75   op_params.weights_offset = -_kernel->data_zero_point();
76   op_params.output_offset = _output->data_zero_point();
77   op_params.output_multiplier = output_multiplier;
78   op_params.output_shift = output_shift;
79   op_params.quantized_activation_min = output_activation_min;
80   op_params.quantized_activation_max = output_activation_max;
81
82   nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
83     op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
84     getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
85     getBuffer<uint8_t>(_output), _external_context->ruy_context());
86 }
87
88 void DepthwiseConvolutionLayer::convQ8uPerChannel()
89 {
90   nnfw::cker::DepthwiseConvParams op_params;
91   op_params.padding_values.width = _paddingLeft;
92   op_params.padding_values.height = _paddingTop;
93   op_params.stride_width = _strideWidth;
94   op_params.stride_height = _strideHeight;
95   op_params.dilation_width_factor = _dilationWidth;
96   op_params.dilation_height_factor = _dilationHeight;
97   op_params.depth_multiplier = _multiplier;
98   op_params.input_offset = -_input->data_zero_point();
99   op_params.output_offset = _output->data_zero_point();
100   int32_t output_activation_min = 0;
101   int32_t output_activation_max = 0;
102   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
103                                     &output_activation_max);
104   op_params.quantized_activation_min = output_activation_min;
105   op_params.quantized_activation_max = output_activation_max;
106   // NOTE: The following fields of ConvParams are not used:
107   // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
108
109   nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
110     op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
111     getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
112     _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
113     getShape(_output), getBuffer<uint8_t>(_output));
114 }
115
116 void DepthwiseConvolutionLayer::convQ8i()
117 {
118   if (!_prepared)
119   {
120     prepareQ8i();
121     _prepared = true;
122   }
123
124   int32_t output_activation_min = 0;
125   int32_t output_activation_max = 0;
126   CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
127                                     &output_activation_max);
128
129   nnfw::cker::DepthwiseConvParams op_params;
130   op_params.padding_type = nnfw::cker::PaddingType::kSame;
131   op_params.padding_values.width = _paddingLeft;
132   op_params.padding_values.height = _paddingTop;
133   op_params.depth_multiplier = _multiplier;
134   op_params.stride_width = _strideWidth;
135   op_params.stride_height = _strideHeight;
136   op_params.dilation_width_factor = _dilationWidth;
137   op_params.dilation_height_factor = _dilationHeight;
138   op_params.input_offset = -_input->data_zero_point();
139   op_params.weights_offset = 0;
140   op_params.output_offset = _output->data_zero_point();
141   op_params.quantized_activation_min = output_activation_min;
142   op_params.quantized_activation_max = output_activation_max;
143
144   nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
145     op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
146     getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
147     getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
148     _external_context->ruy_context());
149 }
150
// Hybrid kernel: quantizes the float input to int8 batch-by-batch at run time,
// then convolves it against the per-channel int8 weights; the cker kernel
// produces float output (scaled by the per-batch factors passed in).
void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
{
  if (!_prepared)
  {
    // Lazily allocate the quantization scratch buffers (dynamic-shape path).
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }

  float output_activation_min = 0, output_activation_max = 0;
  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);

  // Treat the input as batch_size rows of input_size contiguous elements.
  auto input_shape = getShape(_input);
  const int batch_size = input_shape.Dims(0);
  const int input_size = input_shape.FlatSize() / batch_size;

  auto scaling_factors_ptr = _input_scaling_factors.data();
  auto input_offsets_ptr = _input_offsets.data();

  // Asymmetric-quantize each batch independently, recording one scale and one
  // zero-point per batch for the kernel to undo afterwards.
  // NOTE(review): prepareQ8iHybridPerChannel() sizes _input_quantized to a
  // single batch (input_size), yet this loop writes at offset b * input_size —
  // looks like an out-of-bounds write when batch_size > 1; confirm.
  for (int b = 0; b < batch_size; ++b)
  {
    const int offset = b * input_size;
    nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
                                                 _input_quantized.data() + offset,
                                                 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
  }

  nnfw::cker::DepthwiseConvParams op_params;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.depth_multiplier = _multiplier;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = _dilationWidth;
  op_params.dilation_height_factor = _dilationHeight;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
    op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
    getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
    getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
    _input_offsets.data());
}
194
195 void DepthwiseConvolutionLayer::prepareQ8i()
196 {
197   GetQuantizedConvolutionMultipliersAndShifts(
198     _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
199     _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
200     _per_channel_output_shift);
201 }
202
203 void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
204 {
205   GetQuantizedConvolutionMultipliersAndShifts(
206     _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
207     _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
208     _per_channel_output_shift);
209 }
210
211 void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
212 {
213   // allocate memory for activation quantization.
214   // - quantized values (int8_t type and same shape of original input)
215   // - quantization params (= scale/zeropoint for each input)
216   auto input_shape = getShape(_input);
217   const int batch_size = input_shape.Dims(0);
218   const int input_size = input_shape.FlatSize() / batch_size;
219   _input_quantized.resize(input_size);
220   // TODO: Optimize the case of batch_size = 1
221   _input_scaling_factors.resize(batch_size);
222   _input_offsets.resize(batch_size);
223 }
224
225 void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
226 {
227   // ensure weight is per-channel quantized.
228   int32_t kernel_input_channel = getShape(_kernel).Dims(3);
229   // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
230   size_t kernel_zerop_cnt = _kernel->data_scales().size();
231   // promote to int64_t to compare int32_t and uint32_t
232   if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
233     throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
234 }
235
// Stores the layer configuration and, when shapes are static and the weight is
// constant, eagerly precomputes quantization parameters so run() can skip the
// prepare step.
void DepthwiseConvolutionLayer::configure(
  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
  const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
  const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
  const ir::Activation activation, IPortableTensor *output,
  const std::shared_ptr<ExternalContext> &external_context)
{
  _input = input;
  _kernel = kernel;
  _bias = bias;
  _paddingLeft = paddingLeft;
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _multiplier = multiplier;
  _dilationWidth = dilationWidth;
  _dilationHeight = dilationHeight;
  _activation = activation;
  _output = output;
  _external_context = external_context;
  // Hybrid mode: float input convolved against symmetric int8 weights.
  _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
               _kernel->data_type() == OperandType::QUANT_INT8_SYMM;

  if (_is_hybrid)
  {
    // Hybrid requires per-channel quantized weight; allocate the
    // quantization scratch buffers up front.
    ensureQ8iHybridPerChannel();
    prepareQ8iHybridPerChannel();
    _prepared = true;
  }
  else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
  {
    // Static shapes + constant weight: multipliers/shifts can be computed once
    // here; otherwise convQ8i() computes them lazily on first run.
    if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
    {
      prepareQ8i();
      _prepared = true;
    }
  }
  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
           !_input->is_dynamic() && !_output->is_dynamic())
  {
    // More than one kernel scale means the uint8 weight is per-channel
    // quantized; only that variant needs precomputed multipliers.
    const bool per_channel_quantized = _kernel->data_scales().size() > 1;
    if (per_channel_quantized)
    {
      prepareQ8uPerChannel();
      _prepared = true;
    }
  }
}
287
288 void DepthwiseConvolutionLayer::run()
289 {
290   if (_is_hybrid)
291   {
292     convQ8iHybridPerChannel();
293   }
294   else if (_input->data_type() == OperandType::FLOAT32)
295   {
296     convFloat32();
297   }
298   else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
299   {
300     const bool per_channel_quantized = _kernel->data_scales().size() > 1;
301     if (per_channel_quantized)
302       convQ8uPerChannel();
303     else
304       convQ8uPerTensor();
305   }
306   else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
307   {
308     convQ8i();
309   }
310   else
311   {
312     throw std::runtime_error{"DepthwiseConv: unsupported data type"};
313   }
314 }
315
316 } // namespace ops
317 } // namespace cpu
318 } // namespace backend
319 } // namespace onert