2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "DepthwiseConvolutionLayer.h"
19 #include "cker/PortableTensorUtils.h"
20 #include <cker/operation/DepthwiseConv.h>
31 void DepthwiseConvolutionLayer::convFloat32()
33 float output_activation_min = 0, output_activation_max = 0;
34 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
36 nnfw::cker::DepthwiseConvParams op_params;
37 op_params.stride_width = _strideWidth;
38 op_params.stride_height = _strideHeight;
39 op_params.dilation_width_factor = _dilationWidth;
40 op_params.dilation_height_factor = _dilationHeight;
41 op_params.padding_values.width = _paddingLeft;
42 op_params.padding_values.height = _paddingTop;
43 op_params.depth_multiplier = _multiplier;
44 op_params.float_activation_min = output_activation_min;
45 op_params.float_activation_max = output_activation_max;
47 nnfw::cker::DepthwiseConv<float, float>(
48 op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
49 getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
50 getBuffer<float>(_output), _external_context->ruy_context());
53 void DepthwiseConvolutionLayer::convQ8uPerTensor()
55 int32_t output_activation_min = 0;
56 int32_t output_activation_max = 0;
57 CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
58 &output_activation_max);
60 double real_multiplier = 0.0;
61 int32_t output_multiplier = 0;
62 int32_t output_shift = 0;
63 GetQuantizedConvolutionMultiplier(_input, _kernel, _bias, _output, &real_multiplier);
64 QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
66 nnfw::cker::DepthwiseConvParams op_params;
67 op_params.stride_width = _strideWidth;
68 op_params.stride_height = _strideHeight;
69 op_params.dilation_width_factor = _dilationWidth;
70 op_params.dilation_height_factor = _dilationHeight;
71 op_params.padding_values.width = _paddingLeft;
72 op_params.padding_values.height = _paddingTop;
73 op_params.depth_multiplier = _multiplier;
74 op_params.input_offset = -_input->data_zero_point();
75 op_params.weights_offset = -_kernel->data_zero_point();
76 op_params.output_offset = _output->data_zero_point();
77 op_params.output_multiplier = output_multiplier;
78 op_params.output_shift = output_shift;
79 op_params.quantized_activation_min = output_activation_min;
80 op_params.quantized_activation_max = output_activation_max;
82 nnfw::cker::DepthwiseConv<uint8_t, int32_t>(
83 op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
84 getBuffer<uint8_t>(_kernel), getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output),
85 getBuffer<uint8_t>(_output), _external_context->ruy_context());
88 void DepthwiseConvolutionLayer::convQ8uPerChannel()
90 nnfw::cker::DepthwiseConvParams op_params;
91 op_params.padding_values.width = _paddingLeft;
92 op_params.padding_values.height = _paddingTop;
93 op_params.stride_width = _strideWidth;
94 op_params.stride_height = _strideHeight;
95 op_params.dilation_width_factor = _dilationWidth;
96 op_params.dilation_height_factor = _dilationHeight;
97 op_params.depth_multiplier = _multiplier;
98 op_params.input_offset = -_input->data_zero_point();
99 op_params.output_offset = _output->data_zero_point();
100 int32_t output_activation_min = 0;
101 int32_t output_activation_max = 0;
102 CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
103 &output_activation_max);
104 op_params.quantized_activation_min = output_activation_min;
105 op_params.quantized_activation_max = output_activation_max;
106 // NOTE: The following fields of ConvParams are not used:
107 // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
109 nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
110 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
111 getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
112 _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
113 getShape(_output), getBuffer<uint8_t>(_output));
// Per-channel quantized int8 depthwise convolution (optimized integer path).
// NOTE(review): the original lines between the signature and the activation-range
// setup are not visible in this chunk (presumably a one-time prepare guard) —
// confirm against the full file.
116 void DepthwiseConvolutionLayer::convQ8i()
124 int32_t output_activation_min = 0;
125 int32_t output_activation_max = 0;
// Integer clamp bounds implied by the fused activation, in the output's quantized domain.
126 CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
127 &output_activation_max);
129 nnfw::cker::DepthwiseConvParams op_params;
130 op_params.padding_type = nnfw::cker::PaddingType::kSame;
131 op_params.padding_values.width = _paddingLeft;
132 op_params.padding_values.height = _paddingTop;
133 op_params.depth_multiplier = _multiplier;
134 op_params.stride_width = _strideWidth;
135 op_params.stride_height = _strideHeight;
136 op_params.dilation_width_factor = _dilationWidth;
137 op_params.dilation_height_factor = _dilationHeight;
// Input offset is the negated zero point (re-added inside cker); weights_offset is
// fixed to 0 — presumably the int8 weights are symmetric (zero point 0), per-channel
// scales are passed separately below.
138 op_params.input_offset = -_input->data_zero_point();
139 op_params.weights_offset = 0;
140 op_params.output_offset = _output->data_zero_point();
141 op_params.quantized_activation_min = output_activation_min;
142 op_params.quantized_activation_max = output_activation_max;
// Uses the precomputed per-channel multipliers/shifts (see prepareQ8i()).
144 nnfw::cker::optimized_integer_ops::DepthwiseConvPerChannel(
145 op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
146 getShape(_input), getBuffer<int8_t>(_input), getShape(_kernel), getBuffer<int8_t>(_kernel),
147 getShape(_bias), getBuffer<int32_t>(_bias), getShape(_output), getBuffer<int8_t>(_output),
148 _external_context->ruy_context());
// Hybrid path: float activations with per-channel int8 weights.  The input is
// asymmetrically quantized per batch entry on the fly, then an integer kernel
// rescales results back to float.
// NOTE(review): some original lines are missing from this chunk (braces and,
// presumably, a one-time guard around prepareQ8iHybridPerChannel()) — confirm
// against the full file.
151 void DepthwiseConvolutionLayer::convQ8iHybridPerChannel()
155 prepareQ8iHybridPerChannel();
// Float clamp bounds for the fused activation.
159 float output_activation_min = 0, output_activation_max = 0;
160 CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
162 auto input_shape = getShape(_input);
163 const int batch_size = input_shape.Dims(0);
164 const int input_size = input_shape.FlatSize() / batch_size;
166 auto scaling_factors_ptr = _input_scaling_factors.data();
167 auto input_offsets_ptr = _input_offsets.data();
// Quantize each batch slice to int8, recording one scale and one offset per batch.
// NOTE(review): this writes up to (batch_size * input_size) elements into
// _input_quantized — the buffer must be sized for the whole input, not one slice.
169 for (int b = 0; b < batch_size; ++b)
171 const int offset = b * input_size;
172 nnfw::cker::PortableAsymmetricQuantizeFloats(getBuffer<float>(_input) + offset, input_size,
173 _input_quantized.data() + offset,
174 &scaling_factors_ptr[b], &input_offsets_ptr[b]);
177 nnfw::cker::DepthwiseConvParams op_params;
178 op_params.padding_values.width = _paddingLeft;
179 op_params.padding_values.height = _paddingTop;
180 op_params.depth_multiplier = _multiplier;
181 op_params.stride_width = _strideWidth;
182 op_params.stride_height = _strideHeight;
183 op_params.dilation_width_factor = _dilationWidth;
184 op_params.dilation_height_factor = _dilationHeight;
// Float activation bounds are used here because the output is float.
185 op_params.float_activation_min = output_activation_min;
186 op_params.float_activation_max = output_activation_max;
// Per-batch input scales/offsets and per-channel kernel scales are passed alongside
// the quantized buffers; bias and output remain float.
188 nnfw::cker::reference_integer_ops::DepthwiseConvHybridPerChannel(
189 op_params, _input_scaling_factors.data(), getShape(_input), _input_quantized.data(),
190 getShape(_kernel), getBuffer<int8_t>(_kernel), getShape(_bias), getBuffer<float>(_bias),
191 getShape(_output), getBuffer<float>(_output), _kernel->data_scales().data(),
192 _input_offsets.data());
195 void DepthwiseConvolutionLayer::prepareQ8i()
197 GetQuantizedConvolutionMultipliersAndShifts(
198 _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
199 _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
200 _per_channel_output_shift);
203 void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
205 GetQuantizedConvolutionMultipliersAndShifts(
206 _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
207 _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
208 _per_channel_output_shift);
211 void DepthwiseConvolutionLayer::prepareQ8iHybridPerChannel()
213 // allocate memory for activation quantization.
214 // - quantized values (int8_t type and same shape of original input)
215 // - quantization params (= scale/zeropoint for each input)
216 auto input_shape = getShape(_input);
217 const int batch_size = input_shape.Dims(0);
218 const int input_size = input_shape.FlatSize() / batch_size;
219 _input_quantized.resize(input_size);
220 // TODO: Optimize the case of batch_size = 1
221 _input_scaling_factors.resize(batch_size);
222 _input_offsets.resize(batch_size);
225 void DepthwiseConvolutionLayer::ensureQ8iHybridPerChannel()
227 // ensure weight is per-channel quantized.
228 int32_t kernel_input_channel = getShape(_kernel).Dims(3);
229 // zero_points comes from flatbuffer vector. Its size is within uint32_t range.
230 size_t kernel_zerop_cnt = _kernel->data_scales().size();
231 // promote to int64_t to compare int32_t and uint32_t
232 if ((int64_t)kernel_input_channel != (int64_t)kernel_zerop_cnt)
233 throw std::runtime_error{"DConv2D hybrid supports only per-channel quantized weight."};
// One-time setup: records geometry/activation parameters, stores the external
// context, and performs type-dependent quantization preparation for constant
// kernels with static shapes.
// NOTE(review): many original lines are missing from this chunk (tensor-pointer
// assignments, the hybrid branch body, per-channel/per-tensor prepare calls and
// braces); the comments below cover only the visible lines.
236 void DepthwiseConvolutionLayer::configure(
237     const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
238     const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
239     const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight,
240     const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight,
241     const ir::Activation activation, IPortableTensor *output,
242     const std::shared_ptr<ExternalContext> &external_context)
// Cache all geometry parameters for the conv* methods.
247 _paddingLeft = paddingLeft;
248 _paddingRight = paddingRight;
249 _paddingTop = paddingTop;
250 _paddingBottom = paddingBottom;
251 _strideWidth = strideWidth;
252 _strideHeight = strideHeight;
253 _multiplier = multiplier;
254 _dilationWidth = dilationWidth;
255 _dilationHeight = dilationHeight;
256 _activation = activation;
258 _external_context = external_context;
// Hybrid mode = float input combined with symmetric int8 weights.
259 _is_hybrid = _input->data_type() == OperandType::FLOAT32 &&
260 _kernel->data_type() == OperandType::QUANT_INT8_SYMM;
// Hybrid setup: validate per-channel weights and allocate quantization scratch.
264 ensureQ8iHybridPerChannel();
265 prepareQ8iHybridPerChannel();
268 else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
// Precomputation requires a constant kernel and static input/output shapes.
270 if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
276 else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
277 !_input->is_dynamic() && !_output->is_dynamic())
// More than one kernel scale implies per-channel quantization.
279 const bool per_channel_quantized = _kernel->data_scales().size() > 1;
280 if (per_channel_quantized)
282 prepareQ8uPerChannel();
// Dispatches to the conv variant selected by the input/kernel data types.
// NOTE(review): control-flow braces and several branch bodies (the _is_hybrid
// check, the float-path call, the per-tensor/per-channel uint8 calls and the int8
// call) are missing from this chunk; the visible lines only sketch the dispatch.
288 void DepthwiseConvolutionLayer::run()
292 convQ8iHybridPerChannel();
294 else if (_input->data_type() == OperandType::FLOAT32)
298 else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
// More than one kernel scale implies per-channel quantization.
300 const bool per_channel_quantized = _kernel->data_scales().size() > 1;
301 if (per_channel_quantized)
306 else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
// Any other operand type is unsupported by this CPU kernel.
312 throw std::runtime_error{"DepthwiseConv: unsupported data type"};
318 } // namespace backend