2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "kernels/DepthwiseConv2D.h"
19 #include "kernels/Utils.h"
21 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
22 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
26 namespace luci_interpreter
31 DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
32 Tensor *output, const DepthwiseConv2DParams ¶ms)
33 : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
// Validates the element types and shapes of the input, filter and optional bias tensors,
// computes the padding for both spatial dimensions and stores it in _padding_height /
// _padding_width, then resizes the output to [batches, output_height, output_width,
// channels_out].
// NOTE(review): every line of this listing carries a leading number and the numbering has gaps;
// brace-only lines and the `else` that should guard the throw below are not visible. Confirm the
// exact control flow against the upstream file.
37 void DepthwiseConv2D::configure()
39 // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
40 // | input filter bias output |
41 // ----+---------------------------+
42 // (1) | float float float float |
43 // (2) | float int8 float float | hybrid
44 // (3) | uint8 uint8 int32 uint8 | quantized
45 // (4) | int8 int8 int32 int8 | quantized per channel
46 // (5) | int16 int8 int64 int16 | quantized per channel 16x8
48 // We only support (1) and (3) for now, and additionally the following:
49 // | input filter bias output |
50 // ----+---------------------------+
51 // (5) | int16 int16 int64 int16 |
// NOTE(review): the label (5) is used twice above; the second table describes the all-int16
// combination that the S16/S16 branch below accepts.
// Each accepted input/filter type pair fixes the bias element type; a null bias always passes.
53 if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
55 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
57 else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
59 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
61 else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
63 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
// Presumably the fallback for every other input/filter type pair (guarding `else` not visible).
67 throw std::runtime_error("Unsupported type.");
// The output must use the same element type as the input.
69 LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
71 const Shape &input_shape = input()->shape();
72 const Shape &filter_shape = filter()->shape();
// Both tensors must be rank 4; input dims 0..2 are read as batches, height and width.
73 LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
75 const int32_t batches = input_shape.dim(0);
76 const int32_t input_height = input_shape.dim(1);
77 const int32_t input_width = input_shape.dim(2);
78 // Filter format: [1, H, W, O].
79 LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
80 const int32_t filter_height = filter_shape.dim(1);
81 const int32_t filter_width = filter_shape.dim(2);
82 const int32_t channels_out = filter_shape.dim(3);
// When present, the bias must be a 1-D tensor with one element per output channel.
84 LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
85 bias()->shape().dim(0) == channels_out));
// Output spatial size is derived from the padding mode, input size, filter size, stride and
// dilation of the matching dimension.
87 const int32_t output_height =
88 computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
89 _params.dilation_height_factor);
90 const int32_t output_width =
91 computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
92 _params.dilation_width_factor);
// The padding is kept in members because the eval* routines read it later.
94 _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
95 input_height, filter_height, output_height);
96 _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
97 filter_width, output_width);
99 output()->resize({batches, output_height, output_width, channels_out});
// Runs the kernel by switching on the input element type and handing off to a type-specific
// evaluation routine.
// NOTE(review): several lines of this switch (case labels, dispatch calls, breaks) are not
// visible in this listing; evalQuantizedPerChannel() is the only call that can be seen. Confirm
// the remaining branches against the upstream file.
102 void DepthwiseConv2D::execute() const
104 switch (input()->element_type())
106 case DataType::FLOAT32:
// A float input is only accepted together with a float filter.
107 if (filter()->element_type() == DataType::FLOAT32)
// Presumably reached only when the filter is not FLOAT32 — confirm.
112 throw std::runtime_error("Unsupported type.");
// Exactly one filter scale: per-tensor quantization (the call made here is not visible).
// NOTE(review): an empty scales() matches neither this branch nor the next; what happens in
// that case cannot be determined from the visible lines.
114 if (filter()->scales().size() == 1)
// More than one filter scale: per-channel quantization, which requires a rank-4 filter with
// exactly one scale per element of its last dimension.
118 else if (filter()->scales().size() > 1)
120 LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
121 LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
122 static_cast<size_t>(filter()->shape().dim(3)));
123 evalQuantizedPerChannel();
// Presumably the default case for any input type not handled above — confirm.
130 throw std::runtime_error("Unsupported type.");
134 void DepthwiseConv2D::evalFloat() const
136 float activation_min{};
137 float activation_max{};
138 calculateActivationRange(_params.activation, &activation_min, &activation_max);
140 tflite::DepthwiseParams params{};
141 params.padding_values.height = _padding_height;
142 params.padding_values.width = _padding_width;
143 params.stride_height = _params.stride_height;
144 params.stride_width = _params.stride_width;
145 params.dilation_height_factor = _params.dilation_height_factor;
146 params.dilation_width_factor = _params.dilation_width_factor;
147 params.depth_multiplier = _params.depth_multiplier;
148 params.float_activation_min = activation_min;
149 params.float_activation_max = activation_max;
151 tflite::reference_ops::DepthwiseConv(
152 params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
153 getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
154 getTensorShape(output()), getTensorData<float>(output()));
// Quantized uint8 path for filters that carry one scale per output channel. The convolution is
// written out as explicit loops over batch, output row, output column, input channel and depth
// multiplier; each accumulator is rescaled with the multiplier/shift of its output channel.
// NOTE(review): the declarations of acc, input_val, filter_val and scaled_acc, and any guard
// around the bias addition, are not visible in this listing (its line numbering has gaps).
// Confirm them against the upstream file.
157 void DepthwiseConv2D::evalQuantizedPerChannel() const
159 const auto *input_data = getTensorData<uint8_t>(input());
160 const auto *filter_data = getTensorData<uint8_t>(filter());
161 const auto *bias_data = getTensorData<int32_t>(bias());
162 auto *output_data = getTensorData<uint8_t>(output());
164 const Shape &input_shape = input()->shape();
165 const Shape &filter_shape = filter()->shape();
166 const Shape &output_shape = output()->shape();
168 const int32_t batches = input_shape.dim(0);
169 const int32_t input_height = input_shape.dim(1);
170 const int32_t input_width = input_shape.dim(2);
171 const int32_t input_depth = input_shape.dim(3);
172 const int32_t filter_height = filter_shape.dim(1);
173 const int32_t filter_width = filter_shape.dim(2);
174 const int32_t output_height = output_shape.dim(1);
175 const int32_t output_width = output_shape.dim(2);
177 const int32_t stride_height = _params.stride_height;
178 const int32_t stride_width = _params.stride_width;
179 const int32_t dilation_height_factor = _params.dilation_height_factor;
180 const int32_t dilation_width_factor = _params.dilation_width_factor;
181 const int32_t depth_multiplier = _params.depth_multiplier;
// Integer clamp bounds for the fused activation, expressed in the output's quantized domain.
183 int32_t activation_min{};
184 int32_t activation_max{};
185 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
// One real-valued rescale factor per filter scale, built from the input scale, the filter
// scales and the output scale (presumably input * filter / output — confirm in the helper).
187 const std::vector<double> effective_output_scales =
188 getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
// Each factor is converted to the integer multiplier + shift pair used for rescaling below.
190 std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
191 quantizeMultipliers(effective_output_scales);
// The wrapper is what gets indexed by output channel further down.
192 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
194 for (int batch = 0; batch < batches; ++batch)
196 for (int out_y = 0; out_y < output_height; ++out_y)
198 for (int out_x = 0; out_x < output_width; ++out_x)
200 for (int in_channel = 0; in_channel < input_depth; ++in_channel)
202 for (int m = 0; m < depth_multiplier; ++m)
// Every input channel owns depth_multiplier consecutive output channels.
204 const int output_channel = m + in_channel * depth_multiplier;
// Input coordinate hit by filter tap (0, 0); it can be negative because padding is subtracted.
205 const int in_x_origin = (out_x * stride_width) - _padding_width;
206 const int in_y_origin = (out_y * stride_height) - _padding_height;
208 for (int filter_y = 0; filter_y < filter_height; ++filter_y)
210 for (int filter_x = 0; filter_x < filter_width; ++filter_x)
212 const int in_x = in_x_origin + dilation_width_factor * filter_x;
213 const int in_y = in_y_origin + dilation_height_factor * filter_y;
214 // Zero padding by omitting the areas outside the image.
215 const bool is_point_inside_image =
216 (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
217 if (is_point_inside_image)
220 input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
222 filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
// Both operands are shifted by their zero points before multiplying; the filter zero point is
// looked up per output channel.
223 acc += (filter_val - filter()->zero_points()[output_channel]) *
224 (input_val - input()->zero_point());
// NOTE(review): bias() may be null (configure() allows it); a null check on bias_data is
// presumably on a line missing from this listing — confirm.
230 acc += bias_data[output_channel];
232 int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
233 int output_shift = quant_multipliers[output_channel].shift;
// Rescale the accumulator into the output's quantized domain, add the output zero point, clamp
// to the activation range and store the result as uint8.
235 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
236 scaled_acc += output()->zero_point();
237 scaled_acc = std::max(scaled_acc, activation_min);
238 scaled_acc = std::min(scaled_acc, activation_max);
239 output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
240 static_cast<uint8_t>(scaled_acc);
// Quantized uint8 path that uses a single filter scale and zero point. Derives the integer
// output multiplier and shift from the three tensor scales, fills a tflite::DepthwiseParams and
// delegates to the TensorFlow Lite reference kernel.
// NOTE(review): the declaration of output_shift is not visible in this listing (its line
// numbering has a gap right before the quantizeMultiplier call) — confirm against upstream.
248 void DepthwiseConv2D::evalQuantized() const
// Scales are widened to double before the multiplier is computed.
250 const auto input_scale = static_cast<double>(input()->scale());
251 const auto filter_scale = static_cast<double>(filter()->scale());
252 const auto output_scale = static_cast<double>(output()->scale());
// Factor that maps an accumulator in (input scale * filter scale) units to output-scale units.
254 const double real_multiplier = input_scale * filter_scale / output_scale;
255 int32_t output_multiplier{};
// Converts the real factor into the integer multiplier + shift pair handed to the kernel below.
257 quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
// Integer clamp bounds for the fused activation, expressed in the output's quantized domain.
259 int32_t activation_min{};
260 int32_t activation_max{};
261 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
263 tflite::DepthwiseParams params{};
// Padding comes from configure(); strides, dilations and depth multiplier from the parameters.
264 params.padding_values.height = _padding_height;
265 params.padding_values.width = _padding_width;
266 params.stride_height = _params.stride_height;
267 params.stride_width = _params.stride_width;
268 params.dilation_height_factor = _params.dilation_height_factor;
269 params.dilation_width_factor = _params.dilation_width_factor;
270 params.depth_multiplier = _params.depth_multiplier;
271 // The kernel expects input and filter zero points to be negated.
272 params.input_offset = -input()->zero_point(); // Note the '-'.
273 params.weights_offset = -filter()->zero_point(); // Note the '-'.
// The output zero point is passed as-is, without negation.
274 params.output_offset = output()->zero_point();
275 params.output_multiplier = output_multiplier;
276 params.output_shift = output_shift;
277 params.quantized_activation_min = activation_min;
278 params.quantized_activation_max = activation_max;
// uint8 input and filter, int32 bias, uint8 output.
280 tflite::reference_ops::DepthwiseConv(
281 params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
282 getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
283 getTensorShape(output()), getTensorData<uint8_t>(output()));
// Quantized int16 path: int16 input, filter and output with an int64 bias. The convolution is
// written out as explicit loops over batch, output row, output column, input channel and depth
// multiplier; each accumulator is rescaled with the multiplier/shift of its output channel.
// No zero point is applied to input, filter or output in the lines visible here.
// NOTE(review): the declarations of acc and scaled_acc are not visible in this listing (its
// line numbering has gaps) — confirm them against the upstream file.
286 void DepthwiseConv2D::evalQuantizedS16() const
288 const auto *input_data = getTensorData<int16_t>(input());
289 const auto *filter_data = getTensorData<int16_t>(filter());
290 const auto *bias_data = getTensorData<int64_t>(bias());
291 auto *output_data = getTensorData<int16_t>(output());
293 const Shape &input_shape = input()->shape();
294 const Shape &filter_shape = filter()->shape();
295 const Shape &output_shape = output()->shape();
297 const int32_t batches = input_shape.dim(0);
298 const int32_t input_height = input_shape.dim(1);
299 const int32_t input_width = input_shape.dim(2);
300 const int32_t input_depth = input_shape.dim(3);
301 const int32_t filter_height = filter_shape.dim(1);
302 const int32_t filter_width = filter_shape.dim(2);
303 const int32_t output_height = output_shape.dim(1);
304 const int32_t output_width = output_shape.dim(2);
306 const int32_t stride_height = _params.stride_height;
307 const int32_t stride_width = _params.stride_width;
308 const int32_t dilation_height_factor = _params.dilation_height_factor;
309 const int32_t dilation_width_factor = _params.dilation_width_factor;
310 const int32_t depth_multiplier = _params.depth_multiplier;
// One real-valued rescale factor per filter scale, built from the input scale, the filter
// scales and the output scale (presumably input * filter / output — confirm in the helper).
312 const std::vector<double> effective_output_scales =
313 getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
// Each factor is converted to the integer multiplier + shift pair used for rescaling below.
315 std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
316 quantizeMultipliers(effective_output_scales);
// The wrapper is what gets indexed by output channel further down.
318 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
// Integer clamp bounds for the fused activation, expressed in the output's quantized domain.
320 int32_t activation_min{};
321 int32_t activation_max{};
322 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
324 for (int32_t batch = 0; batch < batches; ++batch)
326 for (int32_t out_y = 0; out_y < output_height; ++out_y)
328 for (int32_t out_x = 0; out_x < output_width; ++out_x)
330 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
332 for (int32_t m = 0; m < depth_multiplier; ++m)
// Every input channel owns depth_multiplier consecutive output channels.
334 const int32_t out_c = m + in_c * depth_multiplier;
// Input coordinate hit by filter tap (0, 0); it can be negative because padding is subtracted.
335 const int32_t in_y_origin = out_y * stride_height - _padding_height;
336 const int32_t in_x_origin = out_x * stride_width - _padding_width;
338 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
340 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
342 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
343 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
// Taps that fall outside the input contribute nothing, which acts as zero padding.
344 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
346 const int16_t input_val =
347 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
348 const int16_t filter_val =
349 filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
// Both operands are widened to 64 bits before the multiply.
350 acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
// The bias is optional, so it is only added when a bias buffer exists.
354 if (bias_data != nullptr)
356 acc += bias_data[out_c];
359 int32_t output_multiplier = quant_multipliers[out_c].multiplier;
360 int output_shift = quant_multipliers[out_c].shift;
// Rescale the accumulator into the output's quantized domain and clamp to the activation range.
362 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
364 scaled_acc = std::max(scaled_acc, activation_min);
365 scaled_acc = std::min(scaled_acc, activation_max);
// The store narrows implicitly to int16_t; presumably the clamp above keeps the value within
// int16 range — confirm what calculateActivationRangeQuantized yields for an S16 output.
367 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
375 } // namespace kernels
376 } // namespace luci_interpreter