2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "kernels/DepthwiseConv2D.h"
19 #include "kernels/Utils.h"
21 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
22 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
26 namespace luci_interpreter
31 DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
32 Tensor *output, const DepthwiseConv2DParams ¶ms)
33 : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
37 void DepthwiseConv2D::configure()
39 // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
40 // | input filter bias output |
41 // ----+---------------------------+
42 // (1) | float float float float |
43 // (2) | float int8 float float | hybrid
44 // (3) | uint8 uint8 int32 uint8 | quantized
45 // (4) | int8 int8 int32 int8 | quantized per channel
46 // (5) | int16 int8 int64 int16 | quantized per channel 16x8
48 // We only support (1) and (3) for now, and additionally the following:
49 // | input filter bias output |
50 // ----+---------------------------+
51 // (5) | int16 int16 int64 int16 |
53 if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
55 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
57 else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
59 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
61 else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
63 LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
67 throw std::runtime_error("Unsupported type.");
69 LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
71 const Shape &input_shape = input()->shape();
72 const Shape &filter_shape = filter()->shape();
73 LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
75 const int32_t batches = input_shape.dim(0);
76 const int32_t input_height = input_shape.dim(1);
77 const int32_t input_width = input_shape.dim(2);
78 // Filter format: [1, H, W, O].
79 LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
80 const int32_t filter_height = filter_shape.dim(1);
81 const int32_t filter_width = filter_shape.dim(2);
82 const int32_t channels_out = filter_shape.dim(3);
84 LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
85 bias()->shape().dim(0) == channels_out));
87 const int32_t output_height =
88 computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
89 _params.dilation_height_factor);
90 const int32_t output_width =
91 computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
92 _params.dilation_width_factor);
94 _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
95 input_height, filter_height, output_height);
96 _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
97 filter_width, output_width);
99 output()->resize({batches, output_height, output_width, channels_out});
102 void DepthwiseConv2D::execute() const
104 switch (input()->element_type())
106 case DataType::FLOAT32:
107 if (filter()->element_type() == DataType::FLOAT32)
112 throw std::runtime_error("Unsupported type.");
120 throw std::runtime_error("Unsupported type.");
124 void DepthwiseConv2D::evalFloat() const
126 float activation_min{};
127 float activation_max{};
128 calculateActivationRange(_params.activation, &activation_min, &activation_max);
130 tflite::DepthwiseParams params{};
131 params.padding_values.height = _padding_height;
132 params.padding_values.width = _padding_width;
133 params.stride_height = _params.stride_height;
134 params.stride_width = _params.stride_width;
135 params.dilation_height_factor = _params.dilation_height_factor;
136 params.dilation_width_factor = _params.dilation_width_factor;
137 params.depth_multiplier = _params.depth_multiplier;
138 params.float_activation_min = activation_min;
139 params.float_activation_max = activation_max;
141 tflite::reference_ops::DepthwiseConv(
142 params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
143 getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
144 getTensorShape(output()), getTensorData<float>(output()));
147 void DepthwiseConv2D::evalQuantized() const
149 const auto input_scale = static_cast<double>(input()->scale());
150 const auto filter_scale = static_cast<double>(filter()->scale());
151 const auto output_scale = static_cast<double>(output()->scale());
153 const double real_multiplier = input_scale * filter_scale / output_scale;
154 int32_t output_multiplier{};
156 quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
158 int32_t activation_min{};
159 int32_t activation_max{};
160 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
162 tflite::DepthwiseParams params{};
163 params.padding_values.height = _padding_height;
164 params.padding_values.width = _padding_width;
165 params.stride_height = _params.stride_height;
166 params.stride_width = _params.stride_width;
167 params.dilation_height_factor = _params.dilation_height_factor;
168 params.dilation_width_factor = _params.dilation_width_factor;
169 params.depth_multiplier = _params.depth_multiplier;
170 // The kernel expects input and filter zero points to be negated.
171 params.input_offset = -input()->zero_point(); // Note the '-'.
172 params.weights_offset = -filter()->zero_point(); // Note the '-'.
173 params.output_offset = output()->zero_point();
174 params.output_multiplier = output_multiplier;
175 params.output_shift = output_shift;
176 params.quantized_activation_min = activation_min;
177 params.quantized_activation_max = activation_max;
179 tflite::reference_ops::DepthwiseConv(
180 params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
181 getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
182 getTensorShape(output()), getTensorData<uint8_t>(output()));
185 void DepthwiseConv2D::evalQuantizedS16() const
187 const auto *input_data = getTensorData<int16_t>(input());
188 const auto *filter_data = getTensorData<int16_t>(filter());
189 const auto *bias_data = getTensorData<int64_t>(bias());
190 auto *output_data = getTensorData<int16_t>(output());
192 const Shape &input_shape = input()->shape();
193 const Shape &filter_shape = filter()->shape();
194 const Shape &output_shape = output()->shape();
196 const int32_t batches = input_shape.dim(0);
197 const int32_t input_height = input_shape.dim(1);
198 const int32_t input_width = input_shape.dim(2);
199 const int32_t input_depth = input_shape.dim(3);
200 const int32_t filter_height = filter_shape.dim(1);
201 const int32_t filter_width = filter_shape.dim(2);
202 const int32_t output_height = output_shape.dim(1);
203 const int32_t output_width = output_shape.dim(2);
205 const int32_t stride_height = _params.stride_height;
206 const int32_t stride_width = _params.stride_width;
207 const int32_t dilation_height_factor = _params.dilation_height_factor;
208 const int32_t dilation_width_factor = _params.dilation_width_factor;
209 const int32_t depth_multiplier = _params.depth_multiplier;
211 const std::vector<double> effective_output_scales =
212 getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
214 std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
215 quantizeMultipliers(effective_output_scales);
217 BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
219 int32_t activation_min{};
220 int32_t activation_max{};
221 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
223 for (int32_t batch = 0; batch < batches; ++batch)
225 for (int32_t out_y = 0; out_y < output_height; ++out_y)
227 for (int32_t out_x = 0; out_x < output_width; ++out_x)
229 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
231 for (int32_t m = 0; m < depth_multiplier; ++m)
233 const int32_t out_c = m + in_c * depth_multiplier;
234 const int32_t in_y_origin = out_y * stride_height - _padding_height;
235 const int32_t in_x_origin = out_x * stride_width - _padding_width;
237 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
239 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
241 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
242 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
243 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
245 const int16_t input_val =
246 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
247 const int16_t filter_val =
248 filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
249 acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
253 if (bias_data != nullptr)
255 acc += bias_data[out_c];
258 int32_t output_multiplier = quant_multipliers[out_c].multiplier;
259 int output_shift = quant_multipliers[out_c].shift;
261 tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
263 scaled_acc = std::max(scaled_acc, activation_min);
264 scaled_acc = std::min(scaled_acc, activation_max);
266 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
274 } // namespace kernels
275 } // namespace luci_interpreter