/* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/TransposeConv.h"

#include "kernels/Utils.h"

#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>

#include <cstring>
#include <limits>
#include <stdexcept>

namespace luci_interpreter
{

namespace kernels
{

TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
                             const Tensor *bias, Tensor *output, const TransposeConvParams &params)
  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
{
}
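
// Usage sketch (hypothetical; the tensor setup and enum values below follow the
// surrounding luci-interpreter kernel conventions, not code in this file):
//   TransposeConvParams params{};
//   params.padding = Padding::SAME;
//   params.stride_height = 2;
//   params.stride_width = 2;
//   TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor,
//                        &bias_tensor, &output_tensor, params);
//   kernel.configure(); // validates shapes, resizes output, precomputes multipliers
//   kernel.execute();   // dispatches on the input element type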

TransposeConv::~TransposeConv()
{
  // Define destructor here, to delete vector of quantized multipliers properly
}

void TransposeConv::configure()
{
  assert(output_shape()->shape().num_dims() == 1);
  assert(input()->shape().num_dims() == 4);
  assert(filter()->shape().num_dims() == 4);
  assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
         input()->element_type() == DataType::S16);
  assert(input()->element_type() == output()->element_type());
  assert(input()->shape().dim(3) == filter()->shape().dim(3));
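
  // The output shape arrives at runtime as a 1-D tensor of int32 values;
  // materialize it into a Shape and resize the output tensor accordingly.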
  const int num_dims = output_shape()->shape().dim(0);
  Shape out_shape(num_dims);
  const auto *shape_data = getTensorData<int32_t>(output_shape());
  for (int i = 0; i < num_dims; i++)
    out_shape.dim(i) = shape_data[i];
  output()->resize(out_shape);

  const int32_t filter_height = filter()->shape().dim(1);
  const int32_t filter_width = filter()->shape().dim(2);
  const int32_t output_height = out_shape.dim(1);
  const int32_t output_width = out_shape.dim(2);
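
  // TransposeConv is the gradient of a forward convolution, so padding is derived
  // by running the forward size computation on the *output*: computeOutputSize()
  // is called with the output dimensions to recover the corresponding forward-conv
  // input size, and only the padding values are kept (the sizes are unused).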
  const int32_t unused_output_height =
    computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
  const int32_t unused_output_width =
    computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);

  _padding_height =
    computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
  _padding_width =
    computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
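
  // Quantized kernels accumulate into a wider scratch tensor (S32 for U8 inputs,
  // S64 for S16) and rescale the result with precomputed quantized multipliers;
  // per-channel quantization yields one multiplier per output channel.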
  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
  {
    DataType scratch_data_type =
      input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
    _scratch_tensor =
      std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
    const std::vector<double> real_multipliers =
      getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());

    _quant_multipliers = quantizeMultipliers(real_multipliers);
  }
}

void TransposeConv::execute() const
{
  switch (input()->element_type())
  {
    case DataType::FLOAT32:
      evalFloat();
      break;
    case DataType::U8:
      if (filter()->scales().size() == 1)
      {
        // Per-layer (single-scale) quantization.
        evalQuantized();
      }
      else if (filter()->scales().size() > 1)
      {
        // Per-channel quantization: one scale per output channel (dim 0 of the filter).
        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
                               static_cast<size_t>(filter()->shape().dim(0)));
        evalQuantizedPerChannel();
      }
      break;
    case DataType::S16:
      evalQuantizedS16();
      break;
    default:
      throw std::runtime_error("Unsupported type.");
  }
  if (_scratch_tensor)
    _scratch_tensor->deallocate();
}

void TransposeConv::evalFloat() const
{
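  // Float path delegates to the TFLite reference kernel. The trailing
  // RuntimeShape()/nullptr pair is the unused im2col buffer.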
  tflite::ConvParams op_params{};
  op_params.padding_type = tflite::PaddingType::kSame;
  op_params.padding_values.height = _padding_height;
  op_params.padding_values.width = _padding_width;
  op_params.stride_height = params().stride_height;
  op_params.stride_width = params().stride_width;
  tflite::reference_ops::TransposeConv(op_params,                                                //
                                       getTensorShape(input()), getTensorData<float>(input()),   //
                                       getTensorShape(filter()), getTensorData<float>(filter()), //
                                       getTensorShape(bias()), getTensorData<float>(bias()),     //
                                       getTensorShape(output()), getTensorData<float>(output()), //
                                       tflite::RuntimeShape(), nullptr);
}

void TransposeConv::evalQuantized() const
{
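  // Per-layer quantized path: a single requantization multiplier/shift pair
  // (computed in configure()) applies to every output channel, so the TFLite
  // reference kernel can be used directly.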
  tflite::ConvParams op_params{};
  op_params.padding_type = tflite::PaddingType::kSame;
  op_params.padding_values.height = _padding_height;
  op_params.padding_values.width = _padding_width;
  op_params.stride_height = params().stride_height;
  op_params.stride_width = params().stride_width;
  // The kernel expects input and filter zero points to be negated.
  op_params.input_offset = -input()->zero_point();    // Note the '-'.
  op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
  op_params.output_offset = output()->zero_point();
  op_params.output_multiplier = _quant_multipliers[0].multiplier;
  op_params.output_shift = _quant_multipliers[0].shift;
  op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();

  tflite::reference_ops::TransposeConv(op_params,                                                  //
                                       getTensorShape(input()), getTensorData<uint8_t>(input()),   //
                                       getTensorShape(filter()), getTensorData<uint8_t>(filter()), //
                                       getTensorShape(bias()), getTensorData<int32_t>(bias()),     //
                                       getTensorShape(output()), getTensorData<uint8_t>(output()), //
                                       tflite::RuntimeShape(), nullptr,                            //
                                       getTensorData<int32_t>(_scratch_tensor.get()));
}

void TransposeConv::evalQuantizedPerChannel() const
{
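  // Per-channel quantized path, computed in two passes per batch:
  //  1) scatter: walk the input and accumulate zero-point-corrected
  //     input * filter products into the int32 scratch tensor;
  //  2) requantize: apply the per-channel multiplier, add the output zero point,
  //     clamp to the activation range, and write the uint8 result.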
  const auto *input_data = getTensorData<uint8_t>(input());
  const auto *filter_data = getTensorData<uint8_t>(filter());
  const auto *bias_data = getTensorData<int32_t>(bias());
  auto *output_data = getTensorData<uint8_t>(output());
  auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t output_depth = filter_shape.dim(0);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);

  std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t));
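
  // Pass 1: scatter. Each input element contributes to a filter_height x
  // filter_width window of output positions; positions falling outside the
  // output bounds are skipped.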
  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
  for (int32_t batch = 0; batch < batches; ++batch)
  {
    for (int32_t in_y = 0; in_y < input_height; ++in_y)
    {
      for (int32_t in_x = 0; in_x < input_width; ++in_x)
      {
        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
        {
          const int32_t out_y_origin = in_y * stride_height - _padding_height;
          const int32_t out_x_origin = in_x * stride_width - _padding_width;
          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int32_t out_x = out_x_origin + filter_x;
              const int32_t out_y = out_y_origin + filter_y;
              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
              {
                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
                {
                  const uint8_t input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
                  const uint8_t filter_val =
                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
                    static_cast<int32_t>(input_val - input()->zero_point()) *
                    static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
                }
              }
            }
          }
        }
      }
    }
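
    // Pass 2: requantize. Add bias, scale by the per-channel multiplier, add the
    // output zero point, and clamp to the activation range.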
    for (int32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (int32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
        {
          int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
          if (bias_data)
          {
            acc += bias_data[out_c];
          }
          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);

          scaled_acc += output()->zero_point();
          scaled_acc = std::max(scaled_acc, activation_min);
          scaled_acc = std::min(scaled_acc, activation_max);

          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] =
            static_cast<uint8_t>(scaled_acc);
        }
      }
    }
  }
}

void TransposeConv::evalQuantizedS16() const
{
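  // S16 path mirrors evalQuantizedPerChannel, but S16 quantization is symmetric
  // (all zero points are zero), so raw products are accumulated with no offset
  // correction, into an int64 scratch tensor to avoid overflow.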
  const auto *input_data = getTensorData<int16_t>(input());
  const auto *filter_data = getTensorData<int16_t>(filter());
  const auto *bias_data = getTensorData<int64_t>(bias());
  auto *output_data = getTensorData<int16_t>(output());
  auto *scratch_data = getTensorData<int64_t>(_scratch_tensor.get());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t output_depth = filter_shape.dim(0);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);

  std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int64_t));

  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
  for (int32_t batch = 0; batch < batches; ++batch)
  {
    for (int32_t in_y = 0; in_y < input_height; ++in_y)
    {
      for (int32_t in_x = 0; in_x < input_width; ++in_x)
      {
        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
        {
          const int32_t out_y_origin = in_y * stride_height - _padding_height;
          const int32_t out_x_origin = in_x * stride_width - _padding_width;
          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              const int32_t out_x = out_x_origin + filter_x;
              const int32_t out_y = out_y_origin + filter_y;
              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
              {
                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
                {
                  const int16_t input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
                  const int16_t filter_val =
                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
                    static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
                }
              }
            }
          }
        }
      }
    }

    for (int32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (int32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
        {
          int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
          if (bias_data)
          {
            acc += bias_data[out_c];
          }
          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);

          scaled_acc = std::max(scaled_acc, activation_min);
          scaled_acc = std::min(scaled_acc, activation_max);

          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] =
            static_cast<int16_t>(scaled_acc);
        }
      }
    }
  }
}

} // namespace kernels
} // namespace luci_interpreter