2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "kernels/TransposeConv.h"
20 #include "kernels/Utils.h"
22 #include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
26 namespace luci_interpreter
32 TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
33 const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
34 const TransposeConvParams ¶ms)
35 : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
36 {output, scratch_tensor}, params)
40 TransposeConv::~TransposeConv()
42 // Define destructor here, to delete vector of qunatized multipliers properly
45 void TransposeConv::configure()
47 assert(output_shape()->shape().num_dims() == 1);
48 assert(input()->shape().num_dims() == 4);
49 assert(filter()->shape().num_dims() == 4);
50 assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
51 input()->element_type() == DataType::S16);
52 assert(input()->element_type() == output()->element_type());
53 assert(input()->shape().dim(3) == filter()->shape().dim(3));
55 const int num_dims = output_shape()->shape().dim(0);
56 Shape out_shape(num_dims);
57 const auto *shape_data = getTensorData<int32_t>(output_shape());
58 for (int i = 0; i < num_dims; i++)
59 out_shape.dim(i) = shape_data[i];
60 output()->resize(out_shape);
62 const int32_t filter_height = filter()->shape().dim(1);
63 const int32_t filter_width = filter()->shape().dim(2);
64 const int32_t output_height = out_shape.dim(1);
65 const int32_t output_width = out_shape.dim(2);
67 const int32_t unused_output_height =
68 computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
69 const int32_t unused_output_width =
70 computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
73 computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
75 computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
77 if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
79 auto scratch_tensor = getOutputTensors()[1];
80 scratch_tensor->resize(output()->shape());
81 const std::vector<double> real_multipliers =
82 getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
84 _quant_multipliers = quantizeMultipliers(real_multipliers);
88 auto scratch_tensor = getOutputTensors()[1];
89 scratch_tensor->set_allocatable(false);
93 void TransposeConv::execute() const
95 switch (input()->element_type())
97 case DataType::FLOAT32:
101 if (filter()->scales().size() == 1)
105 else if (filter()->scales().size() > 1)
107 LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
108 LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
109 static_cast<size_t>(filter()->shape().dim(0)));
110 evalQuantizedPerChannel();
117 throw std::runtime_error("Unsupported type.");
121 void TransposeConv::evalFloat() const
123 tflite::ConvParams op_params{};
124 op_params.padding_type = tflite::PaddingType::kSame;
125 op_params.padding_values.height = _padding_height;
126 op_params.padding_values.width = _padding_width;
127 op_params.stride_height = params().stride_height;
128 op_params.stride_width = params().stride_width;
129 tflite::reference_ops::TransposeConv(op_params, //
130 getTensorShape(input()), getTensorData<float>(input()), //
131 getTensorShape(filter()), getTensorData<float>(filter()), //
132 getTensorShape(bias()), getTensorData<float>(bias()), //
133 getTensorShape(output()), getTensorData<float>(output()), //
134 tflite::RuntimeShape(), nullptr);
137 void TransposeConv::evalQuantized() const
139 tflite::ConvParams op_params{};
140 op_params.padding_type = tflite::PaddingType::kSame;
141 op_params.padding_values.height = _padding_height;
142 op_params.padding_values.width = _padding_width;
143 op_params.stride_height = params().stride_height;
144 op_params.stride_width = params().stride_width;
145 // The kernel expects input and filter zero points to be negated.
146 op_params.input_offset = -input()->zero_point(); // Note the '-'.
147 op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
148 op_params.output_offset = output()->zero_point();
149 op_params.output_multiplier = _quant_multipliers[0].multiplier;
150 op_params.output_shift = _quant_multipliers[0].shift;
151 op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
152 op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
154 auto scratch_tensor = getOutputTensors()[1];
156 tflite::reference_ops::TransposeConv(op_params, //
157 getTensorShape(input()), getTensorData<uint8>(input()), //
158 getTensorShape(filter()), getTensorData<uint8>(filter()), //
159 getTensorShape(bias()), getTensorData<int32_t>(bias()), //
160 getTensorShape(output()), getTensorData<uint8>(output()), //
161 tflite::RuntimeShape(), nullptr, //
162 getTensorData<int32_t>(scratch_tensor));
165 void TransposeConv::evalQuantizedPerChannel() const
167 const auto *input_data = getTensorData<uint8_t>(input());
168 const auto *filter_data = getTensorData<uint8_t>(filter());
169 const auto *bias_data = getTensorData<int32_t>(bias());
170 auto *output_data = getTensorData<uint8_t>(output());
172 auto scratch_tensor = getOutputTensors()[1];
173 auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
175 const Shape &input_shape = input()->shape();
176 const Shape &filter_shape = filter()->shape();
177 const Shape &output_shape = output()->shape();
179 const int32_t batches = input_shape.dim(0);
180 const int32_t input_height = input_shape.dim(1);
181 const int32_t input_width = input_shape.dim(2);
182 const int32_t input_depth = input_shape.dim(3);
183 const int32_t output_depth = filter_shape.dim(0);
184 const int32_t filter_height = filter_shape.dim(1);
185 const int32_t filter_width = filter_shape.dim(2);
186 const int32_t output_height = output_shape.dim(1);
187 const int32_t output_width = output_shape.dim(2);
189 const int32_t stride_height = _params.stride_height;
190 const int32_t stride_width = _params.stride_width;
192 int32_t activation_min{};
193 int32_t activation_max{};
194 calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
196 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
198 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
199 for (int32_t batch = 0; batch < batches; ++batch)
201 for (int32_t in_y = 0; in_y < input_height; ++in_y)
203 for (int32_t in_x = 0; in_x < input_width; ++in_x)
205 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
207 const int32_t out_y_origin = in_y * stride_height - _padding_height;
208 const int32_t out_x_origin = in_x * stride_width - _padding_width;
209 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
211 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
213 const int32_t out_x = out_x_origin + filter_x;
214 const int32_t out_y = out_y_origin + filter_y;
215 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
217 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
219 const uint8_t input_val =
220 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
221 const uint8_t filter_val =
222 filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
223 scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
224 static_cast<int32_t>(input_val - input()->zero_point()) *
225 static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
233 for (int32_t out_y = 0; out_y < output_height; ++out_y)
235 for (int32_t out_x = 0; out_x < output_width; ++out_x)
237 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
239 int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
242 acc += bias_data[out_c];
245 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
246 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
248 scaled_acc += output()->zero_point();
249 scaled_acc = std::max(scaled_acc, activation_min);
250 scaled_acc = std::min(scaled_acc, activation_max);
252 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
259 void TransposeConv::evalQuantizedS16() const
261 const auto *input_data = getTensorData<int16_t>(input());
262 const auto *filter_data = getTensorData<int16_t>(filter());
263 const auto *bias_data = getTensorData<int64_t>(bias());
264 auto *output_data = getTensorData<int16_t>(output());
266 auto scratch_tensor = getOutputTensors()[1];
267 auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
269 const Shape &input_shape = input()->shape();
270 const Shape &filter_shape = filter()->shape();
271 const Shape &output_shape = output()->shape();
273 const int32_t batches = input_shape.dim(0);
274 const int32_t input_height = input_shape.dim(1);
275 const int32_t input_width = input_shape.dim(2);
276 const int32_t input_depth = input_shape.dim(3);
277 const int32_t output_depth = filter_shape.dim(0);
278 const int32_t filter_height = filter_shape.dim(1);
279 const int32_t filter_width = filter_shape.dim(2);
280 const int32_t output_height = output_shape.dim(1);
281 const int32_t output_width = output_shape.dim(2);
283 const int32_t stride_height = _params.stride_height;
284 const int32_t stride_width = _params.stride_width;
286 int32_t activation_min{};
287 int32_t activation_max{};
288 calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
290 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
292 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
293 for (int32_t batch = 0; batch < batches; ++batch)
295 for (int32_t in_y = 0; in_y < input_height; ++in_y)
297 for (int32_t in_x = 0; in_x < input_width; ++in_x)
299 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
301 const int32_t out_y_origin = in_y * stride_height - _padding_height;
302 const int32_t out_x_origin = in_x * stride_width - _padding_width;
303 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
305 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
307 const int32_t out_x = out_x_origin + filter_x;
308 const int32_t out_y = out_y_origin + filter_y;
309 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
311 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
313 const int16_t input_val =
314 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
315 const int16_t filter_val =
316 filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
317 scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
318 static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
326 for (int32_t out_y = 0; out_y < output_height; ++out_y)
328 for (int32_t out_x = 0; out_x < output_width; ++out_x)
330 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
332 int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
335 acc += bias_data[out_c];
337 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
338 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
340 scaled_acc = std::max(scaled_acc, activation_min);
341 scaled_acc = std::min(scaled_acc, activation_max);
343 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
350 } // namespace kernels
351 } // namespace luci_interpreter