b0ee905dc14bb9990b08235c1ebccfe1d2eec860
[platform/core/ml/nnfw.git] / compiler / luci-interpreter / src / kernels / TransposeConv.cpp
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *    http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "kernels/TransposeConv.h"
19
20 #include "kernels/Utils.h"
21
22 #include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
23
24 #include <stdexcept>
25
26 namespace luci_interpreter
27 {
28
29 namespace kernels
30 {
31
32 TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
33                              const Tensor *bias, Tensor *output, const TransposeConvParams &params)
34     : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
35 {
36 }
37
38 TransposeConv::~TransposeConv()
39 {
40   // Define destructor here, to delete vector of qunatized multipliers properly
41 }
42
43 void TransposeConv::configure()
44 {
45   assert(output_shape()->shape().num_dims() == 1);
46   assert(input()->shape().num_dims() == 4);
47   assert(filter()->shape().num_dims() == 4);
48   assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
49          input()->element_type() == DataType::S16);
50   assert(input()->element_type() == output()->element_type());
51   assert(input()->shape().dim(3) == filter()->shape().dim(3));
52
53   const int num_dims = output_shape()->shape().dim(0);
54   Shape out_shape(num_dims);
55   const auto *shape_data = getTensorData<int32_t>(output_shape());
56   for (int i = 0; i < num_dims; i++)
57     out_shape.dim(i) = shape_data[i];
58   output()->resize(out_shape);
59
60   const int32_t filter_height = filter()->shape().dim(1);
61   const int32_t filter_width = filter()->shape().dim(2);
62   const int32_t output_height = out_shape.dim(1);
63   const int32_t output_width = out_shape.dim(2);
64
65   const int32_t unused_output_height =
66       computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
67   const int32_t unused_output_width =
68       computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
69
70   _padding_height =
71       computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
72   _padding_width =
73       computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
74
75   if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
76   {
77     DataType scratch_data_type =
78         input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
79     _scratch_tensor =
80         std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
81     const std::vector<double> real_multipliers =
82         getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
83
84     _quant_multipliers = quantizeMultipliers(real_multipliers);
85   }
86 }
87
88 void TransposeConv::execute() const
89 {
90   switch (input()->element_type())
91   {
92     case DataType::FLOAT32:
93       evalFloat();
94       break;
95     case DataType::U8:
96       evalQuantized();
97       break;
98     case DataType::S16:
99       evalQuantizedS16();
100       break;
101     default:
102       throw std::runtime_error("Unsupported type.");
103   }
104   if (!!_scratch_tensor)
105     _scratch_tensor->deallocate();
106 }
107
108 void TransposeConv::evalFloat() const
109 {
110   tflite::ConvParams op_params{};
111   op_params.padding_type = tflite::PaddingType::kSame;
112   op_params.padding_values.height = _padding_height;
113   op_params.padding_values.width = _padding_width;
114   op_params.stride_height = params().stride_height;
115   op_params.stride_width = params().stride_width;
116   tflite::reference_ops::TransposeConv(op_params,                                                //
117                                        getTensorShape(input()), getTensorData<float>(input()),   //
118                                        getTensorShape(filter()), getTensorData<float>(filter()), //
119                                        getTensorShape(bias()), getTensorData<float>(bias()),     //
120                                        getTensorShape(output()), getTensorData<float>(output()), //
121                                        tflite::RuntimeShape(), nullptr);
122 }
123
124 void TransposeConv::evalQuantized() const
125 {
126   tflite::ConvParams op_params{};
127   op_params.padding_type = tflite::PaddingType::kSame;
128   op_params.padding_values.height = _padding_height;
129   op_params.padding_values.width = _padding_width;
130   op_params.stride_height = params().stride_height;
131   op_params.stride_width = params().stride_width;
132   // The kernel expects input and filter zero points to be negated.
133   op_params.input_offset = -input()->zero_point();    // Note the '-'.
134   op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
135   op_params.output_offset = output()->zero_point();
136   op_params.output_multiplier = _quant_multipliers[0].multiplier;
137   op_params.output_shift = _quant_multipliers[0].shift;
138   op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
139   op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
140
141   tflite::reference_ops::TransposeConv(op_params,                                                //
142                                        getTensorShape(input()), getTensorData<uint8>(input()),   //
143                                        getTensorShape(filter()), getTensorData<uint8>(filter()), //
144                                        getTensorShape(bias()), getTensorData<int32_t>(bias()),   //
145                                        getTensorShape(output()), getTensorData<uint8>(output()), //
146                                        tflite::RuntimeShape(), nullptr,                          //
147                                        getTensorData<int32_t>(_scratch_tensor.get()));
148 }
149
150 void TransposeConv::evalQuantizedS16() const
151 {
152   const auto *input_data = getTensorData<int16_t>(input());
153   const auto *filter_data = getTensorData<int16_t>(filter());
154   const auto *bias_data = getTensorData<int64_t>(bias());
155   auto *output_data = getTensorData<int16_t>(output());
156   auto *scratch_data = getTensorData<int64_t>(_scratch_tensor.get());
157
158   const Shape &input_shape = input()->shape();
159   const Shape &filter_shape = filter()->shape();
160   const Shape &output_shape = output()->shape();
161
162   const int32_t batches = input_shape.dim(0);
163   const int32_t input_height = input_shape.dim(1);
164   const int32_t input_width = input_shape.dim(2);
165   const int32_t input_depth = input_shape.dim(3);
166   const int32_t output_depth = filter_shape.dim(0);
167   const int32_t filter_height = filter_shape.dim(1);
168   const int32_t filter_width = filter_shape.dim(2);
169   const int32_t output_height = output_shape.dim(1);
170   const int32_t output_width = output_shape.dim(2);
171
172   const int32_t stride_height = _params.stride_height;
173   const int32_t stride_width = _params.stride_width;
174
175   int32_t activation_min{};
176   int32_t activation_max{};
177   calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
178
179   std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int64_t));
180
181   BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
182   for (int32_t batch = 0; batch < batches; ++batch)
183   {
184     for (int32_t in_y = 0; in_y < input_height; ++in_y)
185     {
186       for (int32_t in_x = 0; in_x < input_width; ++in_x)
187       {
188         for (int32_t in_c = 0; in_c < input_depth; ++in_c)
189         {
190           const int32_t out_y_origin = in_y * stride_height - _padding_height;
191           const int32_t out_x_origin = in_x * stride_width - _padding_width;
192           for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
193           {
194             for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
195             {
196               const int32_t out_x = out_x_origin + filter_x;
197               const int32_t out_y = out_y_origin + filter_y;
198               if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
199               {
200                 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
201                 {
202                   const int16_t input_val =
203                       input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
204                   const int16_t filter_val =
205                       filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
206                   scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
207                       static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
208                 }
209               }
210             }
211           }
212         }
213       }
214     }
215     for (int32_t out_y = 0; out_y < output_height; ++out_y)
216     {
217       for (int32_t out_x = 0; out_x < output_width; ++out_x)
218       {
219         for (int32_t out_c = 0; out_c < output_depth; ++out_c)
220         {
221           int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
222           if (bias_data)
223           {
224             acc += bias_data[out_c];
225           }
226           int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
227               acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
228
229           scaled_acc = std::max(scaled_acc, activation_min);
230           scaled_acc = std::min(scaled_acc, activation_max);
231
232           output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
233         }
234       }
235     }
236   }
237 }
238
239 } // namespace kernels
240 } // namespace luci_interpreter