7b02c1e25dd0058af04f4ebef9fba0b897765bf8
[platform/core/ml/nnfw.git] / onert-micro / luci-interpreter / src / kernels / Sub.cpp
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *    http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "kernels/Sub.h"
19 #include "kernels/Utils.h"
20
21 #include "PALSub.h"
22
23 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
24
25 namespace luci_interpreter
26 {
27 namespace kernels
28 {
29
// Constructs the Sub kernel: registers the two input tensors and the single
// output tensor with the base kernel, and stores the Sub-specific params
// (fused activation) via KernelWithParams.
Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
{
}
34
35 void Sub::configure()
36 {
37   LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
38   LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
39   // TODO: enable it only if kernel with dynamic shapes
40   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
41 }
42
43 void Sub::execute() const
44 {
45   switch (input1()->element_type())
46   {
47     case DataType::FLOAT32:
48       evalFloat();
49       break;
50     case DataType::S64:
51       evalInteger<int64_t>();
52       break;
53     case DataType::S32:
54       evalInteger<int32_t>();
55       break;
56     case DataType::U8:
57       evalQuantized();
58       break;
59     default:
60       assert(false && "Unsupported type.");
61   }
62 }
63
64 void Sub::evalFloat() const
65 {
66   tflite::ArithmeticParams params{};
67   fillArithmeticActivationRange<float>(params, _params.activation);
68
69   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
70     getTensorShape(input1()), getTensorShape(input2()), &params);
71
72   if (need_broadcast)
73   {
74     tflite::reference_ops::BroadcastSubSlow(
75       params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
76       getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
77   }
78   else
79   {
80     luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
81                               getTensorShape(input2()), getTensorData<float>(input2()),
82                               getTensorShape(output()), getTensorData<float>(output()));
83   }
84 }
85
86 template <typename T> void Sub::evalInteger() const
87 {
88   tflite::ArithmeticParams params{};
89   fillArithmeticActivationRange<T>(params, _params.activation);
90
91   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
92     getTensorShape(input1()), getTensorShape(input2()), &params);
93
94   if (need_broadcast)
95   {
96     tflite::reference_ops::BroadcastSubSlow(
97       params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
98       getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
99   }
100   else
101   {
102     tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
103                                getTensorShape(input2()), getTensorData<T>(input2()),
104                                getTensorShape(output()), getTensorData<T>(output()));
105   }
106 }
107
108 void Sub::evalQuantized() const
109 {
110   const auto input1_scale = static_cast<double>(input1()->scale());
111   const auto input2_scale = static_cast<double>(input2()->scale());
112   const auto output_scale = static_cast<double>(output()->scale());
113
114   const int left_shift = 20;
115   const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
116   const double real_input1_multiplier = input1_scale / twice_max_input_scale;
117   const double real_input2_multiplier = input2_scale / twice_max_input_scale;
118   const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
119
120   int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
121   int input1_shift{}, input2_shift{}, output_shift{};
122   quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
123   quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
124   quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
125
126   int32_t activation_min{};
127   int32_t activation_max{};
128   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
129
130   tflite::ArithmeticParams params{};
131   params.left_shift = left_shift;
132   // The kernel expects inputs' zero points to be negated.
133   params.input1_offset = -input1()->zero_point(); // Note the '-'.
134   params.input1_multiplier = input1_multiplier;
135   params.input1_shift = input1_shift;
136   params.input2_offset = -input2()->zero_point(); // Note the '-'.
137   params.input2_multiplier = input2_multiplier;
138   params.input2_shift = input2_shift;
139   params.output_offset = output()->zero_point();
140   params.output_multiplier = output_multiplier;
141   params.output_shift = output_shift;
142   params.quantized_activation_min = activation_min;
143   params.quantized_activation_max = activation_max;
144
145   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
146     getTensorShape(input1()), getTensorShape(input2()), &params);
147
148   if (need_broadcast)
149   {
150     tflite::reference_ops::BroadcastSubSlow(
151       params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
152       getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
153   }
154   else
155   {
156     tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
157                                getTensorShape(input2()), getTensorData<uint8_t>(input2()),
158                                getTensorShape(output()), getTensorData<uint8_t>(output()));
159   }
160 }
161
162 } // namespace kernels
163 } // namespace luci_interpreter