Imported Upstream version 1.8.0
[platform/core/ml/nnfw.git] / compiler / luci-interpreter / src / kernels / Add.cpp
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *    http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include "kernels/Add.h"
19
20 #include "kernels/Utils.h"
21
22 #include <tensorflow/lite/kernels/internal/reference/add.h>
23 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
24
25 #include <stdexcept>
26
27 namespace luci_interpreter
28 {
29 namespace kernels
30 {
31
32 Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
33     : KernelWithParams<AddParams>({input1, input2}, {output}, params)
34 {
35 }
36
37 void Add::configure()
38 {
39   if (input1()->element_type() != input2()->element_type())
40   {
41     throw std::runtime_error("Input Tensor Data Type Mismatch.");
42   }
43   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
44 }
45
46 void Add::execute() const
47 {
48   switch (input1()->element_type())
49   {
50     case DataType::FLOAT32:
51       evalFloat();
52       break;
53     case DataType::U8:
54       evalQuantized();
55       break;
56     default:
57       throw std::runtime_error("Unsupported type.");
58   }
59 }
60
61 void Add::evalFloat() const
62 {
63   float activation_min{};
64   float activation_max{};
65   calculateActivationRange(_params.activation, &activation_min, &activation_max);
66
67   tflite::ArithmeticParams params{};
68   params.float_activation_min = activation_min;
69   params.float_activation_max = activation_max;
70
71   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
72       getTensorShape(input1()), getTensorShape(input2()), &params);
73
74   if (need_broadcast)
75   {
76     tflite::reference_ops::BroadcastAdd4DSlow(
77         params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
78         getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
79   }
80   else
81   {
82     tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
83                                getTensorShape(input2()), getTensorData<float>(input2()),
84                                getTensorShape(output()), getTensorData<float>(output()));
85   }
86 }
87
88 void Add::evalQuantized() const
89 {
90   const auto input1_scale = static_cast<double>(input1()->scale());
91   const auto input2_scale = static_cast<double>(input2()->scale());
92   const auto output_scale = static_cast<double>(output()->scale());
93
94   const int left_shift = 20;
95   const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
96   const double real_input1_multiplier = input1_scale / twice_max_input_scale;
97   const double real_input2_multiplier = input2_scale / twice_max_input_scale;
98   const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
99
100   int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
101   int input1_shift{}, input2_shift{}, output_shift{};
102   quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
103   quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
104   quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
105
106   int32_t activation_min{};
107   int32_t activation_max{};
108   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
109
110   tflite::ArithmeticParams params{};
111   params.left_shift = left_shift;
112   // The kernel expects inputs' zero points to be negated.
113   params.input1_offset = -input1()->zero_point(); // Note the '-'.
114   params.input1_multiplier = input1_multiplier;
115   params.input1_shift = input1_shift;
116   params.input2_offset = -input2()->zero_point(); // Note the '-'.
117   params.input2_multiplier = input2_multiplier;
118   params.input2_shift = input2_shift;
119   params.output_offset = output()->zero_point();
120   params.output_multiplier = output_multiplier;
121   params.output_shift = output_shift;
122   params.quantized_activation_min = activation_min;
123   params.quantized_activation_max = activation_max;
124
125   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
126       getTensorShape(input1()), getTensorShape(input2()), &params);
127
128   if (need_broadcast)
129   {
130     tflite::reference_ops::BroadcastAdd4DSlow(
131         params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
132         getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
133         getTensorData<uint8_t>(output()));
134   }
135   else
136   {
137     tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
138                                getTensorShape(input2()), getTensorData<uint8_t>(input2()),
139                                getTensorShape(output()), getTensorData<uint8_t>(output()));
140   }
141 }
142
143 } // namespace kernels
144 } // namespace luci_interpreter