/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18 #include "kernels/Mul.h"
19
20 #include "kernels/BinaryOpCommon.h"
21 #include "kernels/Utils.h"
22
23 #include "PALMul.h"
24
25 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
26
namespace luci_interpreter
{
namespace kernels
{

Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}

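// configure() validates operand types before execution. For the quantized S16
// path the kernel requires symmetric quantization on both inputs and the
// output (a single zero point per tensor, equal to zero), so the multiply can
// be rescaled with one fixed-point multiplier and no zero-point offsets.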
void Mul::configure()
{
  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
  if (input1()->element_type() == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
                           input2()->zero_points().size() == 1);
    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
                           output()->zero_point() == 0);
  }
  // TODO: enable this resize only for kernels with dynamic shapes
  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}

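// execute() dispatches on the element type validated in configure();
// any other type is rejected here with an assert.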
void Mul::execute() const
{
  switch (input1()->element_type())
  {
    case DataType::FLOAT32:
      evalFloat();
      break;
    case DataType::S64:
      evalInteger<int64_t>();
      break;
    case DataType::S32:
      evalInteger<int32_t>();
      break;
    case DataType::S16:
      evalQuantizedS16();
      break;
    default:
      assert(false && "Unsupported type.");
  }
}

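// evalFloat() lets ProcessBroadcastShapes decide whether the two input shapes
// require broadcasting. If they do, the slower BroadcastMul4DSlow path walks
// the broadcast dimensions; otherwise the flat elementwise Mul is used. The
// activation clamp range is folded into params beforehand by
// fillArithmeticActivationRange.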
void Mul::evalFloat() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<float>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    luci_interpreter_pal::BroadcastMul4DSlow(
      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
                              getTensorShape(input2()), getTensorData<float>(input2()),
                              getTensorShape(output()), getTensorData<float>(output()));
  }
}

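// evalInteger() mirrors evalFloat() for the S32/S64 cases; the template
// parameter T selects the element type used for data access and clamping.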
template <typename T> void Mul::evalInteger() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<T>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    luci_interpreter_pal::BroadcastMul4DSlow(
      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
  }
  else
  {
    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
                              getTensorShape(input2()), getTensorData<T>(input2()),
                              getTensorShape(output()), getTensorData<T>(output()));
  }
}

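// evalQuantizedS16() multiplies the raw S16 values and rescales the 32-bit
// product back to the output scale. With symmetric quantization (all zero
// points are 0, as enforced in configure()):
//
//   real1 * real2 = (q1 * s1) * (q2 * s2) = (q1 * q2) * (s1 * s2)
//   q_out = (q1 * q2) * (s1 * s2 / s_out)
//
// quantizeMultiplier() decomposes real_multiplier = s1 * s2 / s_out into a
// normalized Q31 integer multiplier and a power-of-two shift. For example,
// with s1 = 0.5, s2 = 0.25, s_out = 0.125 we get real_multiplier = 1.0,
// which decomposes to output_multiplier = 1 << 30 and output_shift = 1.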
void Mul::evalQuantizedS16() const
{
  const auto input1_scale = static_cast<double>(input1()->scale());
  const auto input2_scale = static_cast<double>(input2()->scale());
  const auto output_scale = static_cast<double>(output()->scale());

  const double real_multiplier = input1_scale * input2_scale / output_scale;

  int32_t output_multiplier;
  int output_shift;
  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
                                                                              int16_t input2_val) {
    int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
    output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
    output = std::max(output, activation_min);
    output = std::min(output, activation_max);
    return static_cast<int16_t>(output);
  };

  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
}

} // namespace kernels
} // namespace luci_interpreter
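
// A minimal usage sketch (hypothetical: the tensor construction helpers vary
// between luci-interpreter builds, so makeInputTensor / makeOutputTensor below
// stand in for whatever test utilities the surrounding project provides):
//
//   MulParams params{};
//   params.activation = Activation::NONE;
//
//   Tensor input1 = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 2, 3, 4});
//   Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {5, 6, 7, 8});
//   Tensor output = makeOutputTensor(DataType::FLOAT32);
//
//   Mul kernel(&input1, &input2, &output, params);
//   kernel.configure(); // validates types, resizes output to the broadcast shape
//   kernel.execute();   // output now holds {5, 12, 21, 32}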