Imported Upstream version 1.18.0
[platform/core/ml/nnfw.git] / compiler / luci-interpreter / src / kernels / Mul.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18 #include "kernels/Mul.h"
19
20 #include "kernels/BinaryOpCommon.h"
21 #include "kernels/Utils.h"
22
23 #include "PALMul.h"
24
25 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
26
27 #include <stdexcept>
28
29 namespace luci_interpreter
30 {
31 namespace kernels
32 {
33
// Constructs the Mul kernel over two input tensors and one output tensor.
// Inputs/outputs are borrowed (non-owning) pointers; `params` (activation
// function) is copied into the base KernelWithParams storage.
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
38
39 void Mul::configure()
40 {
41   LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
42   LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
43   if (input1()->element_type() == DataType::S16)
44   {
45     LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
46                            output()->zero_point() == 0);
47   }
48
49   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
50 }
51
52 void Mul::execute() const
53 {
54   switch (input1()->element_type())
55   {
56     case DataType::FLOAT32:
57       evalFloat();
58       break;
59     case DataType::S16:
60       evalQuantizedS16();
61       break;
62     default:
63       throw std::runtime_error("Unsupported type.");
64   }
65 }
66
67 void Mul::evalFloat() const
68 {
69   float activation_min{};
70   float activation_max{};
71   calculateActivationRange(_params.activation, &activation_min, &activation_max);
72
73   tflite::ArithmeticParams params{};
74   params.float_activation_min = activation_min;
75   params.float_activation_max = activation_max;
76
77   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
78     getTensorShape(input1()), getTensorShape(input2()), &params);
79
80   if (need_broadcast)
81   {
82     luci_interpreter_pal::BroadcastMul4DSlow(
83       params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
84       getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
85   }
86   else
87   {
88     luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
89                               getTensorShape(input2()), getTensorData<float>(input2()),
90                               getTensorShape(output()), getTensorData<float>(output()));
91   }
92 }
93
94 void Mul::evalQuantizedS16() const
95 {
96   const auto input1_scale = static_cast<double>(input1()->scale());
97   const auto input2_scale = static_cast<double>(input2()->scale());
98   const auto output_scale = static_cast<double>(output()->scale());
99
100   const double real_multiplier = input1_scale * input2_scale / output_scale;
101
102   int32_t output_multiplier;
103   int output_shift;
104   quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
105
106   int32_t activation_min{};
107   int32_t activation_max{};
108   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
109
110   auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
111                                                                               int16_t input2_val) {
112     int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
113     output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
114     output = std::max(output, activation_min);
115     output = std::min(output, activation_max);
116     return static_cast<int16_t>(output);
117   };
118
119   BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
120                         getTensorShape(input2()), getTensorData<int16_t>(input2()),
121                         getTensorShape(output()), getTensorData<int16_t>(output()), fn);
122 }
123
124 } // namespace kernels
125 } // namespace luci_interpreter