/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #include "kernels/Mul.h"
20 #include "kernels/BinaryOpCommon.h"
21 #include "kernels/Utils.h"
25 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
27 namespace luci_interpreter
32 Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms)
33 : KernelWithParams<MulParams>({input1, input2}, {output}, params)
39 LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
40 LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
41 if (input1()->element_type() == DataType::S16)
43 LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
44 input2()->zero_points().size() == 1)
45 LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
46 output()->zero_point() == 0);
48 // TODO: enable it only if kernel with dynamic shapes
49 output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
52 void Mul::execute() const
54 switch (input1()->element_type())
56 case DataType::FLOAT32:
60 evalInteger<int64_t>();
63 evalInteger<int32_t>();
69 assert(false && "Unsupported type.");
73 void Mul::evalFloat() const
75 tflite::ArithmeticParams params{};
76 fillArithmeticActivationRange<float>(params, _params.activation);
78 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
79 getTensorShape(input1()), getTensorShape(input2()), ¶ms);
83 luci_interpreter_pal::BroadcastMul4DSlow(
84 params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
85 getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
89 luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
90 getTensorShape(input2()), getTensorData<float>(input2()),
91 getTensorShape(output()), getTensorData<float>(output()));
95 template <typename T> void Mul::evalInteger() const
97 tflite::ArithmeticParams params{};
98 fillArithmeticActivationRange<T>(params, _params.activation);
100 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
101 getTensorShape(input1()), getTensorShape(input2()), ¶ms);
105 luci_interpreter_pal::BroadcastMul4DSlow(
106 params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
107 getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
111 luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
112 getTensorShape(input2()), getTensorData<T>(input2()),
113 getTensorShape(output()), getTensorData<T>(output()));
117 void Mul::evalQuantizedS16() const
119 const auto input1_scale = static_cast<double>(input1()->scale());
120 const auto input2_scale = static_cast<double>(input2()->scale());
121 const auto output_scale = static_cast<double>(output()->scale());
123 const double real_multiplier = input1_scale * input2_scale / output_scale;
125 int32_t output_multiplier;
127 quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
129 int32_t activation_min{};
130 int32_t activation_max{};
131 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
133 auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
134 int16_t input2_val) {
135 int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
136 output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
137 output = std::max(output, activation_min);
138 output = std::min(output, activation_max);
139 return static_cast<int16_t>(output);
142 BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
143 getTensorShape(input2()), getTensorData<int16_t>(input2()),
144 getTensorShape(output()), getTensorData<int16_t>(output()), fn);
147 } // namespace kernels
148 } // namespace luci_interpreter