/* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernels/Sub.h"
#include "kernels/Utils.h"

#include "PALSub.h"

#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>

#include <algorithm>
#include <stdexcept>
27 namespace luci_interpreter
32 Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms)
33 : KernelWithParams<SubParams>({input1, input2}, {output}, params)
39 LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
40 output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
43 void Sub::execute() const
45 switch (input1()->element_type())
47 case DataType::FLOAT32:
54 throw std::runtime_error("Unsupported type.");
58 void Sub::evalFloat() const
60 float activation_min{};
61 float activation_max{};
62 calculateActivationRange(_params.activation, &activation_min, &activation_max);
64 tflite::ArithmeticParams params{};
65 params.float_activation_min = activation_min;
66 params.float_activation_max = activation_max;
68 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
69 getTensorShape(input1()), getTensorShape(input2()), ¶ms);
73 tflite::reference_ops::BroadcastSubSlow(
74 params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
75 getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
79 luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
80 getTensorShape(input2()), getTensorData<float>(input2()),
81 getTensorShape(output()), getTensorData<float>(output()));
85 void Sub::evalQuantized() const
87 const auto input1_scale = static_cast<double>(input1()->scale());
88 const auto input2_scale = static_cast<double>(input2()->scale());
89 const auto output_scale = static_cast<double>(output()->scale());
91 const int left_shift = 20;
92 const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
93 const double real_input1_multiplier = input1_scale / twice_max_input_scale;
94 const double real_input2_multiplier = input2_scale / twice_max_input_scale;
95 const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
97 int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
98 int input1_shift{}, input2_shift{}, output_shift{};
99 quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
100 quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
101 quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
103 int32_t activation_min{};
104 int32_t activation_max{};
105 calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
107 tflite::ArithmeticParams params{};
108 params.left_shift = left_shift;
109 // The kernel expects inputs' zero points to be negated.
110 params.input1_offset = -input1()->zero_point(); // Note the '-'.
111 params.input1_multiplier = input1_multiplier;
112 params.input1_shift = input1_shift;
113 params.input2_offset = -input2()->zero_point(); // Note the '-'.
114 params.input2_multiplier = input2_multiplier;
115 params.input2_shift = input2_shift;
116 params.output_offset = output()->zero_point();
117 params.output_multiplier = output_multiplier;
118 params.output_shift = output_shift;
119 params.quantized_activation_min = activation_min;
120 params.quantized_activation_max = activation_max;
122 const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
123 getTensorShape(input1()), getTensorShape(input2()), ¶ms);
127 tflite::reference_ops::BroadcastSubSlow(
128 params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
129 getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
133 tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
134 getTensorShape(input2()), getTensorData<uint8_t>(input2()),
135 getTensorShape(output()), getTensorData<uint8_t>(output()));
139 } // namespace kernels
140 } // namespace luci_interpreter