compiler/luci-interpreter/src/kernels/Sub.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #include "kernels/Sub.h"
  19 #include "kernels/Utils.h"
  20
  21 #include "PALSub.h"
  22
  23 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
  24
  25 #include <stdexcept>
  26
  27 namespace luci_interpreter
  28 {
  29 namespace kernels
  30 {
  31
  32 Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
  33   : KernelWithParams<SubParams>({input1, input2}, {output}, params)
  34 {
  35 }
  36
  37 void Sub::configure()
  38 {
  39   LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
  40   LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
  41   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
  42 }
  43
  44 void Sub::execute() const
  45 {
  46   switch (input1()->element_type())
  47   {
  48     case DataType::FLOAT32:
  49       evalFloat();
  50       break;
  51     case DataType::S64:
  52       evalInteger<int64_t>();
  53       break;
  54     case DataType::S32:
  55       evalInteger<int32_t>();
  56       break;
  57     case DataType::U8:
  58       evalQuantized();
  59       break;
  60     default:
  61       throw std::runtime_error("Unsupported type.");
  62   }
  63 }
  64
  65 void Sub::evalFloat() const
  66 {
  67   tflite::ArithmeticParams params{};
  68   fillArithmeticActivationRange<float>(params, _params.activation);
  69
  70   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
  71     getTensorShape(input1()), getTensorShape(input2()), &params);
  72
  73   if (need_broadcast)
  74   {
  75     tflite::reference_ops::BroadcastSubSlow(
  76       params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
  77       getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  78   }
  79   else
  80   {
  81     luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
  82                               getTensorShape(input2()), getTensorData<float>(input2()),
  83                               getTensorShape(output()), getTensorData<float>(output()));
  84   }
  85 }
  86
  87 template <typename T> void Sub::evalInteger() const
  88 {
  89   tflite::ArithmeticParams params{};
  90   fillArithmeticActivationRange<T>(params, _params.activation);
  91
  92   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
  93     getTensorShape(input1()), getTensorShape(input2()), &params);
  94
  95   if (need_broadcast)
  96   {
  97     tflite::reference_ops::BroadcastSubSlow(
  98       params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
  99       getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
 100   }
 101   else
 102   {
 103     tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
 104                                getTensorShape(input2()), getTensorData<T>(input2()),
 105                                getTensorShape(output()), getTensorData<T>(output()));
 106   }
 107 }
 108
 109 void Sub::evalQuantized() const
 110 {
 111   const auto input1_scale = static_cast<double>(input1()->scale());
 112   const auto input2_scale = static_cast<double>(input2()->scale());
 113   const auto output_scale = static_cast<double>(output()->scale());
 114
 115   const int left_shift = 20;
 116   const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
 117   const double real_input1_multiplier = input1_scale / twice_max_input_scale;
 118   const double real_input2_multiplier = input2_scale / twice_max_input_scale;
 119   const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
 120
 121   int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
 122   int input1_shift{}, input2_shift{}, output_shift{};
 123   quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
 124   quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
 125   quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
 126
 127   int32_t activation_min{};
 128   int32_t activation_max{};
 129   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
 130
 131   tflite::ArithmeticParams params{};
 132   params.left_shift = left_shift;
 133   // The kernel expects inputs' zero points to be negated.
 134   params.input1_offset = -input1()->zero_point(); // Note the '-'.
 135   params.input1_multiplier = input1_multiplier;
 136   params.input1_shift = input1_shift;
 137   params.input2_offset = -input2()->zero_point(); // Note the '-'.
 138   params.input2_multiplier = input2_multiplier;
 139   params.input2_shift = input2_shift;
 140   params.output_offset = output()->zero_point();
 141   params.output_multiplier = output_multiplier;
 142   params.output_shift = output_shift;
 143   params.quantized_activation_min = activation_min;
 144   params.quantized_activation_max = activation_max;
 145
 146   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
 147     getTensorShape(input1()), getTensorShape(input2()), &params);
 148
 149   if (need_broadcast)
 150   {
 151     tflite::reference_ops::BroadcastQuantSubSlow(
 152       params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
 153       getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
 154   }
 155   else
 156   {
 157     tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
 158                                getTensorShape(input2()), getTensorData<uint8_t>(input2()),
 159                                getTensorShape(output()), getTensorData<uint8_t>(output()));
 160   }
 161 }
 162
 163 } // namespace kernels
 164 } // namespace luci_interpreter