compiler/luci-interpreter/src/kernels/Add.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #include "kernels/Add.h"
  19
  20 #include "kernels/Utils.h"
  21
  22 #include <tensorflow/lite/kernels/internal/reference/add.h>
  23 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
  24
  25 #include <stdexcept>
  26
  27 namespace luci_interpreter
  28 {
  29 namespace kernels
  30 {
  31
  32 Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
  33     : KernelWithParams<AddParams>({input1, input2}, {output}, params)
  34 {
  35 }
  36
  37 void Add::configure()
  38 {
  39   if (input1()->element_type() != input2()->element_type())
  40   {
  41     throw std::runtime_error("Input Tensor Data Type Mismatch.");
  42   }
  43   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
  44 }
  45
  46 void Add::execute() const
  47 {
  48   switch (input1()->element_type())
  49   {
  50     case DataType::FLOAT32:
  51       evalFloat();
  52       break;
  53     case DataType::U8:
  54       evalQuantized();
  55       break;
  56     default:
  57       throw std::runtime_error("Unsupported type.");
  58   }
  59 }
  60
  61 void Add::evalFloat() const
  62 {
  63   float activation_min{};
  64   float activation_max{};
  65   calculateActivationRange(_params.activation, &activation_min, &activation_max);
  66
  67   tflite::ArithmeticParams params{};
  68   params.float_activation_min = activation_min;
  69   params.float_activation_max = activation_max;
  70
  71   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
  72       getTensorShape(input1()), getTensorShape(input2()), &params);
  73
  74   if (need_broadcast)
  75   {
  76     tflite::reference_ops::BroadcastAdd4DSlow(
  77         params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
  78         getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  79   }
  80   else
  81   {
  82     tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
  83                                getTensorShape(input2()), getTensorData<float>(input2()),
  84                                getTensorShape(output()), getTensorData<float>(output()));
  85   }
  86 }
  87
  88 void Add::evalQuantized() const
  89 {
  90   const auto input1_scale = static_cast<double>(input1()->scale());
  91   const auto input2_scale = static_cast<double>(input2()->scale());
  92   const auto output_scale = static_cast<double>(output()->scale());
  93
  94   const int left_shift = 20;
  95   const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
  96   const double real_input1_multiplier = input1_scale / twice_max_input_scale;
  97   const double real_input2_multiplier = input2_scale / twice_max_input_scale;
  98   const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
  99
 100   int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
 101   int input1_shift{}, input2_shift{}, output_shift{};
 102   quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
 103   quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
 104   quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
 105
 106   int32_t activation_min{};
 107   int32_t activation_max{};
 108   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
 109
 110   tflite::ArithmeticParams params{};
 111   params.left_shift = left_shift;
 112   // The kernel expects inputs' zero points to be negated.
 113   params.input1_offset = -input1()->zero_point(); // Note the '-'.
 114   params.input1_multiplier = input1_multiplier;
 115   params.input1_shift = input1_shift;
 116   params.input2_offset = -input2()->zero_point(); // Note the '-'.
 117   params.input2_multiplier = input2_multiplier;
 118   params.input2_shift = input2_shift;
 119   params.output_offset = output()->zero_point();
 120   params.output_multiplier = output_multiplier;
 121   params.output_shift = output_shift;
 122   params.quantized_activation_min = activation_min;
 123   params.quantized_activation_max = activation_max;
 124
 125   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
 126       getTensorShape(input1()), getTensorShape(input2()), &params);
 127
 128   if (need_broadcast)
 129   {
 130     tflite::reference_ops::BroadcastAdd4DSlow(
 131         params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
 132         getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
 133         getTensorData<uint8_t>(output()));
 134   }
 135   else
 136   {
 137     tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
 138                                getTensorShape(input2()), getTensorData<uint8_t>(input2()),
 139                                getTensorShape(output()), getTensorData<uint8_t>(output()));
 140   }
 141 }
 142
 143 } // namespace kernels
 144 } // namespace luci_interpreter