onert-micro/luci-interpreter/src/kernels/Tanh.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "Builders.h"
  18 #include "kernels/Utils.h"
  19 #include "SISOKernel.h"
  20
  21 #include "PALTanh.h"
  22
  23 namespace luci_interpreter
  24 {
  25
  26 #ifndef DIS_QUANT
  27
  28 namespace
  29 {
  30 void calculateArithmeticData(const circle::Tensor *input, const circle::Tensor *output,
  31                              int32_t &input_zero_point, int32_t &input_range_radius,
  32                              int32_t &input_multiplier, int &input_left_shift)
  33 {
  34   const auto input_dtype = Tensor::element_type(input);
  35   switch (input_dtype)
  36   {
  37     // TODO: enable it
  38 #if 0
  39     case DataType::S8:
  40     {
  41       static constexpr int input_integer_bits = 4;
  42       const double input_real_multiplier = static_cast<double>(Tensor::scale(input)) *
  43                                            static_cast<double>(1 << (31 - input_integer_bits));
  44
  45       const double q = std::frexp(input_real_multiplier, &input_left_shift);
  46       input_multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
  47       input_range_radius = kernels::calculateInputRadius(input_integer_bits, input_left_shift, 31);
  48     }
  49     break;
  50 #endif
  51     case DataType::S16:
  52     {
  53       static constexpr int input_integer_bits = 3;
  54       static constexpr int output_fractional_bits = 15;
  55
  56       // These operators are implemented in fixed-point arithmetic,
  57       // which intrinsically wants symmetric ranges (zero_point==0)
  58       // and power-of-two scales (power-of-two is abbreviated below as POT).
  59       // While more general support would be possible by means of rescaling,
  60       // that would add some overhead and some loss of accuracy and wouldn't
  61       // be used at the moment as current quantized LSTM applications are
  62       // happy with symmetric, power-of-two-scales quantization. So we just
  63       // implement that narrow case only for now.
  64
  65       int input_scale_log2_rounded;
  66       bool param_scale_pot = kernels::checkedLog2(Tensor::scale(input), &input_scale_log2_rounded);
  67
  68       input_left_shift = (15 - input_integer_bits) + input_scale_log2_rounded;
  69       param_scale_pot &= (input_left_shift == 0 || input_left_shift == 1);
  70
  71       if (param_scale_pot)
  72       {
  73         input_multiplier = 0;
  74       }
  75       else
  76       {
  77         // Calculate multiplier to change input scale to 1/(3*4096)
  78         // as required by the table lookup.
  79         // The number 3.0 in the multiplier comes from here,
  80         // because the interval is [-10.7, 10.7] instead of [-8, 8].
  81         // So, in this scaling +/-2^17 represents +/-10.7.
  82
  83         double multiplier = static_cast<double>(Tensor::scale(input)) * 4096.0 * 3.0;
  84         input_left_shift = 0;
  85
  86         while (multiplier <= 32767.0 / 2.0 && input_left_shift <= 30)
  87         {
  88           input_left_shift++;
  89           multiplier = multiplier * 2.0;
  90         }
  91
  92         input_multiplier = static_cast<int32_t>(multiplier);
  93       }
  94
  95       int output_scale_log2_rounded;
  96       kernels::checkedLog2(Tensor::scale(output), &output_scale_log2_rounded);
  97       assert(output_scale_log2_rounded == -output_fractional_bits);
  98     }
  99     break;
 100     default:
 101       assert(false && "Unsupported type");
 102   }
 103 }
 104
 105 } // namespace
 106
 107 void evalInteger(const circle::Tensor *input, const circle::Tensor *output,
 108                  BaseRuntimeGraph *runtime_graph)
 109 {
 110   int32_t input_zero_point = 0;
 111   int32_t input_range_radius = 0;
 112   int32_t input_multiplier = 0;
 113   int input_left_shift = 0;
 114
 115   calculateArithmeticData(input, output, input_zero_point, input_range_radius, input_multiplier,
 116                           input_left_shift);
 117
 118   const auto *input_data = runtime_graph->getDataByTensor(input);
 119   assert(input_data);
 120
 121   auto *output_data = runtime_graph->getDataByTensor(output);
 122   assert(output_data);
 123
 124   const int flat_size = kernels::getTensorRuntimeShape(input, runtime_graph).flatSize();
 125
 126   const auto input_dtype = Tensor::element_type(input);
 127   switch (input_dtype)
 128   {
 129     // TODO: enable it
 130 #if 0
 131     case DataType::S8:
 132       luci_interpreter_pal::Tanh(
 133         input_zero_point, input_range_radius, input_multiplier, input_left_shift,
 134         flat_size, kernels::getTensorData<int8_t>(input_data), kernels::getTensorData<int8_t>(output_data));
 135       break;
 136 #endif // 0
 137     case DataType::S16:
 138       luci_interpreter_pal::Tanh(input_multiplier, input_left_shift, flat_size,
 139                                  kernels::getTensorData<int16_t>(input_data),
 140                                  kernels::getTensorData<int16_t>(output_data));
 141       break;
 142     default:
 143       assert(false && "Not support yet");
 144   }
 145 }
 146 #endif // DIS_QUANT
 147
 148 void configure_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
 149 {
 150   kernels::SISOKernel kernel(cur_op, runtime_graph);
 151
 152   LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input()) ==
 153                          Tensor::element_type(kernel.output()));
 154 }
 155
 156 void execute_kernel_CircleTanh(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
 157 {
 158   kernels::SISOKernel kernel(cur_op, runtime_graph);
 159
 160   const auto *input_data = runtime_graph->getDataByTensor(kernel.input());
 161   assert(input_data);
 162
 163   auto *output_data = runtime_graph->getDataByTensor(kernel.output());
 164
 165   bool is_inplace = runtime_graph->is_inplace_op(cur_op);
 166
 167   switch (Tensor::element_type(kernel.input()))
 168   {
 169 #ifndef DIS_FLOAT
 170     case DataType::FLOAT32:
 171     {
 172       const float *input_data_float = kernels::getTensorData<float>(input_data);
 173       float *output_data_float = kernels::getTensorData<float>(output_data);
 174       if (is_inplace)
 175       {
 176         output_data_float = const_cast<float *>(input_data_float);
 177       }
 178
 179       assert(output_data_float);
 180
 181       const int flat_size =
 182         kernels::getTensorRuntimeShape(kernel.input(), runtime_graph).flatSize();
 183
 184       luci_interpreter_pal::Tanh(flat_size, input_data_float, output_data_float);
 185       break;
 186     }
 187 #endif // DIS_FLOAT
 188 #ifndef DIS_QUANT
 189     case DataType::S16:
 190       // TODO: enable it
 191 #if 0
 192     case DataType::S8:
 193 #endif
 194       evalInteger(kernel.input(), kernel.output(), runtime_graph);
 195       break;
 196 #endif // DIS_QUANT
 197     default:
 198       assert(false && "Unsupported type");
 199   }
 200
 201   if (is_inplace)
 202     runtime_graph->makeInplaceOperation(kernel.input(), kernel.output());
 203 }
 204 } // namespace luci_interpreter