/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18 #include "kernels/Mul.h"
19
20 #include "kernels/BinaryOpCommon.h"
21 #include "kernels/Utils.h"
22
23 #include "PALMul.h"
24
25 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
26
namespace luci_interpreter
{
namespace kernels
{

Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}

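// configure() validates operand types before execution. For the quantized S16
// path the kernel requires symmetric quantization on both inputs and the
// output (a single zero point per tensor, equal to zero), so the multiply can
// be rescaled with one fixed-point multiplier and no zero-point offsets.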
void Mul::configure()
{
  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
  if (input1()->element_type() == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
                           input2()->zero_points().size() == 1);
    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
                           output()->zero_point() == 0);
  }
  // TODO: enable this resize only for kernels with dynamic shapes
  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}

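// execute() dispatches on the element type validated in configure();
// any other type is rejected here with an assert.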
void Mul::execute() const
{
  switch (input1()->element_type())
  {
    case DataType::FLOAT32:
      evalFloat();
      break;
    case DataType::S64:
      evalInteger<int64_t>();
      break;
    case DataType::S32:
      evalInteger<int32_t>();
      break;
    case DataType::S16:
      evalQuantizedS16();
      break;
    default:
      assert(false && "Unsupported type.");
  }
}

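// evalFloat() lets ProcessBroadcastShapes decide whether the two input shapes
// require broadcasting. If they do, the slower BroadcastMul4DSlow path walks
// the broadcast dimensions; otherwise the flat elementwise Mul is used. The
// activation clamp range is folded into params beforehand by
// fillArithmeticActivationRange.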
void Mul::evalFloat() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<float>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    luci_interpreter_pal::BroadcastMul4DSlow(
      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
                              getTensorShape(input2()), getTensorData<float>(input2()),
                              getTensorShape(output()), getTensorData<float>(output()));
  }
}

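// evalInteger() mirrors evalFloat() for the S32/S64 cases; the template
// parameter T selects the element type used for data access and clamping.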
template <typename T> void Mul::evalInteger() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<T>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    luci_interpreter_pal::BroadcastMul4DSlow(
      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
  }
  else
  {
    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
                              getTensorShape(input2()), getTensorData<T>(input2()),
                              getTensorShape(output()), getTensorData<T>(output()));
  }
}

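// evalQuantizedS16() multiplies the raw S16 values and rescales the 32-bit
// product back to the output scale. With symmetric quantization (all zero
// points are 0, as enforced in configure()):
//
//   real1 * real2 = (q1 * s1) * (q2 * s2) = (q1 * q2) * (s1 * s2)
//   q_out = (q1 * q2) * (s1 * s2 / s_out)
//
// quantizeMultiplier() decomposes real_multiplier = s1 * s2 / s_out into a
// normalized Q31 integer multiplier and a power-of-two shift. For example,
// with s1 = 0.5, s2 = 0.25, s_out = 0.125 we get real_multiplier = 1.0,
// which decomposes to output_multiplier = 1 << 30 and output_shift = 1.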
void Mul::evalQuantizedS16() const
{
  const auto input1_scale = static_cast<double>(input1()->scale());
  const auto input2_scale = static_cast<double>(input2()->scale());
  const auto output_scale = static_cast<double>(output()->scale());

  const double real_multiplier = input1_scale * input2_scale / output_scale;

  int32_t output_multiplier;
  int output_shift;
  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
                                                                              int16_t input2_val) {
    int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
    output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
    output = std::max(output, activation_min);
    output = std::min(output, activation_max);
    return static_cast<int16_t>(output);
  };

  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
}

} // namespace kernels
} // namespace luci_interpreter
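
// A minimal usage sketch (hypothetical: the tensor construction helpers vary
// between luci-interpreter builds, so makeInputTensor / makeOutputTensor below
// stand in for whatever test utilities the surrounding project provides):
//
//   MulParams params{};
//   params.activation = Activation::NONE;
//
//   Tensor input1 = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 2, 3, 4});
//   Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {5, 6, 7, 8});
//   Tensor output = makeOutputTensor(DataType::FLOAT32);
//
//   Mul kernel(&input1, &input2, &output, params);
//   kernel.configure(); // validates types, resizes output to the broadcast shape
//   kernel.execute();   // output now holds {5, 12, 21, 32}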