Imported Upstream version 1.18.0
[platform/core/ml/nnfw.git] / compiler / luci-interpreter / src / kernels / Mul.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18 #include "kernels/Mul.h"
19
20 #include "kernels/BinaryOpCommon.h"
21 #include "kernels/Utils.h"
22
23 #include "PALMul.h"
24
25 #include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
26
27 #include <stdexcept>
28
29 namespace luci_interpreter
30 {
31 namespace kernels
32 {
33
// Constructs the Mul kernel over two input tensors and one output tensor.
// Inputs/outputs are borrowed (non-owning) pointers; `params` (activation
// function) is copied into the base KernelWithParams storage.
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
38
39 void Mul::configure()
40 {
41   LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
42   LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
43   if (input1()->element_type() == DataType::S16)
44   {
45     LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
46                            output()->zero_point() == 0);
47   }
48
49   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
50 }
51
52 void Mul::execute() const
53 {
54   switch (input1()->element_type())
55   {
56     case DataType::FLOAT32:
57       evalFloat();
58       break;
59     case DataType::S16:
60       evalQuantizedS16();
61       break;
62     default:
63       throw std::runtime_error("Unsupported type.");
64   }
65 }
66
67 void Mul::evalFloat() const
68 {
69   float activation_min{};
70   float activation_max{};
71   calculateActivationRange(_params.activation, &activation_min, &activation_max);
72
73   tflite::ArithmeticParams params{};
74   params.float_activation_min = activation_min;
75   params.float_activation_max = activation_max;
76
77   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
78     getTensorShape(input1()), getTensorShape(input2()), &params);
79
80   if (need_broadcast)
81   {
82     luci_interpreter_pal::BroadcastMul4DSlow(
83       params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
84       getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
85   }
86   else
87   {
88     luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
89                               getTensorShape(input2()), getTensorData<float>(input2()),
90                               getTensorShape(output()), getTensorData<float>(output()));
91   }
92 }
93
94 void Mul::evalQuantizedS16() const
95 {
96   const auto input1_scale = static_cast<double>(input1()->scale());
97   const auto input2_scale = static_cast<double>(input2()->scale());
98   const auto output_scale = static_cast<double>(output()->scale());
99
100   const double real_multiplier = input1_scale * input2_scale / output_scale;
101
102   int32_t output_multiplier;
103   int output_shift;
104   quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
105
106   int32_t activation_min{};
107   int32_t activation_max{};
108   calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
109
110   auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
111                                                                               int16_t input2_val) {
112     int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
113     output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
114     output = std::max(output, activation_min);
115     output = std::min(output, activation_max);
116     return static_cast<int16_t>(output);
117   };
118
119   BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
120                         getTensorShape(input2()), getTensorData<int16_t>(input2()),
121                         getTensorShape(output()), getTensorData<int16_t>(output()), fn);
122 }
123
124 } // namespace kernels
125 } // namespace luci_interpreter