compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
  19 #define __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__
  20
  21 #include "cker/Shape.h"
  22 #include "cker/Types.h"
  23 #include "cker/Utils.h"
  24
  25 #include <cmath>
  26
  27 namespace nnfw
  28 {
  29 namespace cker
  30 {
  31 namespace reference
  32 {
  33
  34 template <typename T>
  35 inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
  36                                const T *input1_data, const Shape &input2_shape,
  37                                const T *input2_data, const Shape &output_shape, T *output_data,
  38                                const std::function<T(const T &, const T &)> &fn)
  39 {
  40   const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
  41   for (int i = 0; i < flat_size; ++i)
  42   {
  43     output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
  44                                                   params.quantized_activation_min,
  45                                                   params.quantized_activation_max);
  46   }
  47 }
  48
  49 template <>
  50 inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
  51                                const float *input1_data, const Shape &input2_shape,
  52                                const float *input2_data, const Shape &output_shape,
  53                                float *output_data,
  54                                const std::function<float(const float &, const float &)> &fn)
  55 {
  56   const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
  57   for (int i = 0; i < size; i++)
  58   {
  59     output_data[i] =
  60         ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
  61                                      params.float_activation_min, params.float_activation_max);
  62   }
  63 }
  64
  65 template <typename T>
  66 inline void BroadcastBinaryArithmeticOpSlowQuant8(
  67     const BinaryArithmeticOpParam &params, const Shape &input1_shape, const T *input1_data,
  68     const Shape &input2_shape, const T *input2_data, const Shape &output_shape, T *output_data,
  69     const std::function<T(const BinaryArithmeticOpParam &params, const T &, const T &)> &fn)
  70 {
  71   NdArrayDesc<4> desc1;
  72   NdArrayDesc<4> desc2;
  73   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
  74   const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
  75
  76   if ((params.quantized_activation_min < 0) && (params.quantized_activation_max > 255))
  77   {
  78     throw std::runtime_error{"Support only for Quant8."};
  79   }
  80
  81   // Comment from tensorflow lite:
  82   //
  83   // In Tensorflow, the dimensions are canonically named (batch_number, row,
  84   // col, channel), with extents (batches, height, width, depth), with the
  85   // trailing dimension changing most rapidly (channels has the smallest stride,
  86   // typically 1 element).
  87   //
  88   // In generated C code, we store arrays with the dimensions reversed. The
  89   // first dimension has smallest stride.
  90   //
  91   // We name our variables by their Tensorflow convention, but generate C code
  92   // nesting loops such that the innermost loop has the smallest stride for the
  93   // best cache behavior.
  94   for (int b = 0; b < extended_output_shape.Dims(0); ++b)
  95   {
  96     for (int y = 0; y < extended_output_shape.Dims(1); ++y)
  97     {
  98       for (int x = 0; x < extended_output_shape.Dims(2); ++x)
  99       {
 100         for (int c = 0; c < extended_output_shape.Dims(3); ++c)
 101         {
 102           output_data[Offset(extended_output_shape, b, y, x, c)] =
 103               ActivationFunctionWithMinMax<uint8_t>(
 104                   fn(params, input1_data[SubscriptToIndex(desc1, b, y, x, c)],
 105                      input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
 106                   params.quantized_activation_min, params.quantized_activation_max);
 107         }
 108       }
 109     }
 110   }
 111 }
 112 template <typename T>
 113 inline void BroadcastBinaryArithmeticOpSlow(const BinaryArithmeticOpParam &params,
 114                                             const Shape &input1_shape, const T *input1_data,
 115                                             const Shape &input2_shape, const T *input2_data,
 116                                             const Shape &output_shape, T *output_data,
 117                                             const std::function<T(const T &, const T &)> &fn)
 118 {
 119   NdArrayDesc<4> desc1;
 120   NdArrayDesc<4> desc2;
 121   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
 122   const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
 123
 124   // Comment from tensorflow lite:
 125   //
 126   // In Tensorflow, the dimensions are canonically named (batch_number, row,
 127   // col, channel), with extents (batches, height, width, depth), with the
 128   // trailing dimension changing most rapidly (channels has the smallest stride,
 129   // typically 1 element).
 130   //
 131   // In generated C code, we store arrays with the dimensions reversed. The
 132   // first dimension has smallest stride.
 133   //
 134   // We name our variables by their Tensorflow convention, but generate C code
 135   // nesting loops such that the innermost loop has the smallest stride for the
 136   // best cache behavior.
 137   for (int b = 0; b < extended_output_shape.Dims(0); ++b)
 138   {
 139     for (int y = 0; y < extended_output_shape.Dims(1); ++y)
 140     {
 141       for (int x = 0; x < extended_output_shape.Dims(2); ++x)
 142       {
 143         for (int c = 0; c < extended_output_shape.Dims(3); ++c)
 144         {
 145           output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax<T>(
 146               fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
 147                  input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
 148               params.quantized_activation_min, params.quantized_activation_max);
 149         }
 150       }
 151     }
 152   }
 153 }
 154
 155 template <>
 156 inline void BroadcastBinaryArithmeticOpSlow(
 157     const BinaryArithmeticOpParam &params, const Shape &input1_shape, const float *input1_data,
 158     const Shape &input2_shape, const float *input2_data, const Shape &output_shape,
 159     float *output_data, const std::function<float(const float &, const float &)> &fn)
 160 {
 161   NdArrayDesc<4> desc1;
 162   NdArrayDesc<4> desc2;
 163   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
 164   const Shape extended_output_shape = Shape::ExtendedShape(4, output_shape);
 165
 166   for (int b = 0; b < extended_output_shape.Dims(0); ++b)
 167   {
 168     for (int y = 0; y < extended_output_shape.Dims(1); ++y)
 169     {
 170       for (int x = 0; x < extended_output_shape.Dims(2); ++x)
 171       {
 172         for (int c = 0; c < extended_output_shape.Dims(3); ++c)
 173         {
 174           output_data[Offset(extended_output_shape, b, y, x, c)] = ActivationFunctionWithMinMax(
 175               fn(input1_data[SubscriptToIndex(desc1, b, y, x, c)],
 176                  input2_data[SubscriptToIndex(desc2, b, y, x, c)]),
 177               params.float_activation_min, params.float_activation_max);
 178         }
 179       }
 180     }
 181   }
 182 }
 183
 184 } // namespace reference
 185 } // namespace cker
 186 } // namespace nnfw
 187
 188 #endif // __NNFW_CKER_REFERENCE_BINARYARITHMETICOPS_H__