onert-micro/luci-interpreter/pal/common/PALMaxPool2DCommon.h

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
  19 #define LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H
  20
  21 #include "Params.h"
  22 #include "PALUtils.h"
  23
  24 namespace luci_interpreter_pal
  25 {
  26
  27 inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
  28                     const float *input_data, const luci_interpreter::RuntimeShape &output_shape,
  29                     float *output_data)
  30 {
  31   const int batches = input_shape.dims(0);
  32   const int depth = output_shape.dims(3);
  33   const int input_height = input_shape.dims(1);
  34   const int input_width = input_shape.dims(2);
  35   const int output_height = output_shape.dims(1);
  36   const int output_width = output_shape.dims(2);
  37   const int stride_height = params.stride_height;
  38   const int stride_width = params.stride_width;
  39   for (int batch = 0; batch < batches; ++batch)
  40   {
  41     for (int out_y = 0; out_y < output_height; ++out_y)
  42     {
  43       for (int out_x = 0; out_x < output_width; ++out_x)
  44       {
  45         for (int channel = 0; channel < depth; ++channel)
  46         {
  47           const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
  48           const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
  49           // Compute the boundaries of the filter region clamped so as to
  50           // ensure that the filter window fits in the input array.
  51           const int filter_x_start = std::max(0, -in_x_origin);
  52           const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
  53           const int filter_y_start = std::max(0, -in_y_origin);
  54           const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
  55           float max = std::numeric_limits<float>::lowest();
  56           for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
  57           {
  58             for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
  59             {
  60               const int in_x = in_x_origin + filter_x;
  61               const int in_y = in_y_origin + filter_y;
  62
  63               const int input_data_offset =
  64                 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
  65                   input_shape.dims(3) +
  66                 channel;
  67
  68               max = std::max(max, input_data[input_data_offset]);
  69             }
  70           }
  71           const int output_data_offset =
  72             ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
  73               output_shape.dims(3) +
  74             channel;
  75
  76           output_data[output_data_offset] =
  77             std::min(std::max(max, params.float_activation_min), params.float_activation_max);
  78         }
  79       }
  80     }
  81   }
  82 }
  83
  84 template <typename T>
  85 inline void MaxPool(const PoolParams &params, const luci_interpreter::RuntimeShape &input_shape,
  86                     const T *input_data, const luci_interpreter::RuntimeShape &output_shape,
  87                     T *output_data)
  88 {
  89   const int batches = input_shape.dims(0);
  90   const int depth = output_shape.dims(3);
  91   const int input_height = input_shape.dims(1);
  92   const int input_width = input_shape.dims(2);
  93   const int output_height = output_shape.dims(1);
  94   const int output_width = output_shape.dims(2);
  95   const int stride_height = params.stride_height;
  96   const int stride_width = params.stride_width;
  97   for (int batch = 0; batch < batches; ++batch)
  98   {
  99     for (int out_y = 0; out_y < output_height; ++out_y)
 100     {
 101       for (int out_x = 0; out_x < output_width; ++out_x)
 102       {
 103         for (int channel = 0; channel < depth; ++channel)
 104         {
 105           const int in_x_origin = (out_x * stride_width) - params.padding_values.width;
 106           const int in_y_origin = (out_y * stride_height) - params.padding_values.height;
 107           // Compute the boundaries of the filter region clamped so as to
 108           // ensure that the filter window fits in the input array.
 109           const int filter_x_start = std::max(0, -in_x_origin);
 110           const int filter_x_end = std::min(params.filter_width, input_width - in_x_origin);
 111           const int filter_y_start = std::max(0, -in_y_origin);
 112           const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin);
 113           T max = std::numeric_limits<T>::lowest();
 114           for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
 115           {
 116             for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
 117             {
 118               const int in_x = in_x_origin + filter_x;
 119               const int in_y = in_y_origin + filter_y;
 120
 121               const int input_data_offset =
 122                 ((batch * input_shape.dims(1) + in_y) * input_shape.dims(2) + in_x) *
 123                   input_shape.dims(3) +
 124                 channel;
 125
 126               max = std::max(max, input_data[input_data_offset]);
 127             }
 128           }
 129           max = std::max<T>(max, params.quantized_activation_min);
 130           max = std::min<T>(max, params.quantized_activation_max);
 131
 132           const int output_data_offset =
 133             ((batch * output_shape.dims(1) + out_y) * output_shape.dims(2) + out_x) *
 134               output_shape.dims(3) +
 135             channel;
 136
 137           output_data[output_data_offset] = static_cast<T>(max);
 138         }
 139       }
 140     }
 141   }
 142 }
 143
 144 } // namespace luci_interpreter_pal
 145
 146 #endif // LUCI_INTERPRETER_PAL_MAX_POOL_2D_COMMON_H