/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "NodeExecution.h"

#include "NodeDataImpl.h"
#include "NodeDomain.h"
#include "Validation.h"

#include <nncc/core/ADT/tensor/Shape.h>
#include <nncc/core/ADT/tensor/Buffer.h>
#include <nncc/core/ADT/tensor/Index.h>
#include <nncc/core/ADT/tensor/IndexEnumerator.h>
#include <nncc/core/ADT/tensor/LexicalLayout.h>

#include <algorithm>
#include <cassert>
#include <limits>
#include <stdexcept>
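
// Evaluation of MaxPool2D: read the annotated input feature map, apply max
// pooling per channel, and annotate the node with the resulting output buffer.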

namespace
{

/**
 * @brief Compute 1D output size based on given 1D arguments.
 *
 * @param whole_pad Sum of front and back pad
 */
inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
                                 uint32_t stride)
{
  assert((image_size + whole_pad - filter_size) % stride == 0);
  return (image_size + whole_pad - filter_size) / stride + 1;
}
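
// e.g. image_size = 5, whole_pad = 2, filter_size = 3, stride = 2
//      gives (5 + 2 - 3) / 2 + 1 = 3 output elements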

using nncc::core::ADT::tensor::Buffer;
using nncc::core::ADT::tensor::Shape;
using nncc::core::ADT::tensor::Index;
using nncc::core::ADT::tensor::IndexEnumerator;
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_buffer;
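
// Compute per-channel 2D max pooling over an NHWC (N x H x W x C) input buffer.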
template <typename T>
nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
                                             const Buffer<T> *ifm_buf)
{
  auto ifm_shape = ifm_buf->shape();

  const uint32_t batches = ifm_shape.dim(0);
  const uint32_t depth = ifm_shape.dim(3);

  const uint32_t ifm_height = ifm_shape.dim(1);
  const uint32_t ifm_width = ifm_shape.dim(2);

  const uint32_t window_height = maxpool2d->window()->vertical();
  const uint32_t window_width = maxpool2d->window()->horizontal();

  const uint32_t stride_height = maxpool2d->stride()->vertical();
  const uint32_t stride_width = maxpool2d->stride()->horizontal();

  const uint32_t pad_top = maxpool2d->pad()->top();
  const uint32_t pad_bottom = maxpool2d->pad()->bottom();

  const uint32_t pad_left = maxpool2d->pad()->left();
  const uint32_t pad_right = maxpool2d->pad()->right();

  const uint32_t output_height =
      compute_out_size(ifm_height, pad_top + pad_bottom, window_height, stride_height);
  const uint32_t output_width =
      compute_out_size(ifm_width, pad_left + pad_right, window_width, stride_width);

  // prepare output buffer
  Shape output_shape{batches, output_height, output_width, depth};
  auto output_buf = make_buffer<T, LexicalLayout>(output_shape);
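
  // Each output element is the maximum of its pooling window, with the window
  // clamped to the input extent so padded positions never contribute a value.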
  for (uint32_t batch = 0; batch < batches; ++batch)
  {
    for (uint32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (uint32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (uint32_t channel = 0; channel < depth; ++channel)
        {
          const int in_x_origin = (out_x * stride_width) - pad_left;
          const int in_y_origin = (out_y * stride_height) - pad_top;
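          // in_*_origin is the top-left corner of the window in input
          // coordinates; it is negative when the window starts in the padding.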

          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const uint32_t filter_x_start = std::max(0, -in_x_origin);
          const uint32_t filter_x_end = std::min(window_width, ifm_width - in_x_origin);

          const uint32_t filter_y_start = std::max(0, -in_y_origin);
          const uint32_t filter_y_end = std::min(window_height, ifm_height - in_y_origin);

          T max = std::numeric_limits<T>::lowest();

          for (uint32_t filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y)
          {
            for (uint32_t filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x)
            {
              const uint32_t in_x = in_x_origin + filter_x;
              const uint32_t in_y = in_y_origin + filter_y;
              max = std::max(max, ifm_buf->at(Index({batch, in_y, in_x, channel})));
            }
          }

          output_buf.at(Index({batch, out_y, out_x, channel})) = max;
        }
      }
    }
  }

  return output_buf;
}

} // namespace

namespace
{

using namespace locomotiv;

void execute_node(loco::MaxPool2D *maxpool2d)
{
  auto ifm_data = annot_data(maxpool2d->ifm());

  validate(ifm_data, "Can't find input data of MaxPool2D");
  validate(ifm_data->shape()->rank() == 4, "IFM rank should be 4");
  validate(annot_domain(maxpool2d->ifm()) == loco::Domain::Feature,
           "ifm of MaxPool2D is not Feature");

  std::unique_ptr<NodeData> maxpool2d_data = nullptr;

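  // Dispatch on the element type of the input; only FLOAT32 is handled here,
  // and any other dtype falls through to the runtime_error below.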
  switch (ifm_data->dtype())
  {
    case loco::DataType::FLOAT32:
    {
      auto ifm_buf = ifm_data->as_f32_bufptr();

      auto maxpool2d_buf = maxPool2D<float>(maxpool2d, ifm_buf);

      maxpool2d_data = make_data(maxpool2d_buf);
      break;
    }
    default:
      throw std::runtime_error("NYI for this DataType");
  }

  assert(maxpool2d_data != nullptr);

  annot_data(maxpool2d, std::move(maxpool2d_data));
  annot_domain(maxpool2d, loco::Domain::Feature);
}

} // namespace

namespace locomotiv
{

void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); }

} // namespace locomotiv