/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "NodeExecution.h"

#include "NodeDataImpl.h"
#include "NodeDomain.h"
#include "Validation.h"

#include <nncc/core/ADT/tensor/Shape.h>
#include <nncc/core/ADT/tensor/Buffer.h>
#include <nncc/core/ADT/tensor/Index.h>
#include <nncc/core/ADT/tensor/IndexEnumerator.h>
#include <nncc/core/ADT/tensor/LexicalLayout.h>

#include <cassert>
#include <memory> // std::unique_ptr
#include <stdexcept>

namespace
{

using nncc::core::ADT::tensor::Buffer;
using nncc::core::ADT::tensor::Shape;
using nncc::core::ADT::tensor::Index;
using nncc::core::ADT::tensor::IndexEnumerator;
using nncc::core::ADT::tensor::LexicalLayout;
using nncc::core::ADT::tensor::make_buffer;

/**
 * @brief Compute the 1D output size of a transposed convolution from its 1D arguments.
 *
 * @param whole_pad  Sum of front and rear padding
 */
inline uint32_t compute_transposed_out_size(uint32_t input_size, uint32_t whole_pad,
                                            uint32_t filter_size, uint32_t stride)
{
  return stride * (input_size - 1) + filter_size - whole_pad;
}
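// Quick sanity check of the formula above: input_size = 4, stride = 2,
// filter_size = 3, whole_pad = 0 gives 2 * (4 - 1) + 3 - 0 = 9.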

/**
 * @brief Calculates TransposedConv2D
 * @note  Both input_buf and filter_buf are in NHWC layout; for filter_buf,
 *        N is the filter count (= output depth) and C is the input depth.
 */
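// Example (hypothetical shapes): an NHWC input {1, 2, 2, 1} convolved with a
// single 3x3 filter ({1, 3, 3, 1}) at stride 2 with no padding yields an
// output of shape {1, 5, 5, 1}, since 2 * (2 - 1) + 3 - 0 = 5 per spatial axis.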
template <typename RET_T, typename IFM_T, typename FIL_T>
Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
                             const Buffer<IFM_T> *input_buf, const Buffer<FIL_T> *filter_buf)
{
  auto input_shape = input_buf->shape();
  auto filter_shape = filter_buf->shape();

  locomotiv::validate(input_shape.rank() == 4, "ifm rank must be 4");
  locomotiv::validate(filter_shape.rank() == 4, "filter rank must be 4");
  locomotiv::validate(input_shape.dim(3) /* depth of input */ ==
                          filter_shape.dim(3) /* depth of filter */,
                      "channel value mismatch");

  const uint32_t input_height = input_shape.dim(1);
  const uint32_t input_width = input_shape.dim(2);

  const uint32_t filter_height = filter_shape.dim(1);
  const uint32_t filter_width = filter_shape.dim(2);

  const uint32_t stride_width = tr_conv2d->stride()->horizontal();
  const uint32_t stride_height = tr_conv2d->stride()->vertical();

  const uint32_t pad_top = tr_conv2d->pad()->top();
  const uint32_t pad_bottom = tr_conv2d->pad()->bottom();

  const uint32_t pad_left = tr_conv2d->pad()->left();
  const uint32_t pad_right = tr_conv2d->pad()->right();

  // TODO Support dilations

  const uint32_t output_height =
      compute_transposed_out_size(input_height, pad_top + pad_bottom, filter_height, stride_height);
  const uint32_t output_width =
      compute_transposed_out_size(input_width, pad_left + pad_right, filter_width, stride_width);

  const uint32_t batches = input_shape.dim(0);
  const uint32_t input_depth = input_shape.dim(3);
  const uint32_t output_depth = filter_shape.dim(0); // count of filters

  Shape output_shape{batches, output_height, output_width, output_depth};
  auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);

  // initialize output
  for (IndexEnumerator e{output_shape}; e.valid(); e.advance())
  {
    const auto &index = e.current();
    output_buf.at(index) = static_cast<RET_T>(0);
  }

  // Loop through input elements one at a time.
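  // Unlike plain convolution, which gathers input values for each output
  // element, this kernel scatters: each input element is multiplied by every
  // filter weight and accumulated into the output positions it influences.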
  for (uint32_t batch = 0; batch < batches; ++batch)
  {
    for (uint32_t in_y = 0; in_y < input_height; ++in_y)
    {
      for (uint32_t in_x = 0; in_x < input_width; ++in_x)
      {
        for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
        {
          // Loop through the output elements it will influence
          const int out_x_origin = (in_x * stride_width) - pad_left;
          const int out_y_origin = (in_y * stride_height) - pad_top;
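          // With nonzero padding the origin can be negative, hence the signed
          // type; out-of-bounds contributions are skipped below.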
          for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
          {
            for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
            {
              for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
              {
                // Compute output element location
                const int out_x = out_x_origin + filter_x;
                const int out_y = out_y_origin + filter_y;
                // We cannot accumulate out of bounds
                if ((out_x >= 0) && ((unsigned)out_x < output_width) && (out_y >= 0) &&
                    ((unsigned)out_y < output_height))
                {
                  auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
                  auto filter_value =
                      filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
                  output_buf.at(Index({batch, (unsigned)out_y, (unsigned)out_x, out_channel})) +=
                      input_value * filter_value;
                }
              }
            }
          }
        }
      }
    }
  }
  return output_buf;
}

} // namespace

namespace
{

using namespace locomotiv;

void execute_node(loco::TransposedConv2D *tr_conv2d)
{
  auto ifm_data = annot_data(tr_conv2d->ifm());
  auto ker_data = annot_data(tr_conv2d->ker());

  validate(ifm_data, "Can't find input data of TransposedConv2D");
  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");

  validate(ker_data, "Can't find kernel data of TransposedConv2D");
  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");

  validate(annot_domain(tr_conv2d->ifm()) == loco::Domain::Feature,
           "IFM of TransposedConv2D is not feature");
  validate(annot_domain(tr_conv2d->ker()) == loco::Domain::Filter,
           "Kernel of TransposedConv2D is not filter");

  std::unique_ptr<NodeData> tr_conv2d_result = nullptr;

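  // Dispatch on the operand data types; only FLOAT32 x FLOAT32 is implemented.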
  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
  {
    auto ifm_buf = ifm_data->as_f32_bufptr();
    auto ker_buf = ker_data->as_f32_bufptr();

    auto tr_conv2d_buf = calc_tr_conv2D<float, float, float>(tr_conv2d, ifm_buf, ker_buf);

    tr_conv2d_result = make_data(tr_conv2d_buf);
  }
  else
  {
    throw std::runtime_error("NYI for these DataTypes");
  }

  assert(tr_conv2d_result != nullptr);

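  // Attach the computed tensor to the node as annotation data and mark its
  // domain as Feature so downstream consumers treat the result as a feature map.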
  annot_data(tr_conv2d, std::move(tr_conv2d_result));
  annot_domain(tr_conv2d, loco::Domain::Feature);
}

} // namespace

namespace locomotiv
{

void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); }

} // namespace locomotiv