runtime/onert/core/src/exec/IPermuteFunction.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef __ONERT_EXEC_I_PERMUTE_FUNCTION_H__
  18 #define __ONERT_EXEC_I_PERMUTE_FUNCTION_H__
  19
#include "feature/IndexIterator.h"
#include "feature/nchw/Reader.h"
#include "feature/nchw/View.h"
#include "feature/nhwc/Reader.h"
#include "feature/nhwc/View.h"

#include "backend/ITensor.h"
#include "exec/IFunction.h"
#include "ir/Index.h"
#include "ir/Shape.h"
#include "util/Utils.h"

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <typeinfo>
#include <unordered_map>
#include <vector>
  35
  36 namespace onert
  37 {
  38 namespace exec
  39 {
  40
  41 inline void UpdateOffsets(::onert::backend::ITensor *src, ::onert::backend::ITensor *dst,
  42                           const ::onert::ir::Shape &loop_shape, std::vector<size_t> &src_offsets,
  43                           std::vector<size_t> &dst_offsets)
  44 {
  45   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
  46     src_offsets.emplace_back(src->calcOffset(coords));
  47     dst_offsets.emplace_back(dst->calcOffset(coords));
  48   });
  49 }
  50
  51 inline void CopyStatic(const uint8_t *src_buffer, uint8_t *dst_buffer,
  52                        const std::vector<size_t> &src_offsets,
  53                        const std::vector<size_t> &dst_offsets, size_t copy_len)
  54 {
  55   assert(src_offsets.size() == dst_offsets.size());
  56   for (size_t i = 0; i < src_offsets.size(); ++i)
  57   {
  58     memcpy(dst_buffer + dst_offsets.at(i), src_buffer + src_offsets.at(i), copy_len);
  59   }
  60 }
  61
  62 inline void CopyDynamic(const ::onert::backend::ITensor *src, const ::onert::backend::ITensor *dst,
  63                         uint8_t *dst_buffer, const ::onert::ir::Shape &loop_shape, size_t copy_len)
  64 {
  65   ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
  66     // Copy src tensor's data to dst_buffer with calculated offset of dst tensor
  67     memcpy(dst_buffer + dst->calcOffset(coords), src->buffer() + src->calcOffset(coords), copy_len);
  68   });
  69 }
  70
  71 class IPermuteFunction : public IFunction
  72 {
  73 protected:
  74   enum class PermuteType
  75   {
  76     NHWC_TO_NCHW,
  77     NCHW_TO_NHWC,
  78     COPY
  79   };
  80
  81 public:
  82   virtual void run() override
  83   {
  84     // TODO Optimization : Make control does not reach here? when (_src_tensors.size() == 0)
  85     assert(_src_tensors.size() == _dst_tensors.size());
  86     if (_src_tensors_offsets.size() == 0)
  87     {
  88       _src_tensors_offsets.resize(_src_tensors.size());
  89       _dst_tensors_offsets.resize(_dst_tensors.size());
  90     }
  91     assert(_src_tensors.size() == _src_tensors_offsets.size());
  92     assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
  93
  94     for (size_t i = 0; i < _src_tensors.size(); ++i)
  95     {
  96       auto src_tensor = _src_tensors.at(i);
  97       auto dst_tensor = _dst_tensors.at(i);
  98       auto &src_offsets = _src_tensors_offsets.at(i);
  99       auto &dst_offsets = _dst_tensors_offsets.at(i);
 100       if (src_tensor != dst_tensor)
 101       {
 102         const auto rank = src_tensor->num_dimensions();
 103         permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 104       }
 105     }
 106   }
 107
 108   virtual void prepare() override { optimize(); }
 109
 110   virtual void optimize() = 0;
 111
 112 protected:
 113   void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
 114                std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
 115   {
 116     if (src_tensor->total_size() == 0)
 117     {
 118       assert(dst_tensor->total_size() == 0);
 119       return;
 120     }
 121
 122     assert(src_tensor != dst_tensor);
 123     assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type()));
 124     switch (src_tensor->data_type())
 125     {
 126       case ir::DataType::FLOAT32:
 127         permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 128         break;
 129       case ir::DataType::INT32:
 130         permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 131         break;
 132       case ir::DataType::UINT32:
 133         permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 134         break;
 135       case ir::DataType::BOOL8:
 136       case ir::DataType::QUANT_UINT8_ASYMM:
 137       case ir::DataType::UINT8:
 138         permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 139         break;
 140       case ir::DataType::QUANT_INT8_ASYMM:
 141       case ir::DataType::QUANT_INT8_SYMM:
 142         permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 143         break;
 144       case ir::DataType::INT64:
 145         permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
 146         break;
 147       default:
 148         throw std::runtime_error("IPermuteFunction: Not supported data type");
 149         break;
 150     }
 151   }
 152
 153 private:
 154   // TODO make src const by proving const access()
 155   template <class T>
 156   void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank,
 157                std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
 158   {
 159     assert(src->total_size() != 0 && dst->total_size() != 0);
 160     // If dst is subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWirteBuffer
 161     if (dst->needMemoryMap() && !dst->is_subtensor())
 162     {
 163       // A assertion to check mapping without calling map()
 164       // Now there is no case where both src and dst have cl buffer.
 165       assert(!src->needMemoryMap());
 166
 167       if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
 168       {
 169         src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
 170       }
 171       else
 172       {
 173         // TODO Optimize this block in case of that padding size of dst is big.
 174         _buffers_map[dst].reserve(dst->total_size());
 175         auto dst_buffer = _buffers_map[dst].data();
 176         src->access([&](backend::ITensor &) {
 177           permute<T>(src, dst, rank, dst_buffer, dst->total_size(), src_offsets, dst_offsets);
 178         });
 179         dst->enqueueWriteBuffer(dst_buffer, false);
 180       }
 181     }
 182     else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
 183              !dst->has_padding() && src->layout() == dst->layout())
 184     {
 185       assert(!dst->needMemoryMap());
 186       dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
 187     }
 188     else
 189     {
 190       auto fn = [&](backend::ITensor &) {
 191         dst->access([&](backend::ITensor &) {
 192           permute<T>(src, dst, rank, dst->buffer(), dst->total_size(), src_offsets, dst_offsets);
 193         });
 194       };
 195       src->access(fn);
 196     }
 197   }
 198
 199   template <class T>
 200   void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank, uint8_t *dst_buffer,
 201                size_t dst_size, std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
 202   {
 203     assert(dst_buffer != nullptr);
 204     assert(dst_size == dst->total_size());
 205
 206     const auto permute_type = [&]() -> PermuteType {
 207       if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW)
 208       {
 209         return PermuteType::NHWC_TO_NCHW;
 210       }
 211       else if (src->layout() == ir::Layout::NCHW && dst->layout() == ir::Layout::NHWC)
 212       {
 213         return PermuteType::NCHW_TO_NHWC;
 214       }
 215       else
 216       {
 217         return PermuteType::COPY;
 218       }
 219     }();
 220     if (rank == 4 && permute_type != PermuteType::COPY)
 221     {
 222       switch (permute_type)
 223       {
 224         case PermuteType::NHWC_TO_NCHW:
 225         {
 226           ir::FeatureShape shape;
 227           shape.N = dst->dimension(0);
 228           shape.C = dst->dimension(1);
 229           shape.H = dst->dimension(2);
 230           shape.W = dst->dimension(3);
 231
 232           typename feature::nchw::View<T>::Strides strides;
 233           const auto start_offset = dst->calcOffset({0, 0, 0, 0});
 234           strides.W = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
 235           strides.H = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
 236           strides.C = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
 237           strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
 238
 239           const feature::nhwc::Reader<T> from(src);
 240           feature::nchw::View<T> into(shape, strides,
 241                                       reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
 242           feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
 243             const auto value = from.at(batch, row, col, ch);
 244             into.at(batch, ch, row, col) = value;
 245           };
 246           break;
 247         }
 248         case PermuteType::NCHW_TO_NHWC:
 249         {
 250           ir::FeatureShape shape;
 251           shape.N = dst->dimension(0);
 252           shape.H = dst->dimension(1);
 253           shape.W = dst->dimension(2);
 254           shape.C = dst->dimension(3);
 255
 256           typename feature::nhwc::View<T>::Strides strides;
 257           const auto start_offset = dst->calcOffset({0, 0, 0, 0});
 258           strides.C = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
 259           strides.W = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
 260           strides.H = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
 261           strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
 262
 263           const feature::nchw::Reader<T> from(src);
 264           feature::nhwc::View<T> into(shape, strides,
 265                                       reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
 266           feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
 267             const auto value = from.at(batch, ch, row, col);
 268             into.at(batch, row, col, ch) = value;
 269           };
 270           break;
 271         }
 272         default:
 273         {
 274           throw std::runtime_error("Unsupported Permutation");
 275           break;
 276         }
 277       }
 278     }
 279     else if (!src->has_padding() && !dst->has_padding())
 280     {
 281       auto src_size = src->total_size();
 282       assert(src_size <= dst->total_size());
 283       memcpy(dst_buffer, src->buffer(), src_size);
 284     }
 285     else
 286     {
 287       auto loop_shape = src->getShape();
 288       const auto copy_axis = loop_shape.rank() - 1;
 289       const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T);
 290       loop_shape.dim(copy_axis) = 1;
 291
 292       if (src->is_dynamic())
 293       {
 294         assert(dst->is_dynamic());
 295         CopyDynamic(src, dst, dst_buffer, loop_shape, copy_len);
 296       }
 297       else
 298       {
 299         // TODO Uncomment the assertion below
 300         // assert(!dst->is_dynamic() || dst is output of graph);
 301         if (src_offsets.size() == 0)
 302         {
 303           assert(dst_offsets.size() == 0);
 304
 305           auto loop_shape = src->getShape();
 306           const auto copy_axis = loop_shape.rank() - 1;
 307           loop_shape.dim(copy_axis) = 1;
 308           UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets);
 309         }
 310         CopyStatic(src->buffer(), dst_buffer, src_offsets, dst_offsets, copy_len);
 311       }
 312     }
 313   }
 314
 315 protected:
 316   // NOTE The typeid expression is lvalue expression which refers to an object with static storage
 317   //      duration, of the polymorphic type const std::type_info or of some type derived from it.
 318   //      So std::type_info is non-copyable
 319   const std::type_info &underlying_type(ir::DataType type) const
 320   {
 321     switch (type)
 322     {
 323       case ir::DataType::FLOAT32:
 324         return typeid(float);
 325       case ir::DataType::INT32:
 326         return typeid(int32_t);
 327       case ir::DataType::UINT32:
 328         return typeid(uint32_t);
 329       case ir::DataType::INT64:
 330         return typeid(int64_t);
 331       case ir::DataType::BOOL8:
 332       case ir::DataType::QUANT_UINT8_ASYMM:
 333       case ir::DataType::UINT8:
 334         return typeid(uint8_t);
 335       case ir::DataType::QUANT_INT8_ASYMM:
 336       case ir::DataType::QUANT_INT8_SYMM:
 337         return typeid(int8_t);
 338       default:
 339         throw std::runtime_error("IPermuteFunction: Not supported data type");
 340     }
 341   }
 342
 343 protected:
 344   std::vector<backend::ITensor *> _src_tensors;
 345   std::vector<backend::ITensor *> _dst_tensors;
 346   std::vector<std::vector<size_t>> _src_tensors_offsets;
 347   std::vector<std::vector<size_t>> _dst_tensors_offsets;
 348   std::unordered_map<const backend::ITensor *, std::vector<uint8_t>> _buffers_map;
 349 };
 350
 351 } // namespace exec
 352 } // namespace onert
 353
 354 #endif // __ONERT_EXEC_I_PERMUTE_FUNCTION_H__