runtime/onert/backend/acl_common/AclTensorBuilder.h

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
  18 #define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
  19
  20 #include <memory>
  21 #include <queue>
  22
  23 #include <arm_compute/core/Types.h>
  24 #include "ir/OperandIndexMap.h"
  25 #include <ir/Operands.h>
  26 #include "AclTensorManager.h"
  27 #include "AclTensorRegistry.h"
  28 #include <memory>
  29 #include "ParentInfo.h"
  30 #include <util/Utils.h>
  31
  32 namespace onert
  33 {
  34 namespace backend
  35 {
  36 namespace acl_common
  37 {
  38
  39 enum class UsesType
  40 {
  41   FIRST,
  42   LAST
  43 };
  44
  45 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
  46 {
  47 public:
  48   using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
  49
  50   AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
  51
  52   /**
  53    * @brief     Register tensor information to allocate on ACL-CL backend
  54    * @param[in] ind    Operand index
  55    * @param[in] info   Tensor information
  56    * @param[in] layout Tensor data layout
  57    */
  58   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
  59                           ir::Layout backend_layout);
  60
  61   void notifyFirstUse(const ir::OperandIndex &);
  62   void notifyLastUse(const ir::OperandIndex &);
  63
  64   bool isRegistered(const ir::OperandIndex &) const;
  65
  66   void prepare(void);
  67   void allocate();
  68   void postFunctionPrepare();
  69
  70   T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
  71
  72   void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
  73   {
  74     assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
  75                                                                 : true);
  76     _uses_count_map[index] = num_uses;
  77   }
  78
  79   void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
  80   {
  81     _parent_map = std::move(parent_map);
  82   }
  83
  84   bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
  85
  86   /**
  87    * @brief     Check child tensor is allocated as subtensor of parent tensor
  88    * @param[in] parent  Index of parent
  89    * @param[in] child   Index of child
  90    * @return    @c true if child is allocated as subtensor of parent, otherwise @c false
  91    */
  92   bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
  93
  94 private:
  95   void buildTensors(void);
  96   ir::OperandIndex findRootParent(ir::OperandIndex index);
  97
  98 private:
  99   const ir::Operands &_operands;
 100   ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
 101   ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
 102   ir::OperandIndexMap<size_t> _uses_count_map;
 103
 104   std::unique_ptr<T_AclTensorManager> _tensor_mgr;
 105
 106   // for linear executor
 107   std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
 108
 109   // Extra info for concat elimination
 110   ir::OperandIndexMap<ParentInfo> _parent_map;
 111 };
 112
 113 } // namespace acl_common
 114 } // namespace backend
 115 } // namespace onert
 116
 117 #include <cassert>
 118 #include <stack>
 119
 120 #include "Convert.h"
 121
 122 #include "util/logging.h"
 123
 124 namespace onert
 125 {
 126 namespace backend
 127 {
 128 namespace acl_common
 129 {
 130
 131 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 132 AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
 133                                                                      T_AclTensorManager *tensor_mgr)
 134     : _operands{operands}, _tensor_mgr{tensor_mgr}
 135 {
 136   assert(_tensor_mgr);
 137 }
 138
 139 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 140 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::registerTensorInfo(
 141     const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout)
 142 {
 143   assert(_tensor_mgr->constTensors().size() == 0);
 144   assert(_tensor_mgr->nonconstTensors().size() == 0);
 145
 146   _uses_count_map[ind] = _operands.at(ind).getUses().size();
 147
 148   if (_parent_map.count(ind) == 0)
 149   {
 150     // Normal Tensors
 151     _tensor_info_map.emplace(ind, info);
 152     _tensor_layout_map.insert({ind, backend_layout});
 153   }
 154   else
 155   {
 156     // SubTensors
 157     assert(!info.isConstant() && "Subtensors of constants are not supported yet.");
 158
 159     // Update offset info and emplace
 160     auto &parent_info = _parent_map[ind];
 161     const auto &obj = _operands.at(ind);
 162     auto parent_index = parent_info.parent;
 163     auto &offset = parent_info.coordinates;
 164     auto frontend_layout = parent_info.frontend_layout;
 165
 166     assert(obj.shape().rank() <= ir::Shape::MAX_RANK);
 167     auto shape = obj.shape();
 168     if (_operands.at(parent_index).shape().rank() >= 4 && frontend_layout == ir::Layout::NHWC &&
 169         backend_layout == ir::Layout::NCHW)
 170     {
 171       // Permutation changing layout beyond 4-D is not supported yet
 172       const auto parent_rank = _operands.at(parent_index).shape().rank();
 173       assert(parent_rank == 4);
 174       shape.extendRank(parent_rank);
 175       offset = {offset[0], offset[3], offset[1], offset[2]};
 176     }
 177     else if (_operands.at(parent_index).shape().rank() >= 4 &&
 178              frontend_layout == ir::Layout::NHWC && backend_layout == ir::Layout::NCHW)
 179     {
 180       // Permutation changing layout beyond 4-D is not supported yet
 181       const auto parent_rank = _operands.at(parent_index).shape().rank();
 182       assert(parent_rank == 4);
 183       shape.extendRank(parent_rank);
 184       offset = {offset[0], offset[2], offset[3], offset[1]};
 185     }
 186     auto new_shape = permuteShape(shape, frontend_layout, backend_layout);
 187     auto oi = ir::OperandInfo::createStaticInfo(new_shape, obj.typeInfo());
 188     _tensor_info_map.emplace(ind, oi);
 189   }
 190 }
 191
 192 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 193 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyFirstUse(const ir::OperandIndex &ind)
 194 {
 195   _lifetime_seq.emplace_back(UsesType::FIRST, ind);
 196 }
 197
 198 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 199 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::notifyLastUse(const ir::OperandIndex &ind)
 200 {
 201   _lifetime_seq.emplace_back(UsesType::LAST, ind);
 202 }
 203
 204 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 205 bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isRegistered(
 206     const ir::OperandIndex &ind) const
 207 {
 208   return _tensor_info_map.find(ind) != _tensor_info_map.end();
 209 }
 210
 211 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 212 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
 213 {
 214   buildTensors();
 215 }
 216
 217 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 218 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
 219 {
 220   // Update lifetime sequence to apply subtensor optimization
 221
 222   std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
 223   std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
 224       [&](ir::OperandIndex ind) -> ir::OperandIndex & {
 225     ir::OperandIndex &ret = root_map[ind];
 226
 227     // We know the root parent value already
 228     if (ret.valid())
 229       return ret;
 230
 231     auto itr = _parent_map.find(ind);
 232     if (itr == _parent_map.end())
 233     {
 234       // If there is no parent, let's store the value of itself
 235       return ret = ind;
 236     }
 237     else
 238     {
 239       return ret = find_root(itr->second.parent);
 240     }
 241   };
 242
 243   ir::OperandIndexMap<bool> first_use_check;
 244   ir::OperandIndexMap<bool> last_use_check;
 245   std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
 246   for (size_t i = 0; i < _lifetime_seq.size(); i++)
 247   {
 248     auto &entry = _lifetime_seq[i];
 249     if (entry.first != UsesType::FIRST)
 250       continue;
 251     auto root_ind = find_root(entry.second);
 252     if (first_use_check[root_ind])
 253       continue;
 254     first_use_check[root_ind] = true;
 255     lifetime_map[i] = {UsesType::FIRST, root_ind};
 256   }
 257
 258   for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
 259   {
 260     auto &entry = _lifetime_seq[i];
 261     if (entry.first != UsesType::LAST)
 262       continue;
 263     auto root_ind = find_root(entry.second);
 264     if (last_use_check[root_ind])
 265       continue;
 266     last_use_check[root_ind] = true;
 267     lifetime_map[i] = {UsesType::LAST, root_ind};
 268   }
 269
 270   for (auto &entry : lifetime_map)
 271   {
 272     auto &use = entry.second;
 273     auto use_type = use.first;
 274     auto use_index = use.second;
 275     assert(use_index.valid());
 276     if (use_type == UsesType::FIRST)
 277       _tensor_mgr->startLifetime(use_index);
 278     else
 279       _tensor_mgr->finishLifetime(use_index);
 280   }
 281
 282   _tensor_mgr->allocateConsts();
 283
 284   // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses
 285   //      After refactoring BackendContext we can uncomment this
 286   // assert(_tensor_info_map.size() ==
 287   //       _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
 288   //       _parent_map.size());
 289   _tensor_mgr->allocateNonconsts();
 290
 291   _tensor_mgr->allocateInternalBufferManager();
 292 }
 293
 294 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 295 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(void)
 296 {
 297   _tensor_mgr->tryDeallocConstants();
 298 }
 299
 300 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 301 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildTensors(void)
 302 {
 303   assert(_tensor_mgr->constTensors().size() == 0);
 304   assert(_tensor_mgr->nonconstTensors().size() == 0);
 305
 306   // Normal tensors
 307   for (auto &entry : _tensor_info_map)
 308   {
 309     auto ind = entry.first;
 310     if (_parent_map.count(ind) > 0)
 311       continue;
 312
 313     const auto &info = entry.second;
 314     const auto &backend_layout = _tensor_layout_map[ind];
 315     auto tensor_info =
 316         asTensorInfo(info.shape(), info.typeInfo(), ir::Layout::UNKNOWN, backend_layout, true);
 317     _tensor_mgr->buildTensor(ind, tensor_info, info.shape().rank(), info.isConstant(),
 318                              _uses_count_map[ind]);
 319   }
 320
 321   // Subtensors
 322   assert(_tensor_mgr->nonconstSubtensors().size() == 0);
 323   // TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
 324   //      `Optimizer` iterates the entire OpSequences, so there is a bug if iterating _parent_map
 325   for (auto &entry : _tensor_info_map)
 326   {
 327     auto ind = entry.first;
 328     if (_parent_map.count(ind) == 0)
 329       continue;
 330
 331     // To make subtensor, parent tensor must be made first
 332     // For this condition, use stack
 333     //  1) Push one subtensor index to stack (iterate subtensors)
 334     //  2) If tensor at stack top is already made, pop and go to 4)
 335     //  3) If tensor pushed at 1) is not made, check parent tensor
 336     //    3-1) If parent tensor is already made, we can make child tensor
 337     //         Make child tensor and pop, go to 4)
 338     //    3-2) If parent tensor is not made, we can't make child tensor yet
 339     //         Push parent tensor index to stack and return to 4)
 340     //  4) If stack is empty, return to 1), else return to 2)
 341     auto &subtensors = _tensor_mgr->nonconstSubtensors();
 342
 343     std::stack<ir::OperandIndex> stack;
 344     stack.push(ind);
 345
 346     while (!stack.empty())
 347     {
 348       const auto current = stack.top();
 349       const auto &tensor_info = _tensor_info_map.at(current);
 350       const auto &parent_info = _parent_map.at(current);
 351
 352       // Already generated SubTensor
 353       if (subtensors.find(current) != subtensors.end())
 354       {
 355         stack.pop();
 356         continue;
 357       }
 358
 359       auto parent = parent_info.parent;
 360       std::shared_ptr<T_ITensor> parent_tensor = _tensor_mgr->findTensorAsParent(parent);
 361       if (!parent_tensor)
 362       {
 363         // Cannot find allocated parent tensor: allocate parent first
 364         assert(_parent_map.count(parent) > 0);
 365         stack.push(parent);
 366         continue;
 367       }
 368       assert(parent_tensor != nullptr);
 369
 370       // Child's type should be same with parent
 371       assert(tensor_info.typeInfo().offset() ==
 372              parent_tensor->info()->quantization_info().uniform().offset);
 373       assert(tensor_info.typeInfo().scale() ==
 374              parent_tensor->info()->quantization_info().uniform().scale);
 375       assert(tensor_info.typeInfo().type() == parent_tensor->data_type());
 376
 377       // NOTE SubTensor's layout must be the same with layout of parent tensor
 378       const auto &root_parent = findRootParent(parent);
 379       const auto &backend_layout = _tensor_layout_map[root_parent];
 380
 381       auto shape = asTensorShape(tensor_info.shape(), ir::Layout::UNKNOWN, backend_layout, true);
 382       ::arm_compute::Coordinates coordinates =
 383           asTensorCoordinate(parent_info.coordinates, ir::Layout::UNKNOWN, backend_layout);
 384       _tensor_mgr->buildSubtensor(parent, current, shape, coordinates, tensor_info.shape().rank(),
 385                                   true);
 386       stack.pop();
 387     }
 388   }
 389 }
 390
 391 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 392 bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
 393     const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
 394 {
 395   for (auto &cand : seq)
 396   {
 397     if (!isSubTensorOf(parent, cand))
 398     {
 399       return false;
 400     }
 401   }
 402   return true;
 403 }
 404
 405 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 406 bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::isSubTensorOf(
 407     const ir::OperandIndex &parent, const ir::OperandIndex &child)
 408 {
 409   auto itr = _parent_map.find(child);
 410   if (itr == _parent_map.end())
 411   {
 412     return false;
 413   }
 414
 415   return itr->second.parent == parent;
 416 }
 417
 418 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 419 ir::OperandIndex
 420 AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::findRootParent(ir::OperandIndex ind)
 421 {
 422   if (_parent_map.find(ind) == _parent_map.end())
 423     return ind;
 424
 425   auto parent_ind = _parent_map.at(ind).parent;
 426   return findRootParent(parent_ind);
 427 }
 428
 429 } // namespace acl_common
 430 } // namespace backend
 431 } // namespace onert
 432
 433 #endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__