runtime/onert/backend/acl_neon/BackendContext.cc

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
#include "BackendContext.h"

#include "TensorBuilder.h"
#include "KernelGenerator.h"
#include "Optimizer.h"
#include "util/logging.h"
#include "ir/Index.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"

#include <algorithm>
#include <cassert>
#include <utility>
  26
  27 namespace onert
  28 {
  29 namespace backend
  30 {
  31 namespace acl_neon
  32 {
  33
  34 void BackendContext::initConsts()
  35 {
  36   for (auto &op : operation_list())
  37   {
  38     constant_initializer->setLayout(op.layout);
  39     graph()->operations().at(op.index).accept(*constant_initializer);
  40   }
  41
  42   for (auto ind : operand_list())
  43   {
  44     const auto &obj = graph()->operands().at(ind);
  45     if (obj.isConstant() && !constant_initializer->exist(ind))
  46     {
  47       constant_initializer->registerDefaultInitializer(ind, obj);
  48     }
  49   }
  50
  51   constant_initializer->run();
  52 }
  53
  54 void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
  55                                  const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
  56 {
  57   ir::OperandIndexMap<uint32_t> uses_map;
  58   ir::OperandIndexMap<uint32_t> def_map;
  59   ir::OperandIndexSequence constants;
  60
  61   // Prepare scanning
  62   for (auto ind : operand_list())
  63   {
  64     const auto &obj = graph()->operands().at(ind);
  65     const auto &li = lower_info.operand.at(ind);
  66     if (li->def_factors().getOnlyElement().backend() != backend())
  67       continue;
  68
  69     // Ignore unused tensor
  70     if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
  71     {
  72       VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
  73                            << std::endl;
  74       return;
  75     }
  76
  77     uses_map[ind] = obj.getUses().size();
  78     def_map[ind] = obj.getDef().valid() ? 1 : 0;
  79
  80     if (obj.isConstant())
  81       constants.append(ind);
  82
  83     auto factor = li->def_factors().getOnlyElement();
  84     if (!tensor_builder->isRegistered(ind))
  85     {
  86       // These tensors do not exist in any op_seq (No use and def)
  87       const auto info = obj.info();
  88       const auto backend_layout = factor.layout();
  89       // TODO Change tensor info to have permuted shape
  90       tensor_builder->registerTensorInfo(ind, info, backend_layout);
  91     }
  92   }
  93
  94   // Start scanning to do notify{First|Last}Use for each tensor
  95
  96   // If a tensor is a constant, increase the use of the tensor and allocate it first.
  97   // Increasing use count here makes the tensor never be deallocated, i.e it they will be
  98   // deallocated last.
  99   VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
 100   for (const auto &ind : constants)
 101   {
 102     uses_map[ind]++;
 103     tensor_builder->notifyFirstUse(ind);
 104   }
 105
 106   // At each operation,
 107   // 1. Scan DEF of outputs. If the DEF, allocate it
 108   // 2. Scan DEF of inputs. If variable tensor, allocate it
 109   // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
 110   for (const auto op_seq_ind : order)
 111   {
 112     const auto &op_seq = op_seqs.at(op_seq_ind);
 113     for (const auto &op_idx : op_seq.operations())
 114     {
 115       auto &op = graph()->operations().at(op_idx);
 116       auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
 117       auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
 118
 119       // Define outputs
 120       for (const auto &ind : op_outputs)
 121       {
 122         if (!tensor_builder->isRegistered(ind))
 123           continue;
 124         assert(def_map.find(ind) != def_map.end());
 125         if (def_map[ind])
 126         {
 127           def_map[ind] = 0;
 128           tensor_builder->notifyFirstUse(ind);
 129         }
 130       }
 131
 132       // Scan variable tensors
 133       // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
 134       // non-constant because of less memory usage by memory planning in here
 135       for (const auto &ind : op_inputs)
 136       {
 137         if (!tensor_builder->isRegistered(ind))
 138           continue;
 139         const auto &operand = graph()->operands().at(ind);
 140         if (operand.info().isVariable())
 141         {
 142           // The variable tensor with buffer is not supported yet
 143           assert(operand.data() == nullptr);
 144           assert(operand.getUses().size() == 1 && !operand.getDef().valid());
 145           assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
 146                  lower_info.operand.at(ind)->use_factors().size() == 1);
 147           assert(uses_map[ind] == 1 && def_map[ind] == 0);
 148           tensor_builder->notifyFirstUse(ind);
 149         }
 150       }
 151
 152       for (const auto &ind : op_inputs)
 153       {
 154         if (!tensor_builder->isRegistered(ind))
 155           continue;
 156         assert(uses_map.find(ind) != uses_map.end());
 157         assert(uses_map[ind] > 0);
 158         uses_map[ind]--;
 159         if (uses_map[ind] == 0)
 160         {
 161           // plan for deallocation of static tensornode
 162           tensor_builder->notifyLastUse(ind);
 163         }
 164       }
 165     }
 166   }
 167
 168   // Dispose and validate
 169   for (const auto &ind : constants)
 170   {
 171     --uses_map[ind];
 172     if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
 173     {
 174       tensor_builder->notifyLastUse(ind);
 175     }
 176   }
 177
 178   assert(
 179       std::all_of(uses_map.begin(), uses_map.end(),
 180                   [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
 181
 182   assert(
 183       std::all_of(def_map.begin(), def_map.end(),
 184                   [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
 185 }
 186
 187 ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
 188                                             const ir::OpSequences &op_seqs,
 189                                             const ir::LowerInfoMap &lower_info)
 190 {
 191   optimizer->optimize();
 192
 193   for (const auto op_seq_ind : order)
 194   {
 195     const auto &op_seq = op_seqs.at(op_seq_ind);
 196     auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
 197                     ir::Remove::DUPLICATED;
 198     for (const auto op_ind : op_seq)
 199     {
 200       bool op_assigned = [&]() {
 201         for (auto &op_info : operation_list())
 202           if (op_info.index == op_ind)
 203             return true;
 204         return false;
 205       }();
 206       if (!op_assigned)
 207         continue;
 208
 209       const auto &op = graph()->operations().at(op_ind);
 210       for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
 211       {
 212         if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
 213             find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
 214         {
 215           const auto &operand_lower_info =
 216               lower_info.operand.at(index)->def_factors().getOnlyElement();
 217
 218           // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
 219           // op.getOutputs() of permute (CPU) returns tensor A
 220           // but tensor A belongs to the backend of acl_cl.
 221           // So, we have to make this tensor NOT registered for CPU.
 222           if (operand_lower_info.backend() != backend())
 223             continue;
 224
 225           const auto &obj = graph()->operands().at(index);
 226           const auto frontend_layout = op_seq.getLayout();
 227           const auto backend_layout = operand_lower_info.layout();
 228           ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
 229                                        obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
 230           tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
 231         }
 232       }
 233     }
 234   }
 235
 236   // TODO Get compiler options from compiler, and use it rather than getting it from Env
 237   if (util::getConfigString(util::config::EXECUTOR) == "Linear")
 238   {
 239     planTensors(order, op_seqs, lower_info);
 240   }
 241   else
 242   {
 243     // For the executors that does not have fixed linear execution order:
 244     // To make tensors never be deallocated, this is a workaround to use static memory planner
 245     for (auto ind : operand_list())
 246     {
 247       if (tensor_builder->isRegistered(ind))
 248         tensor_builder->notifyFirstUse(ind);
 249     }
 250   }
 251
 252   tensor_builder->prepare();
 253
 254   return tensor_registry.get();
 255 }
 256
 257 FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
 258                                        const ir::OpSequences &op_seqs)
 259 {
 260   FunctionMap ret;
 261
 262   for (auto op_seq_ind : order)
 263   {
 264     const auto &op_seq = op_seqs.at(op_seq_ind);
 265     bool assigned = [&]() {
 266       for (auto op_info : operation_list())
 267         if (op_seq.exist(op_info.index))
 268           return true;
 269       return false;
 270     }();
 271     if (!assigned)
 272       continue;
 273     auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
 274     ret.emplace_back(op_seq_ind, std::move(fn_seq));
 275   }
 276
 277   tensor_builder->allocate();
 278   initConsts();
 279
 280   // NOTE For memory optimization, we want to free some operand data
 281   for (auto ind : operand_list())
 282   {
 283     // TODO Remove const_cast
 284     auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
 285     obj.releaseData();
 286   }
 287
 288   for (auto &it : ret)
 289   {
 290     auto &fn_seq = it.second;
 291     fn_seq->iterate([&](exec::IFunction &ifunc) {
 292       ifunc.prepare();
 293       tensor_builder->postFunctionPrepare();
 294     });
 295   }
 296
 297   return ret;
 298 }
 299
} // namespace acl_neon
 301 } // namespace backend
 302 } // namespace onert