/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendContext.h"

#include "TensorBuilder.h"
#include "KernelGenerator.h"
#include "Optimizer.h"
#include "util/logging.h"

#include "ir/OperandIndexMap.h"
#include "ir/OperandIndexSequence.h"

#include <algorithm>
#include <cassert>

namespace onert
{
namespace backend
{
namespace acl_cl
{

void BackendContext::initConsts()
{
  for (auto &op : operation_list())
  {
    constant_initializer->setLayout(op.layout);
    graph()->operations().at(op.index).accept(*constant_initializer);
  }

  for (auto ind : operand_list())
  {
    const auto &obj = graph()->operands().at(ind);
    if (obj.isConstant() && !constant_initializer->exist(ind))
    {
      constant_initializer->registerDefaultInitializer(ind, obj);
    }
  }

  constant_initializer->run();
}

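// planTensors performs liveness-based memory planning for a fixed linear
// execution order: it counts each operand's def and uses, then walks the
// schedule calling notifyFirstUse at the def and notifyLastUse once the use
// count reaches zero, so the TensorBuilder can plan allocation lifetimes.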
void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
                                 const ir::OpSequences &op_seqs,
                                 const ir::LowerInfoMap &lower_info)
{
  ir::OperandIndexMap<uint32_t> uses_map;
  ir::OperandIndexMap<uint32_t> def_map;
  ir::OperandIndexSequence constants;

  // Prepare scanning
  for (auto ind : operand_list())
  {
    const auto &obj = graph()->operands().at(ind);
    const auto &li = lower_info.operand.at(ind);
    if (li->def_factors().getOnlyElement().backend() != backend())
      continue;

    // Ignore unused tensors
    if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
    {
      VERBOSE(planTensors) << "Operand #" << ind.value()
                           << " is not used, so it will not be processed further" << std::endl;
      continue;
    }

    uses_map[ind] = obj.getUses().size();
    def_map[ind] = obj.getDef().valid() ? 1 : 0;

    if (obj.isConstant())
      constants.append(ind);

    auto factor = li->def_factors().getOnlyElement();
    if (!tensor_builder->isRegistered(ind))
    {
      // These tensors do not appear in any op_seq (no use and no def)
      const auto info = obj.info();
      const auto backend_layout = factor.layout();
      // TODO Change tensor info to have permuted shape
      tensor_builder->registerTensorInfo(ind, info, backend_layout);
    }
  }

  // Start scanning to do notify{First|Last}Use for each tensor

  // If a tensor is a constant, increase its use count and allocate it first.
  // Increasing the use count here means the tensor is never deallocated during
  // the scan, i.e. it is deallocated last.
  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
  for (const auto &ind : constants)
  {
    uses_map[ind]++;
    tensor_builder->notifyFirstUse(ind);
  }

  // At each operation:
  // 1. Scan DEF of outputs. If it is the DEF, allocate the tensor
  // 2. Scan inputs for variable tensors; if found, allocate them
  // 3. Scan USE of inputs. Decrease the USE count and deallocate when it reaches 0
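  //
  // Illustrative example (hypothetical operand t1): if t1 has one def and two
  // uses, then def_map[t1] == 1 and uses_map[t1] == 2. Visiting the defining
  // operation clears def_map[t1] and calls notifyFirstUse(t1); after the second
  // consuming operation, uses_map[t1] drops to 0 and notifyLastUse(t1) is
  // called. Constants received an extra use above, so their count stays
  // positive until the final "Dispose and validate" pass.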
  for (const auto op_seq_ind : order)
  {
    const auto &op_seq = op_seqs.at(op_seq_ind);
    for (const auto &op_idx : op_seq.operations())
    {
      auto &op = graph()->operations().at(op_idx);
      auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
      auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;

      // Define outputs
      for (const auto &ind : op_outputs)
      {
        if (!tensor_builder->isRegistered(ind))
          continue;
        assert(def_map.find(ind) != def_map.end());
        if (def_map[ind])
        {
          def_map[ind] = 0;
          tensor_builder->notifyFirstUse(ind);
        }
      }

      // Scan variable tensors
      // These tensors behave like constants, but OperandInfo and LowerInfo treat
      // them as non-constant so that memory planning here uses less memory
      for (const auto &ind : op_inputs)
      {
        if (!tensor_builder->isRegistered(ind))
          continue;
        const auto &operand = graph()->operands().at(ind);
        if (operand.info().isVariable())
        {
          // A variable tensor with a buffer is not supported yet
          assert(operand.data() == nullptr);
          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
          assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
                 lower_info.operand.at(ind)->use_factors().size() == 1);
          assert(uses_map[ind] == 1 && def_map[ind] == 0);
          tensor_builder->notifyFirstUse(ind);
        }
      }

      for (const auto &ind : op_inputs)
      {
        if (!tensor_builder->isRegistered(ind))
          continue;
        assert(uses_map.find(ind) != uses_map.end());
        assert(uses_map[ind] > 0);
        uses_map[ind]--;
        if (uses_map[ind] == 0)
        {
          // Plan deallocation of the static tensor
          tensor_builder->notifyLastUse(ind);
        }
      }
    }
  }

  // Dispose and validate
  for (const auto &ind : constants)
  {
    --uses_map[ind];
    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
    {
      tensor_builder->notifyLastUse(ind);
    }
  }

  assert(
      std::all_of(uses_map.begin(), uses_map.end(),
                  [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));

  assert(
      std::all_of(def_map.begin(), def_map.end(),
                  [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

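// genTensors registers backend tensor info for every operand owned by this
// backend, permuting each shape from the frontend layout to the backend
// layout, then plans memory (linear executors only) and prepares the builder.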
ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
                                            const ir::OpSequences &op_seqs,
                                            const ir::LowerInfoMap &lower_info)
{
  optimizer->optimize();

  for (const auto op_seq_ind : order)
  {
    const auto &op_seq = op_seqs.at(op_seq_ind);
    auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
                    ir::Remove::DUPLICATED;
    for (const auto op_ind : op_seq)
    {
      bool op_assigned = [&]() {
        for (auto &op_info : operation_list())
          if (op_info.index == op_ind)
            return true;
        return false;
      }();
      if (!op_assigned)
        continue;

      const auto &op = graph()->operations().at(op_ind);
      for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
      {
        if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
            std::find(operand_list().begin(), operand_list().end(), index) !=
                operand_list().end())
        {
          const auto &operand_lower_info =
              lower_info.operand.at(index)->def_factors().getOnlyElement();

          // E.g., permute (CPU) -> tensor A -> MaxPool2D (acl_cl)
          // op.getOutputs() of permute (CPU) returns tensor A,
          // but tensor A belongs to the acl_cl backend,
          // so this tensor must NOT be registered for CPU
          if (operand_lower_info.backend() != backend())
            continue;

          const auto &obj = graph()->operands().at(index);
          const auto frontend_layout = op_seq.getLayout();
          const auto backend_layout = operand_lower_info.layout();
          ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
                                       obj.typeInfo(), obj.info().memAllocType(),
                                       obj.isConstant()};
          tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
        }
      }
    }
  }

  // TODO Get compiler options from the compiler rather than from the environment
  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
  {
    planTensors(order, op_seqs, lower_info);
  }
  else
  {
    // For executors that do not have a fixed linear execution order:
    // as a workaround, mark every registered tensor as used up front so that
    // the static memory planner never deallocates it
    for (auto ind : operand_list())
    {
      if (tensor_builder->isRegistered(ind))
        tensor_builder->notifyFirstUse(ind);
    }
  }

  tensor_builder->prepare();

  return tensor_registry.get();
}

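// genKernels lowers each op sequence assigned to this backend into a function
// sequence via KernelGenerator, allocates backend tensors, initializes
// constants, and releases the now-redundant operand data kept in the IR graph.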
FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
                                       const ir::OpSequences &op_seqs)
{
  FunctionMap ret;

  for (auto op_seq_ind : order)
  {
    const auto &op_seq = op_seqs.at(op_seq_ind);
    bool assigned = [&]() {
      for (auto op_info : operation_list())
        if (op_seq.exist(op_info.index))
          return true;
      return false;
    }();
    if (!assigned)
      continue;
    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
    ret.emplace_back(op_seq_ind, std::move(fn_seq));
  }

  tensor_builder->allocate();
  initConsts();

  // NOTE For memory optimization, we want to free some operand data
  for (auto ind : operand_list())
  {
    // TODO Remove const_cast
    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
    obj.releaseData();
  }

  for (auto &it : ret)
  {
    auto &fn_seq = it.second;
    fn_seq->iterate([&](exec::IFunction &ifunc) {
      ifunc.prepare();
      tensor_builder->postFunctionPrepare();
    });
  }

  return ret;
}

} // namespace acl_cl
} // namespace backend
} // namespace onert