/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ExecutorBase.h"

#include "backend/ITensor.h"
#include "backend/controlflow/UserTensor.h"
#include "backend/cpu_common/Tensor.h"
#include "util/logging.h"

namespace onert
{
namespace exec
{

ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
                           const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                           const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
                           const compiler::TensorBuilders &tensor_builders)
    : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
      _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
{
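  // Two construction paths: a primary executor is handed its I/O tensors directly by the
  // caller, while a non-primary (subgraph) executor must look its I/O tensors up from the
  // backends' tensor registries.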
  // TODO Fix the way of knowing whether it is primary or not
  bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
  if (!primary_executor)
  {
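    // Scan every backend's tensor registry until the operand's native tensor is found; if
    // the owning backend supports dynamic tensors, also record how to reallocate it later.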
    auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
      std::vector<std::shared_ptr<backend::ITensor>> list;
      for (auto ind : ind_seq)
      {
        std::shared_ptr<backend::ITensor> tensor;
        for (auto &tensor_builder : tensor_builders)
        {
          auto tensor_registry = tensor_builder->tensorRegistry();
          assert(tensor_registry);
          tensor = tensor_registry->getNativeITensor(ind);
          if (tensor != nullptr)
          {
            if (tensor_builder->supportDynamicTensor())
            {
              DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
              _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
            }
            break;
          }
        }
        assert(tensor != nullptr);
        list.push_back(tensor);
      }
      return list;
    };
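    // Output tensors are resolved the same way; only the dynamic-allocation map differs.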
    auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
      std::vector<std::shared_ptr<backend::ITensor>> list;
      for (auto ind : ind_seq)
      {
        std::shared_ptr<backend::ITensor> tensor;
        for (auto &tensor_builder : tensor_builders)
        {
          auto tensor_registry = tensor_builder->tensorRegistry();
          assert(tensor_registry);
          tensor = tensor_registry->getNativeITensor(ind);
          if (tensor != nullptr)
          {
            if (tensor_builder->supportDynamicTensor())
            {
              DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
              _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
            }
            break;
          }
        }
        assert(tensor != nullptr);
        list.push_back(tensor);
      }
      return list;
    };
    _input_tensors = build_input_tensor_list(_graph.getInputs());
    _output_tensors = build_output_tensor_list(_graph.getOutputs());
  }
  else
  {
    // For the primary graph, all the inputs and outputs belong to the controlflow backend
    auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
    assert(cf_dyn_tensor_builder);

    assert(input_tensors.size() == _graph.getInputs().size());
    assert(output_tensors.size() == _graph.getOutputs().size());
    for (uint32_t i = 0; i < input_tensors.size(); i++)
    {
      auto tensor = input_tensors[i];
      auto ind = _graph.getInputs().at(i);
      DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
      _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
    }
    for (uint32_t i = 0; i < output_tensors.size(); i++)
    {
      auto tensor = output_tensors[i];
      auto ind = _graph.getOutputs().at(i);
      DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
      _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
    }
  }

  // Release each backend's TensorManagers from their TensorBuilder and take ownership of them
  for (auto &tensor_builder : tensor_builders)
  {
    auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
    if (s_tensor_manager != nullptr)
      _tensor_mgrs.insert(std::move(s_tensor_manager));

    if (tensor_builder->supportDynamicTensor())
    {
      auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
      if (d_tensor_manager != nullptr)
        _tensor_mgrs.insert(std::move(d_tensor_manager));
    }
  }
}
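
// This overload appears to serve nested execution (e.g. a controlflow operation invoking a
// subgraph): the caller passes its own tensors plus a permutation function that copies them
// into this executor's inputs. A hedged reading; the recursion note below points the same way.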
void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
                           const std::shared_ptr<IPermuteFunction> &pre_fn)
{
  // To be thread-safe, use a mutex
  // TODO: if all backends used by this executor are thread-safe,
  //       the mutex is unnecessary (otherwise, keep it)
  // Note: a deadlock occurs when an Executor is called recursively.
  std::lock_guard<std::mutex> lock(_mutex);

  assert(src_tensors.size() == _graph.getInputs().size());
  assert(src_tensors.size() == _input_tensors.size());
  for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
  {
    // When the user changes the input shape, the input tensor is dynamic and its memory is
    // not allocated yet. Find the info needed to allocate the dynamic tensor, then allocate
    // memory based on the source tensor's shape set by the caller.
    const auto src_tensor = src_tensors[n];
    auto input_tensor = _input_tensors[n];
    // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
    if (src_tensor != nullptr && input_tensor != nullptr)
    {
      auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
      const auto orig_input_shape = input_tensor->getShape();
      const auto changed_input_shape =
          convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
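      // A shape mismatch means the caller supplied a new input shape; the tensor must be
      // marked dynamic, which requires a backend with a dynamic tensor manager.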
      if (orig_input_shape != changed_input_shape)
      {
        if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
        {
          // The input_tensor must become dynamic, but it belongs to a backend that does not
          // support dynamic tensors
          throw std::runtime_error("Unknown dim is found at execution time for a backend that "
                                   "does not support dynamic tensor");
        }
        else
        {
          input_tensor->set_dynamic();
        }
      }
    }
  }
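
  // pre_fn permutes/copies each source tensor into the corresponding input tensor of this
  // executor, converting layout where the two differ.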
  // TODO Move the pre_fn->run() call into executeImpl()
  assert(pre_fn);
  pre_fn->run();

  executeImpl();
}

void ExecutorBase::execute(const IODescription &desc)
{
  // To be thread-safe, use a mutex
  // TODO: if all backends used by this executor are thread-safe,
  //       the mutex is unnecessary (otherwise, keep it)
  std::lock_guard<std::mutex> lock(_mutex);
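
  // Flow: bind user input buffers -> bind user output buffers -> run -> publish output shapes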

  // Set input(s)
  assert(_input_tensors.size() == desc.inputs.size());
  for (uint32_t i = 0; i < _input_tensors.size(); ++i)
  {
    // TODO Remove dynamic_cast
    auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
    assert(tensor);
    auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
    if (input_shape != desc.input_shape_signature.end())
    {
      tensor->set_dynamic();
      tensor->setShape(input_shape->second);
    }
    // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
    tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
                      desc.inputs[i]->size);

    handleDynamicInputTensor(ir::IOIndex{i}, desc);
  }
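
  // Set output(s): bind the user's output buffers. Their final shapes are published after
  // execution, since outputs may be dynamic.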
  assert(_output_tensors.size() == desc.outputs.size());
  for (uint32_t i = 0; i < _output_tensors.size(); ++i)
  {
    // TODO Remove dynamic_cast
    auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
    assert(tensor);
    tensor->set_dynamic(); // The buffer cannot be resized, but the shape may change
    // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
    tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
                      desc.outputs[i]->size);
  }

  executeImpl();

  // Update output(s) desc
  for (uint32_t n = 0; n < _graph.getOutputs().size(); ++n)
  {
    ir::IOIndex output_index{n};
    // An optional output may be missing from desc
    if (desc.outputs.at(n) == nullptr)
    {
      continue;
    }
    auto &output = *desc.outputs.at(n);

    // Set the shape of the output desc to the tensor's shape, since the tensor may be dynamic
    const auto output_tensor_shape = _output_tensors[n]->getShape();
    output.info.shape(
        convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
  }
}

/**
 * @brief Changes the tensor shape and allocates memory
 *        if the input shape was changed by nnfw_set_input_tensorinfo()
 *
 * @note  Cases are:
 *        1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 *                                                         (a)          (b)
 *
 *           at (a), operand is static, tensor is static - memory dealloc is not needed
 *                   (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
 *           at (b), operand is static, tensor is dynamic - memory dealloc is needed
 *
 *        2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
 *                                                          (a)          (b)
 *
 *           at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
 *                   since it has not been allocated yet
 *           at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
 */
void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
{
  auto shape_sig_found = desc.input_shape_signature.find(io_ind);
  if (shape_sig_found != desc.input_shape_signature.end())
  {
    auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
    if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
      throw std::runtime_error("Unknown dim is found at execution time for a backend that "
                               "does not support dynamic tensor");

    auto changed_input_shape = shape_sig_found->second;
    auto operand_ind = dyn_alloc_info->second.ind;

    dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
  }
}
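
// A minimal sketch of the client-side flow that ends up here, assuming the nnfw C API
// (`session`, the input index 0, and the dims below are hypothetical example values):
//
//   nnfw_tensorinfo ti;
//   ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
//   ti.rank = 2;
//   ti.dims[0] = 1;    // new batch size chosen by the caller
//   ti.dims[1] = 128;  // new feature size chosen by the caller
//   nnfw_set_input_tensorinfo(session, 0, &ti); // records the new shape signature
//   nnfw_run(session); // execute() -> handleDynamicInputTensor() applies the shape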

bool ExecutorBase::hasDynamicInput()
{
  for (auto &tensor : _input_tensors)
  {
    if (tensor->is_dynamic())
      return true;
  }
  return false;
}

} // namespace exec
} // namespace onert