2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "WhileLayer.h"
19 #include <backend/ITensor.h>
20 #include "exec/ExecutorBase.h"
21 #include <misc/polymorphic_downcast.h>
22 #include "PermuteLayer.h"
33 WhileLayer::WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
34 const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
35 const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
36 const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
37 const ir::SubgraphIndex &cond_subg_index,
38 const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map)
39 : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
40 _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
41 _output_tensors{output_tensors}, _outputs_dyn_alloc_info{outputs_dyn_alloc_info},
42 _executor_map{executor_map}
44 // At this point, executor_map may not have executors of cond subg and body subg
47 void WhileLayer::run()
49 // Copy "_input_tensors" -> "cond subg inputs"
51 // Start loop while output of cond subg is ture
52 // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
53 // outputs" -> "body subg inputs" in the second or more iterations
55 // // Copy "body subg outputs" -> "cond subg inputs"
57 // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
59 auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
60 _executor_map->at(_cond_subg_index).get());
61 auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
62 _executor_map->at(_body_subg_index).get());
64 const auto &cond_graph = cond_exec->graph();
65 const auto &cond_inputs_dyn_alloc = cond_exec->getInputsDynamicAllocInfo();
66 const auto &body_graph = body_exec->graph();
67 const auto &body_inputs_dyn_alloc = body_exec->getInputsDynamicAllocInfo();
69 std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
70 std::vector<std::shared_ptr<backend::ITensor>> cond_input_tensors;
71 std::vector<std::shared_ptr<backend::ITensor>> body_input_tensors;
72 std::vector<std::shared_ptr<backend::ITensor>> body_output_tensors;
73 std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
75 // Add only used tensors in cond subgraph
76 assert(cond_graph.getInputs().size() == _input_tensors.size());
77 assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
78 for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
80 const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
81 if (cond_input.getUses().size() > 0)
83 input_tensors.emplace_back(_input_tensors.at(i));
84 cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
87 const auto permute_op_input_to_cond_input =
88 std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
90 // Add only used tensors among outputs of while operation
91 assert(_output_indices.size() == _input_tensors.size());
92 assert(_output_indices.size() == _output_tensors.size());
93 input_tensors.clear();
94 output_tensors.clear();
95 for (size_t i = 0; i < _output_indices.size(); ++i)
97 const auto &output_index = _output_indices.at(i);
98 const auto &output = _graph.operands().at(output_index);
99 if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
101 input_tensors.emplace_back(_input_tensors.at(i));
102 output_tensors.emplace_back(_output_tensors.at(i));
105 const auto permute_op_input_to_op_output =
106 std::make_shared<PermuteLayer>(input_tensors, output_tensors, _outputs_dyn_alloc_info);
108 // Add all tensors with unused tensors in body subgraph because unused input tensors will be
109 // copied output tensors in body subgraph
110 assert(_input_tensors.size() == body_exec->getInputTensors().size());
111 input_tensors = _input_tensors;
112 body_input_tensors = body_exec->getInputTensors();
113 const auto permute_op_input_to_body_input =
114 std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, body_inputs_dyn_alloc);
116 // Add only used tensors in cond subgraph
117 assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
118 assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
119 body_output_tensors.clear();
120 cond_input_tensors.clear();
121 for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
123 const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
124 if (cond_input.getUses().size() > 0)
126 body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
127 cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
130 const auto permute_body_output_to_cond_input = std::make_shared<PermuteLayer>(
131 body_output_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
133 // Add only used tensors in body subgraph
134 assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
135 assert(body_graph.getInputs().size() == body_exec->getInputTensors().size());
136 body_output_tensors.clear();
137 body_input_tensors.clear();
138 for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i)
140 const auto &body_input_index = body_graph.getInputs().at(i);
141 const auto &body_input = body_graph.operands().at(body_input_index);
142 if (body_input.getUses().size() > 0 &&
143 !body_exec->graph().getOutputs().contains(body_input_index))
145 body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
146 body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
149 const auto permute_body_output_to_body_input = std::make_shared<PermuteLayer>(
150 body_output_tensors, body_input_tensors, body_inputs_dyn_alloc);
152 // Add only used tensors among outputs of while operation
153 assert(_output_indices.size() == body_exec->getOutputTensors().size());
154 assert(_output_indices.size() == _output_tensors.size());
155 body_output_tensors.clear();
156 output_tensors.clear();
157 for (size_t i = 0; i < _output_indices.size(); ++i)
159 const auto &output_index = _output_indices.at(i);
160 const auto &output = _graph.operands().at(output_index);
161 if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
163 body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
164 output_tensors.emplace_back(_output_tensors.at(i));
167 const auto permute_body_output_to_op_output =
168 std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _outputs_dyn_alloc_info);
170 // Remove copying of unused tensor
171 permute_op_input_to_cond_input->prepare();
172 permute_op_input_to_op_output->prepare();
173 permute_op_input_to_body_input->prepare();
174 permute_body_output_to_cond_input->prepare();
175 permute_body_output_to_body_input->prepare();
176 permute_body_output_to_op_output->prepare();
178 cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
180 assert(cond_exec->getOutputTensors().size() == 1);
181 auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
182 auto getResultCond = [](backend::ITensor *tensor) -> bool {
184 tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
188 const auto body_execute_with_op_inputs = [&]() {
189 body_exec->execute(_input_tensors, permute_op_input_to_body_input);
192 const auto body_execute_with_body_outputs = [&]() {
193 body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
196 std::function<void()> body_execute = body_execute_with_op_inputs;
197 const auto cond_execute = [&]() {
198 cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
200 auto permute_to_outputs_fn = permute_op_input_to_op_output;
202 // Loop while Cond subgraph's output is true
203 while (getResultCond(cond_output_tensor.get()))
207 body_execute = body_execute_with_body_outputs;
208 permute_to_outputs_fn = permute_body_output_to_op_output;
210 permute_to_outputs_fn->run();
213 } // namespace kernel
214 } // namespace controlflow
215 } // namespace backend