/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __ONERT_EXEC_I_PERMUTE_FUNCTION_H__
#define __ONERT_EXEC_I_PERMUTE_FUNCTION_H__
#include "feature/IndexIterator.h"
#include "feature/nchw/Reader.h"
#include "feature/nchw/View.h"
#include "feature/nhwc/Reader.h"
#include "feature/nhwc/View.h"

#include "backend/ITensor.h"
#include "exec/IFunction.h"
#include "util/Utils.h"

#include <cassert>
#include <cstring>
#include <stdexcept>
#include <typeinfo>
#include <unordered_map>
#include <vector>

namespace onert
{
namespace exec
{

// Record matching (src, dst) byte offsets for every coordinate in loop_shape,
// so that later runs can copy with plain memcpy calls instead of re-deriving
// offsets from coordinates each time.
inline void UpdateOffsets(::onert::backend::ITensor *src, ::onert::backend::ITensor *dst,
                          const ::onert::ir::Shape &loop_shape, std::vector<size_t> &src_offsets,
                          std::vector<size_t> &dst_offsets)
{
  ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
    src_offsets.emplace_back(src->calcOffset(coords));
    dst_offsets.emplace_back(dst->calcOffset(coords));
  });
}
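
// For example (an illustrative sketch, assuming ShapeLoop visits coordinates in
// row-major order): with loop_shape {2, 3, 1}, the callback above fires six
// times and records six (src, dst) offset pairs, one per innermost row.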

inline void CopyStatic(const uint8_t *src_buffer, uint8_t *dst_buffer,
                       const std::vector<size_t> &src_offsets,
                       const std::vector<size_t> &dst_offsets, size_t copy_len)
{
  assert(src_offsets.size() == dst_offsets.size());
  for (size_t i = 0; i < src_offsets.size(); ++i)
  {
    memcpy(dst_buffer + dst_offsets.at(i), src_buffer + src_offsets.at(i), copy_len);
  }
}

inline void CopyDynamic(const ::onert::backend::ITensor *src, const ::onert::backend::ITensor *dst,
                        uint8_t *dst_buffer, const ::onert::ir::Shape &loop_shape, size_t copy_len)
{
  ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
    // Copy the src tensor's data to dst_buffer at the offset calculated from the dst tensor
    memcpy(dst_buffer + dst->calcOffset(coords), src->buffer() + src->calcOffset(coords), copy_len);
  });
}
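
// A minimal usage sketch (illustrative only; `loop_shape` and `copy_len` are
// assumed to be prepared by the caller, as IPermuteFunction does below):
//
//   std::vector<size_t> src_offsets, dst_offsets;
//   if (src_offsets.empty()) // compute once, reuse on every later run
//     UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets);
//   CopyStatic(src->buffer(), dst->buffer(), src_offsets, dst_offsets, copy_len);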

class IPermuteFunction : public IFunction
{
protected:
  enum class PermuteType
  {
    NHWC_TO_NCHW,
    NCHW_TO_NHWC,
    COPY
  };

public:
  virtual void run() override
  {
    // TODO Optimization: Skip this function entirely when _src_tensors.size() == 0
    assert(_src_tensors.size() == _dst_tensors.size());
    if (_src_tensors_offsets.size() == 0)
    {
      _src_tensors_offsets.resize(_src_tensors.size());
      _dst_tensors_offsets.resize(_dst_tensors.size());
    }
    assert(_src_tensors.size() == _src_tensors_offsets.size());
    assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());

    for (size_t i = 0; i < _src_tensors.size(); ++i)
    {
      auto src_tensor = _src_tensors.at(i);
      auto dst_tensor = _dst_tensors.at(i);
      auto &src_offsets = _src_tensors_offsets.at(i);
      auto &dst_offsets = _dst_tensors_offsets.at(i);
      if (src_tensor != dst_tensor)
      {
        const auto rank = src_tensor->num_dimensions();
        permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
      }
    }
  }

  virtual void prepare() override { optimize(); }

  virtual void optimize() = 0;
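
  // A minimal concrete subclass might look like the sketch below (the name
  // `SimplePermute` is hypothetical, not an onert class):
  //
  //   class SimplePermute : public IPermuteFunction
  //   {
  //   public:
  //     SimplePermute(backend::ITensor *src, backend::ITensor *dst)
  //     {
  //       _src_tensors = {src};
  //       _dst_tensors = {dst};
  //     }
  //     void optimize() override { /* nothing to pre-plan */ }
  //   };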

protected:
  void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
               std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
  {
    if (src_tensor->total_size() == 0)
    {
      assert(dst_tensor->total_size() == 0);
      return;
    }

    assert(src_tensor != dst_tensor);
    assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type()));
    switch (src_tensor->data_type())
    {
      case ir::DataType::FLOAT32:
        permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      case ir::DataType::INT32:
        permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      case ir::DataType::UINT32:
        permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      // Types of the same width share one instantiation: a permutation only
      // moves bytes, so the element's interpretation does not matter.
      case ir::DataType::BOOL8:
      case ir::DataType::QUANT_UINT8_ASYMM:
      case ir::DataType::UINT8:
        permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      case ir::DataType::QUANT_INT8_ASYMM:
      case ir::DataType::QUANT_INT8_SYMM:
        permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      case ir::DataType::INT64:
        permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
        break;
      default:
        throw std::runtime_error("IPermuteFunction: Not supported data type");
    }
  }

private:
  // TODO Make src const by providing a const version of access()
  template <class T>
  void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank,
               std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
  {
    assert(src->total_size() != 0 && dst->total_size() != 0);
    // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
    if (dst->needMemoryMap() && !dst->is_subtensor())
    {
      // An assertion to check mapping without calling map():
      // for now there is no case where both src and dst have a CL buffer.
      assert(!src->needMemoryMap());

      if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
      {
        // Contiguous and layout-compatible: write the source buffer to the device in one call
        src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
      }
      else
      {
        // TODO Optimize this block when the padding size of dst is big
        // NOTE resize() (not reserve()) is required so writing through data() is valid
        _buffers_map[dst].resize(dst->total_size());
        auto dst_buffer = _buffers_map[dst].data();

        src->access([&](backend::ITensor &) {
          permute<T>(src, dst, rank, dst_buffer, dst->total_size(), src_offsets, dst_offsets);
        });
        dst->enqueueWriteBuffer(dst_buffer, false);
      }
    }
    else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
             !dst->has_padding() && src->layout() == dst->layout())
    {
      assert(!dst->needMemoryMap());
      dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
    }
    else
    {
      auto fn = [&](backend::ITensor &) {
        dst->access([&](backend::ITensor &) {
          permute<T>(src, dst, rank, dst->buffer(), dst->total_size(), src_offsets, dst_offsets);
        });
      };
      src->access(fn);
    }
  }
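
  // NOTE The access() nesting above is what keeps both buffers valid (e.g. mapped)
  // while the raw-pointer overload below runs; enqueueWriteBuffer / enqueueReadBuffer
  // transfer whole buffers, so they are used directly only when the layouts match
  // and neither tensor is padded, and otherwise data is staged through _buffers_map.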

  template <class T>
  void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank, uint8_t *dst_buffer,
               size_t dst_size, std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
  {
    assert(dst_buffer != nullptr);
    assert(dst_size == dst->total_size());

    const auto permute_type = [&]() -> PermuteType {
      if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW)
      {
        return PermuteType::NHWC_TO_NCHW;
      }
      else if (src->layout() == ir::Layout::NCHW && dst->layout() == ir::Layout::NHWC)
      {
        return PermuteType::NCHW_TO_NHWC;
      }
      else
      {
        return PermuteType::COPY;
      }
    }();
    if (rank == 4 && permute_type != PermuteType::COPY)
    {
      switch (permute_type)
      {
        case PermuteType::NHWC_TO_NCHW:
        {
          ir::FeatureShape shape;
          shape.N = dst->dimension(0);
          shape.C = dst->dimension(1);
          shape.H = dst->dimension(2);
          shape.W = dst->dimension(3);

          // Derive dst strides from offset deltas so that any padding between
          // elements, rows, or planes is respected; a size-1 axis gets stride 0
          typename feature::nchw::View<T>::Strides strides;
          const auto start_offset = dst->calcOffset({0, 0, 0, 0});
          strides.W = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
          strides.H = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
          strides.C = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
          strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;

          const feature::nhwc::Reader<T> from(src);
          feature::nchw::View<T> into(shape, strides,
                                      reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
          feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
            const auto value = from.at(batch, row, col, ch);
            into.at(batch, ch, row, col) = value;
          };
          break;
        }
        case PermuteType::NCHW_TO_NHWC:
        {
          ir::FeatureShape shape;
          shape.N = dst->dimension(0);
          shape.H = dst->dimension(1);
          shape.W = dst->dimension(2);
          shape.C = dst->dimension(3);

          typename feature::nhwc::View<T>::Strides strides;
          const auto start_offset = dst->calcOffset({0, 0, 0, 0});
          strides.C = dst->dimension(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
          strides.W = dst->dimension(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
          strides.H = dst->dimension(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
          strides.N = dst->dimension(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;

          const feature::nchw::Reader<T> from(src);
          feature::nhwc::View<T> into(shape, strides,
                                      reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
          feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
            const auto value = from.at(batch, ch, row, col);
            into.at(batch, row, col, ch) = value;
          };
          break;
        }
        default:
          throw std::runtime_error("Unsupported Permutation");
      }
    }
    else if (!src->has_padding() && !dst->has_padding())
    {
      // Both tensors are contiguous with the same layout: one bulk copy suffices
      auto src_size = src->total_size();
      assert(src_size <= dst->total_size());
      memcpy(dst_buffer, src->buffer(), src_size);
    }
    else
    {
      // Padding on either side: copy the innermost axis row by row
      auto loop_shape = src->getShape();
      const auto copy_axis = loop_shape.rank() - 1;
      const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T);
      loop_shape.dim(copy_axis) = 1;

      if (src->is_dynamic())
      {
        // Dynamic shapes may change between runs, so offsets cannot be cached
        assert(dst->is_dynamic());
        CopyDynamic(src, dst, dst_buffer, loop_shape, copy_len);
      }
      else
      {
        // TODO Uncomment the assertion below
        // assert(!dst->is_dynamic() || dst is output of graph);
        if (src_offsets.size() == 0)
        {
          assert(dst_offsets.size() == 0);
          UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets);
        }
        CopyStatic(src->buffer(), dst_buffer, src_offsets, dst_offsets, copy_len);
      }
    }
  }
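
  // Worked example: for a float tensor of shape {2, 3, 4} that takes the
  // row-by-row path, copy_axis is 2, copy_len is 4 * sizeof(float) = 16 bytes,
  // and loop_shape becomes {2, 3, 1}, so each run issues 6 memcpy calls of 16
  // bytes each.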

  // NOTE A typeid expression is an lvalue expression which refers to an object with static
  // storage duration, of the polymorphic type const std::type_info or of some type derived
  // from it. Since std::type_info is non-copyable, it is returned by const reference.
  const std::type_info &underlying_type(ir::DataType type) const
  {
    switch (type)
    {
      case ir::DataType::FLOAT32:
        return typeid(float);
      case ir::DataType::INT32:
        return typeid(int32_t);
      case ir::DataType::UINT32:
        return typeid(uint32_t);
      case ir::DataType::INT64:
        return typeid(int64_t);
      case ir::DataType::BOOL8:
      case ir::DataType::QUANT_UINT8_ASYMM:
      case ir::DataType::UINT8:
        return typeid(uint8_t);
      case ir::DataType::QUANT_INT8_ASYMM:
      case ir::DataType::QUANT_INT8_SYMM:
        return typeid(int8_t);
      default:
        throw std::runtime_error("IPermuteFunction: Not supported data type");
    }
  }
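
  // For example, underlying_type(ir::DataType::BOOL8) == typeid(uint8_t), which
  // is what lets the type-dispatching permute() above treat BOOL8, UINT8, and
  // QUANT_UINT8_ASYMM tensors identically.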

protected:
  std::vector<backend::ITensor *> _src_tensors;
  std::vector<backend::ITensor *> _dst_tensors;
  std::vector<std::vector<size_t>> _src_tensors_offsets;
  std::vector<std::vector<size_t>> _dst_tensors_offsets;
  // Staging buffers, keyed by destination tensor, for targets that need
  // enqueueWriteBuffer() but cannot accept a direct write because of padding
  std::unordered_map<const backend::ITensor *, std::vector<uint8_t>> _buffers_map;
};

} // namespace exec
} // namespace onert

#endif // __ONERT_EXEC_I_PERMUTE_FUNCTION_H__