2 // Copyright (c) 2016-2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 #include "tensor_type.h"
19 #include "common_tools.h"
21 namespace kernel_selector
25 std::array<std::array<int, 5>, DataLayout::DataLayoutCount> DataTensor::dataChannelArray
// Lookup table: for every DataLayout, the dimension index occupied by each
// channel, listed in the fixed channel order X, Y, F, R, B; -1 marks a
// channel the layout does not use.
// NOTE(review): the aggregate-initializer braces ("{ {" ... "} };") are not
// visible in this excerpt — confirm the table is complete against the
// original file before building.
28 // 0, 1, 2, 3, 4 means the ordering starts from X, then Y, then F, then R, then B
29 // -1 means it's not used
31 {-1,-1, 0,-1, 1 }, // DataLayout::bf
32 {-1,-1, 1,-1, 0 }, // DataLayout::fb
33 { 0, 1, 2,-1, 3 }, // DataLayout::bfyx
34 { 2, 3, 1,-1, 0 }, // DataLayout::yxfb
35 { 1, 2, 0,-1, 3 }, // DataLayout::byxf
36 { 1, 2, 3,-1, 0 }, // DataLayout::fyxb
37 {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv8__af8
38 {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv16__af8
39 { 0, 1, 2,-1, 3 }, // DataLayout::bf8_xy16
40 { 0, 1, 2, 3, 4 }, // DataLayout::brfyx
41 { 2, 1, 0,-1, 3 }, // DataLayout::winograd_2x3_s1_data
42 { 1, 2, 0,-1, 3 }, // DataLayout::byxf_af32
// NOTE(review): GetSimpleDims below tests a layout named byx8_f4 — confirm
// whether this row is byx8_f8 or byx8_f4 in the layout enum.
43 { 1, 2, 0,-1, 3 }, // DataLayout::byx8_f8
44 { 0, 1, 3,-1, 2 }, // DataLayout::fs_bs_yx_bsv4_fsv32
45 { 0, 1, 2, -1, 3 },// DataLayout::b_fs_yx_fsv4
48 std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> WeightsTensor::weightsChannelArray
// Per-layout channel table for weights: for each WeightsLayout, the
// dimension index of each channel in the fixed order X, Y, I(FM), O(FM),
// LX, LY; -1 marks a channel the layout does not use.
// NOTE(review): the aggregate-initializer braces are not visible in this
// excerpt — confirm the table is complete against the original file.
50 // X, Y, I, O, LX, LY,
51 { -1, -1, 0, 1, -1, -1 }, // WeightsLayout::oi
52 { -1, -1, 1, 0, -1, -1 }, // WeightsLayout::io
53 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::oiyx
54 { 1, 2, 0, 3, -1, -1 }, // WeightsLayout::oyxi
55 { 1, 2, 3, 0, -1, -1 }, // WeightsLayout::iyxo
56 { 2, 3, 1, 0, -1, -1 }, // WeightsLayout::yxio
57 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_iyx_osv16
58 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_iyx_osv32
59 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_iyx_osv64
60 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_iyx_osv16_rotate_180
61 { -1, -1, 0, 1, -1, -1 }, // WeightsLayout::os_i_osv8__ai8
62 { -1, -1, 0, 1, -1, -1 }, // WeightsLayout::os_i_osv16__ai8
63 { -1, -1, 0, 1, -1, -1 }, // WeightsLayout::os_i_osv16
64 { 1, 2, 3, 0, -1, -1 }, // WeightsLayout::i_yxs_os_yxsv2_osv16
65 { 1, 2, 3, 0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv16__ao32
66 { 1, 2, 3, 0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv8__ao32
67 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::image_2d_weights_c4_fyx_b
68 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::image_2d_weights_c1_b_fyx
69 { 3, 2, 1, 0, -1, -1 }, // WeightsLayout::winograd_2x3_s1_weights
70 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::winograd_2x3_s1_fused_weights
71 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::winograd_6x3_s1_fused_weights
72 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb
73 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb
74 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4
75 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4
76 { 1, 2, 0, 3, -1, -1 }, // WeightsLayout::is_o_yx_isv32
77 { 1, 2, 0, 3, -1, -1 }, // WeightsLayout::is_o32_yx_isv32_swizzled_by_4
78 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_is_y_x8_osv8_isv4
// bf_lyx_yx is the only 6-channel layout here (uses LX/LY at indices 4/5).
79 { 0, 1, 2, 3, 4, 5 }, // WeightsLayout::bf_lyx_yx
80 { 0, 1, 2, 3, -1, -1 }, // WeightsLayout::os_is_yx_osv16_isv4
83 NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l)
// Builds per-dimension descriptors (logical size, pitch, padding) for sizes
// `d` laid out as `l`.  Block-aligned layouts first round selected entries
// of `newDims` up to their block size; Pad.after then records how much
// padding that rounding added on top of the logical size.
// NOTE(review): this excerpt has lost the enclosing switch/case/break lines
// and braces — the case label owning each RoundUp group below, and the
// declaration/update of the `pitch` accumulator used in the loop, must be
// confirmed against the original file.
85 std::vector<size_t> newDims = d;
87 // TODO: it's not the right pitches. it's here in order to calculate physical size
// Two 2-D groups: sizes rounded to 8x8 and to 8x16 (case labels missing).
91 assert(newDims.size() == 2);
92 newDims[0] = RoundUp(newDims[0], 8);
93 newDims[1] = RoundUp(newDims[1], 8);
96 assert(newDims.size() == 2);
97 newDims[0] = RoundUp(newDims[0], 8);
98 newDims[1] = RoundUp(newDims[1], 16);
101 assert(newDims.size() == 4);
102 newDims[1] = RoundUp(newDims[1], 8);
// NOTE(review): this folds newDims[2]*newDims[3] into index 3 before
// rounding — unusual; the owning case label is missing from this excerpt,
// so verify the line against the original.
103 newDims[3] = RoundUp(newDims[2] * newDims[3], 16);
107 assert(newDims.size() == 4);
108 newDims[0] = RoundUp(newDims[0], 32);
111 assert(newDims.size() == 4);
112 newDims[0] = RoundUp(newDims[0], 4);
113 newDims[1] = RoundUp(newDims[1], 8);
// Per dataChannelArray, fs_bs_yx_bsv4_fsv32 maps F->index 3 and B->index 2,
// so this rounds feature to 32 (fsv32) and batch to 4 (bsv4).
115 case fs_bs_yx_bsv4_fsv32:
116 assert(newDims.size() == 4);
117 newDims[3] = RoundUp(newDims[3], 32);
118 newDims[2] = RoundUp(newDims[2], 4);
124 NDims ret(newDims.size());
// Pad.after = rounded size minus logical size; `pitch` comes from lines not
// visible here (presumably a running product of rounded sizes — confirm).
127 for (size_t i = 0; i < newDims.size(); i++)
129 Pad p = { 0, newDims[i] - d[i] };
130 ret[i] = { d[i], pitch, p };
// Interleaved layouts recompute pitches as a running product of the rounded
// sizes so each padded block is laid out contiguously.
134 if (l == byxf_af32 || l == fs_bs_yx_bsv4_fsv32 || l == byx8_f4)
137 ret[1].pitch = ret[0].pitch * newDims[0];
138 ret[2].pitch = ret[1].pitch * newDims[1];
139 ret[3].pitch = ret[2].pitch * newDims[2];
// NOTE(review): ret was sized newDims.size(), which the asserts above pin to
// 4 for these layouts — ret[4] looks out of bounds; verify upstream.
140 ret[4].pitch = ret[3].pitch * newDims[3];
146 DataTensor DataTensor::TransformIgnorePadding(DataLayout l) const
// Re-expresses this tensor's logical sizes (the .v values only — pitch and
// padding information is discarded, per the function name) in layout `l`
// and returns a new DataTensor built from those sizes and this dtype.
// Supported conversions: 2->2 and 4->4 channels (direct copy), 2->4 (unfold
// the feature extent into F/Y/X using this tensor's spatial sizes), and
// 4->2 (fold X*Y into the feature extent).
// NOTE(review): braces and the trailing else/error path (around the
// "implement ROI" TODO) are missing from this excerpt.
148 const uint32_t src_channels = ChannelsCount(layout);
149 const uint32_t dst_channels = ChannelsCount(l);
151 const size_t src_x = X().v;
152 const size_t src_y = Y().v;
154 std::vector<size_t> vec(dst_channels);
155 if (src_channels == 2 && dst_channels == 2)
157 vec[Channelndex(l, DataChannelName::FEATURE)] = Feature().v;
158 vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
160 else if (src_channels == 4 && dst_channels == 4)
162 vec[Channelndex(l, DataChannelName::X)] = X().v;
163 vec[Channelndex(l, DataChannelName::Y)] = Y().v;
164 vec[Channelndex(l, DataChannelName::FEATURE)] = Feature().v;
165 vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
167 else if (src_channels == 2 && dst_channels == 4)
// Unfold: the flat feature extent is F'*Y'*X'; div/mod by this tensor's
// spatial sizes recover the per-channel extents.
169 const size_t dst_ifm = Feature().v / (src_x*src_y);
170 const size_t dst_xy = Feature().v % (src_x*src_y);
171 const size_t dst_y = dst_xy / src_x;
172 const size_t dst_x = dst_xy % src_x;
173 vec[Channelndex(l, DataChannelName::X)] = dst_x;
174 vec[Channelndex(l, DataChannelName::Y)] = dst_y;
175 vec[Channelndex(l, DataChannelName::FEATURE)] = dst_ifm;
176 vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
178 else if (src_channels == 4 && dst_channels == 2)
// Fold: collapse F, Y and X into a single flattened feature extent.
180 const size_t dst_ifm = Feature().v * src_x * src_y;
181 vec[Channelndex(l, DataChannelName::FEATURE)] = dst_ifm;
182 vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
186 // TODO: implement ROI
190 return{ vec, dtype, l };
193 DataTensor DataTensor::FlattenFeatureAndSpatials() const
// Flattens this tensor's F/Y/X channels into a single feature channel,
// producing a 2-channel (bf or fb) tensor; throws std::runtime_error when
// the layout, or padding inside the spatial plane, makes a contiguous
// flatten impossible.
// NOTE(review): the switch over `layout`, its case labels and braces, and
// the declarations of `x`, `y` and `l` (the chosen target layout used at
// the TransformIgnorePadding call below — presumably assigned from
// targetLayout) are missing from this excerpt; the duplicated condition
// blocks below are the per-case copies referred to by the [[fallthrough]]
// TODOs.  Restore the control flow from the original file.
199 const auto f = Feature();
200 const auto b = Batch();
202 DataLayout targetLayout = Tensor::bf;
210 targetLayout = Tensor::fb;
212 // TODO: [FUTURE] Use C++17 [[fallthrough]] instead of code duplication to get portable warning avoidance.
// Flattening is only valid when F's pitch equals the full Y*X extent, i.e.
// there is no padding inside the spatial plane.
213 if (f.pitch == y.v*x.v*x.pitch) // no padding in X/Y axis
218 throw std::runtime_error("Unsupported - cannot flatten with padding");
221 if (f.pitch == y.v*x.v*x.pitch) // no padding in X/Y axis
226 throw std::runtime_error("Unsupported - cannot flatten with padding");
229 targetLayout = Tensor::fb;
231 // TODO: [FUTURE] Use C++17 [[fallthrough]] instead of code duplication to get portable warning avoidance.
// Spatial-major layouts: accept either a pure YX plane (no feature stride)
// or a feature-only tensor (both spatial extents equal 1).
232 if ((x.pitch == f.pitch && y.pitch == x.v*x.pitch) || // YX - no Features (val/pitch)
233 (y.v == 1 && x.v == 1 && x.pitch == f.pitch && y.pitch == f.pitch) || // Feature only
234 (f.v * f.pitch == x.pitch && f.v * f.pitch == y.pitch && y.v == 1 && x.v == 1)) // Feature only
239 throw std::runtime_error("Unsupported - cannot flatten yxf to f if f/yx != 1");
242 if ((x.pitch == f.pitch && y.pitch == x.v*x.pitch) || // YX - no Features (val/pitch)
243 (y.v == 1 && x.v == 1 && x.pitch == f.pitch && y.pitch == f.pitch) || // Feature only
244 (f.v * f.pitch == x.pitch && f.v * f.pitch == y.pitch && y.v == 1 && x.v == 1)) // Feature only
249 throw std::runtime_error("Unsupported - cannot flatten yxf to f if f/yx != 1");
251 throw std::runtime_error("Unsupported - unsupported layout");
// Build the flattened tensor, then restore pitch/padding that
// TransformIgnorePadding discarded: batch's own pitch/pad for bf, and (in
// what is presumably the else branch) the source dim just after BATCH for
// the FEATURE channel.
255 DataTensor res = TransformIgnorePadding(l);
257 if (l == DataLayout::bf)
259 res.dims[Channelndex(l, DataChannelName::BATCH)].pitch = b.pitch;
260 res.dims[Channelndex(l, DataChannelName::BATCH)].pad = b.pad;
264 res.dims[Channelndex(l, DataChannelName::FEATURE)].pitch = dims[Channelndex(l, DataChannelName::BATCH) + 1].pitch;
265 res.dims[Channelndex(l, DataChannelName::FEATURE)].pad = dims[Channelndex(l, DataChannelName::BATCH) + 1].pad;
271 NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l)
// Weights counterpart of DataTensor::GetSimpleDims: rounds selected dims up
// to the layout's block size, records the round-up as Pad.after, then
// patches the pitches of a few interleaved layouts afterwards.
// NOTE(review): switch/break/brace lines — and the declaration/update of
// the `pitch` accumulator used in the loop — are missing from this excerpt,
// and several RoundUp groups below have lost their case labels; restore
// them from the original file.
273 std::vector<size_t> newDims = d;
275 // TODO: it's not the right pitches. it's here in order to calculate physical size
279 case os_iyx_osv16_rotate_180:
280 assert(newDims.size() == 4);
281 newDims[3] = RoundUp(newDims[3], 16);
284 assert(newDims.size() == 4);
285 newDims[3] = RoundUp(newDims[3], 32);
288 assert(newDims.size() == 4);
289 newDims[3] = RoundUp(newDims[3], 64);
292 assert(newDims.size() == 2);
293 newDims[0] = RoundUp(newDims[0], 8);
294 newDims[1] = RoundUp(newDims[1], 8);
296 case os_i_osv16__ai8:
297 assert(newDims.size() == 2);
298 newDims[0] = RoundUp(newDims[0], 8);
299 newDims[1] = RoundUp(newDims[1], 16);
302 assert(newDims.size() == 2);
303 newDims[1] = RoundUp(newDims[1], 16);
305 case i_yxs_os_yxsv2_osv16:
306 assert(newDims.size() == 4);
307 newDims[0] = RoundUp(newDims[0], 16);
309 case iy_xs_os_xsv2_osv16__ao32:
310 case iy_xs_os_xsv2_osv8__ao32:
311 assert(newDims.size() == 4);
312 newDims[0] = RoundUp(newDims[0], 32);
314 case os_is_yx_isa8_osv8_isv4:
315 assert(newDims.size() == 4);
316 newDims[3] = RoundUp(newDims[3], 8);
317 newDims[2] = RoundUp(newDims[2], 32);
319 case os_is_yx_isa8_osv8_isv4_swizzled_by_4:
320 assert(newDims.size() == 4);
321 newDims[3] = RoundUp(newDims[3], 32);
322 newDims[2] = RoundUp(newDims[2], 32);
325 assert(newDims.size() == 4);
326 newDims[0] = RoundUp(newDims[0], 32);
328 case is_o32_yx_isv32_swizzled_by_4:
329 assert(newDims.size() == 4);
330 newDims[0] = RoundUp(newDims[0], 32);
331 newDims[3] = RoundUp(newDims[3], 32);
333 case os_is_y_x8_osv8_isv4:
334 assert(newDims.size() == 4);
335 newDims[2] = RoundUp(newDims[2], 4);
336 newDims[3] = RoundUp(newDims[3], 8);
337 newDims[0] = RoundUp(newDims[0], 8);
339 case os_is_yx_osv16_isv4:
340 assert(newDims.size() == 4);
341 newDims[2] = RoundUp(newDims[2], 4);
342 newDims[3] = RoundUp(newDims[3], 16);
348 NDims ret(newDims.size());
// Pad.after = rounded size minus logical size; `pitch` is declared/updated
// in lines not visible here.
351 for (size_t i = 0; i < newDims.size(); i++)
353 Pad p = { 0, newDims[i] - d[i] };
354 ret[i] = { d[i], pitch, p };
// Layout-specific pitch fix-ups (see TODO above).
// i_yxs_os_yxsv2_osv16: OFM pitch strides over the y*x extent rounded up to
// an even count of spatial elements.
358 if (l == i_yxs_os_yxsv2_osv16)
360 ret[3].pitch = RoundUp(ret[1].v * ret[2].v, 2) * ret[1].pitch;
361 ret[2].pad.after = newDims[2] - ret[2].v;
363 else if (l == iy_xs_os_xsv2_osv16__ao32 ||
364 l == iy_xs_os_xsv2_osv8__ao32)
// xsv2 interleave: y pitch strides an even-rounded x extent; OFM pitch then
// derives from the recomputed y pitch.
366 ret[2].pitch = RoundUp(ret[1].v, 2) * ret[1].pitch;
367 ret[1].pad.after = newDims[1] - ret[1].v;
369 ret[3].pitch = ret[2].v * ret[2].pitch;
370 ret[2].pad.after = newDims[2] - ret[2].v;
372 else if (l == os_is_yx_isa8_osv8_isv4 || l == os_is_yx_isa8_osv8_isv4_swizzled_by_4)
375 ret[1].pitch = ret[0].pitch * ret[0].v;
377 else if (l == bf_lyx_yx)
// 6-channel layout: per weightsChannelArray, ret[4]/ret[5] are the LX/LY
// local extents, so the IFM/OFM pitches span the whole local block.
379 ret[2].pitch = ret[0].v * ret[1].v * ret[2].v * ret[3].v;
380 ret[3].pitch = ret[2].pitch * ret[5].v;
386 WeightsTensor WeightsTensor::TransformIgnorePadding(WeightsLayout l, WeightsType t) const
388 const uint32_t src_channels = ChannelsCount(layout);
389 const uint32_t dst_channels = ChannelsCount(l);
391 const size_t src_x = X().v;
392 const size_t src_y = Y().v;
394 std::vector<size_t> vec(dst_channels);
395 if (src_channels == 2 && dst_channels == 2)
397 vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
398 vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
400 else if (src_channels == 4 && dst_channels == 4)
402 vec[Channelndex(l, WeightsChannelName::X)] = X().v;
403 vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
404 vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
405 vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
407 //requirement for winograd 2x3
408 if (l == WeightsLayout::winograd_2x3_s1_weights || l == WeightsLayout::winograd_2x3_s1_fused_weights)
410 vec[Channelndex(l, WeightsChannelName::X)] = 4;
411 vec[Channelndex(l, WeightsChannelName::Y)] = 3;
413 else if (l == WeightsLayout::winograd_6x3_s1_fused_weights)
415 vec[Channelndex(l, WeightsChannelName::X)] = 8;
416 vec[Channelndex(l, WeightsChannelName::Y)] = 3;
419 else if (src_channels == 2 && dst_channels == 4)
421 const size_t dst_ifm = IFM().v / (src_x*src_y);
422 const size_t dst_xy = IFM().v % (src_x*src_y);
423 const size_t dst_y = dst_xy / src_x;
424 const size_t dst_x = dst_xy % src_x;
425 vec[Channelndex(l, WeightsChannelName::X)] = dst_x;
426 vec[Channelndex(l, WeightsChannelName::Y)] = dst_y;
427 vec[Channelndex(l, WeightsChannelName::IFM)] = dst_ifm;
428 vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
430 else if (src_channels == 4 && dst_channels == 2)
432 const size_t dst_ifm = IFM().v * src_x * src_y;
433 vec[Channelndex(l, WeightsChannelName::IFM)] = dst_ifm;
434 vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
436 else if (src_channels == 6 && dst_channels == 6)
438 vec[Channelndex(l, WeightsChannelName::X)] = IFM().v;
439 vec[Channelndex(l, WeightsChannelName::Y)] = OFM().v;
440 vec[Channelndex(l, WeightsChannelName::IFM)] = LX().v;
441 vec[Channelndex(l, WeightsChannelName::OFM)] = LY().v;
442 vec[Channelndex(l, WeightsChannelName::LX)] = X().v;
443 vec[Channelndex(l, WeightsChannelName::LY)] = Y().v;