inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
/*
// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <cstddef>
#include "tensor_type.h"
#include "common_tools.h"

namespace kernel_selector
{
    namespace Tensor
    {
        std::array<std::array<int, 5>, DataLayout::DataLayoutCount> DataTensor::dataChannelArray
        { {
            // Explanation:
            // 0, 1, 2, 3, 4 means the ordering starts from X, then Y, then F, then R, then B.
            // -1 means the channel is not used by the layout.
            //X, Y, F, R, B
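            // e.g. the DataLayout::bfyx row { 0, 1, 2,-1, 3 } reads: X is stored at
            // dims[0] (innermost, fastest-changing), Y at dims[1], F at dims[2],
            // B at dims[3] (outermost); R is unused.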
            {-1,-1, 0,-1, 1 }, // DataLayout::bf
            {-1,-1, 1,-1, 0 }, // DataLayout::fb
            { 0, 1, 2,-1, 3 }, // DataLayout::bfyx
            { 2, 3, 1,-1, 0 }, // DataLayout::yxfb
            { 1, 2, 0,-1, 3 }, // DataLayout::byxf
            { 1, 2, 3,-1, 0 }, // DataLayout::fyxb
            {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv8__af8
            {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv16__af8
            { 0, 1, 2,-1, 3 }, // DataLayout::bf8_xy16
            { 0, 1, 2, 3, 4 }, // DataLayout::brfyx
            { 2, 1, 0,-1, 3 }, // DataLayout::winograd_2x3_s1_data
            { 1, 2, 0,-1, 3 }, // DataLayout::byxf_af32
            { 1, 2, 0,-1, 3 }, // DataLayout::byx8_f4
            { 0, 1, 3,-1, 2 }, // DataLayout::fs_bs_yx_bsv4_fsv32
            { 0, 1, 2,-1, 3 }, // DataLayout::b_fs_yx_fsv4
        } };

        std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> WeightsTensor::weightsChannelArray
        { {
            // X, Y,   I,  O, LX, LY,
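            // e.g. the WeightsLayout::oiyx row { 0, 1, 2, 3, -1, -1 } reads: X at dims[0]
            // (innermost), Y at dims[1], IFM at dims[2], OFM at dims[3]; the local
            // LX/LY channels are unused.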
            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::oi
            { -1, -1,  1,  0, -1, -1 }, // WeightsLayout::io
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::oiyx
            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::oyxi
            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iyxo
            {  2,  3,  1,  0, -1, -1 }, // WeightsLayout::yxio
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv16
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv32
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv64
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv16_rotate_180
            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv8__ai8
            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv16__ai8
            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv16
            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::i_yxs_os_yxsv2_osv16
            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv16__ao32
            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv8__ao32
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_c4_fyx_b
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_c1_b_fyx
            {  3,  2,  1,  0, -1, -1 }, // WeightsLayout::winograd_2x3_s1_weights
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::winograd_2x3_s1_fused_weights
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::winograd_6x3_s1_fused_weights
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4
            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::is_o_yx_isv32
            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::is_o32_yx_isv32_swizzled_by_4
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_y_x8_osv8_isv4
            {  0,  1,  2,  3,  4,  5 }, // WeightsLayout::bf_lyx_yx
            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_osv16_isv4
        } };

        NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l)
        {
            std::vector<size_t> newDims = d;

            // TODO: these are not the right pitches; they are computed here only to derive the physical size.
            switch (l)
            {
            case bs_f_bsv8__af8:
                assert(newDims.size() == 2);
                newDims[0] = RoundUp(newDims[0], 8);
                newDims[1] = RoundUp(newDims[1], 8);
                break;
            case bs_f_bsv16__af8:
                assert(newDims.size() == 2);
                newDims[0] = RoundUp(newDims[0], 8);
                newDims[1] = RoundUp(newDims[1], 16);
                break;
            case bf8_xy16:
                assert(newDims.size() == 4);
                newDims[1] = RoundUp(newDims[1], 8);
                newDims[3] = RoundUp(newDims[2] * newDims[3], 16);
                newDims[2] = 1;
                break;
            case byxf_af32:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 32);
                break;
            case byx8_f4:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 4);
                newDims[1] = RoundUp(newDims[1], 8);
                break;
            case fs_bs_yx_bsv4_fsv32:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 32);
                newDims[2] = RoundUp(newDims[2], 4);
                break;
            default:
                break;
            }

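            // Dense pitches accumulate over the rounded-up dims, while the returned
            // sizes keep the caller's values and record the rounding as trailing padding.
            // E.g. for bs_f_bsv8__af8 with d = { 3, 5 }: newDims = { 8, 8 }, so
            // ret[0] = { v=3, pitch=1, pad.after=5 } and ret[1] = { v=5, pitch=8, pad.after=3 }.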
            NDims ret(newDims.size());
            size_t pitch = 1;

            for (size_t i = 0; i < newDims.size(); i++)
            {
                Pad p = { 0, newDims[i] - d[i] };
                ret[i] = { d[i], pitch, p };
                pitch *= newDims[i];
            }

            if (l == byxf_af32 || l == fs_bs_yx_bsv4_fsv32 || l == byx8_f4)
            {
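                // note: these layouts are asserted to be 4-dimensional above, so only
                // ret[0..3] exist.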
                ret[0].pitch = 1;
                ret[1].pitch = ret[0].pitch * newDims[0];
                ret[2].pitch = ret[1].pitch * newDims[1];
                ret[3].pitch = ret[2].pitch * newDims[2];
            }

            return ret;
        }

        DataTensor DataTensor::TransformIgnorePadding(DataLayout l) const
        {
            const uint32_t src_channels = ChannelsCount(layout);
            const uint32_t dst_channels = ChannelsCount(l);

            const size_t src_x = X().v;
            const size_t src_y = Y().v;

            std::vector<size_t> vec(dst_channels);
            if (src_channels == 2 && dst_channels == 2)
            {
                vec[Channelndex(l, DataChannelName::FEATURE)] = Feature().v;
                vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
            }
            else if (src_channels == 4 && dst_channels == 4)
            {
                vec[Channelndex(l, DataChannelName::X)] = X().v;
                vec[Channelndex(l, DataChannelName::Y)] = Y().v;
                vec[Channelndex(l, DataChannelName::FEATURE)] = Feature().v;
                vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
            }
            else if (src_channels == 2 && dst_channels == 4)
            {
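                // The FEATURE axis of the 2D source is assumed to hold f*y*x flattened
                // values; unflatten it using this tensor's current X/Y sizes.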
                const size_t dst_ifm = Feature().v / (src_x*src_y);
                const size_t dst_xy = Feature().v % (src_x*src_y);
                const size_t dst_y = dst_xy / src_x;
                const size_t dst_x = dst_xy % src_x;
                vec[Channelndex(l, DataChannelName::X)] = dst_x;
                vec[Channelndex(l, DataChannelName::Y)] = dst_y;
                vec[Channelndex(l, DataChannelName::FEATURE)] = dst_ifm;
                vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
            }
            else if (src_channels == 4 && dst_channels == 2)
            {
                const size_t dst_ifm = Feature().v * src_x * src_y;
                vec[Channelndex(l, DataChannelName::FEATURE)] = dst_ifm;
                vec[Channelndex(l, DataChannelName::BATCH)] = Batch().v;
            }
            else
            {
                // TODO: implement ROI
                assert(0);
            }

            return { vec, dtype, l };
        }

        DataTensor DataTensor::FlattenFeatureAndSpatials() const
        {
            DataLayout l;

            const auto x = X();
            const auto y = Y();
            const auto f = Feature();
            const auto b = Batch();

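            // Flattens F/Y/X into a single feature dimension (e.g. bfyx -> bf with
            // f' = f*y*x); throws when X/Y padding would make the flattened view
            // non-contiguous.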
            DataLayout targetLayout = Tensor::bf;
            switch (layout)
            {
            case Tensor::bf:
            case Tensor::fb:
                return *this;

            case Tensor::fyxb:
                targetLayout = Tensor::fb;

                // TODO: [FUTURE] Use C++17 [[fallthrough]] instead of code duplication to get portable warning avoidance.
                if (f.pitch == y.v*x.v*x.pitch)                                         // no padding in X/Y axis
                {
                    l = targetLayout;
                    break;
                }
                throw std::runtime_error("Unsupported - cannot flatten with padding");

            case Tensor::bfyx:
                if (f.pitch == y.v*x.v*x.pitch)                                         // no padding in X/Y axis
                {
                    l = targetLayout;
                    break;
                }
                throw std::runtime_error("Unsupported - cannot flatten with padding");

            case Tensor::yxfb:
                targetLayout = Tensor::fb;

                // TODO: [FUTURE] Use C++17 [[fallthrough]] instead of code duplication to get portable warning avoidance.
                if ((x.pitch == f.pitch && y.pitch == x.v*x.pitch) ||                     // YX - no Features (val/pitch)
                    (y.v == 1 && x.v == 1 && x.pitch == f.pitch && y.pitch == f.pitch) || // Feature only
                    (f.v * f.pitch == x.pitch && f.v * f.pitch == y.pitch && y.v == 1 && x.v == 1)) // Feature only
                {
                    l = targetLayout;
                    break;
                }
                throw std::runtime_error("Unsupported - cannot flatten yxf to f if f/yx != 1");

            case Tensor::byxf:
                if ((x.pitch == f.pitch && y.pitch == x.v*x.pitch) ||                     // YX - no Features (val/pitch)
                    (y.v == 1 && x.v == 1 && x.pitch == f.pitch && y.pitch == f.pitch) || // Feature only
                    (f.v * f.pitch == x.pitch && f.v * f.pitch == y.pitch && y.v == 1 && x.v == 1)) // Feature only
                {
                    l = targetLayout;
                    break;
                }
                throw std::runtime_error("Unsupported - cannot flatten yxf to f if f/yx != 1");
            default:
                throw std::runtime_error("Unsupported - unsupported layout");
            }

            DataTensor res = TransformIgnorePadding(l);

            if (l == DataLayout::bf)
            {
                res.dims[Channelndex(l, DataChannelName::BATCH)].pitch = b.pitch;
                res.dims[Channelndex(l, DataChannelName::BATCH)].pad   = b.pad;
            }
            else
            {
                res.dims[Channelndex(l, DataChannelName::FEATURE)].pitch = dims[Channelndex(l, DataChannelName::BATCH) + 1].pitch;
                res.dims[Channelndex(l, DataChannelName::FEATURE)].pad   = dims[Channelndex(l, DataChannelName::BATCH) + 1].pad;
            }

            return res;
        }

        NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l)
        {
            std::vector<size_t> newDims = d;

            // TODO: these are not the right pitches; they are computed here only to derive the physical size.
            switch (l)
            {
            case os_iyx_osv16:
            case os_iyx_osv16_rotate_180:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 16);
                break;
            case os_iyx_osv32:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 32);
                break;
            case os_iyx_osv64:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 64);
                break;
            case os_i_osv8__ai8:
                assert(newDims.size() == 2);
                newDims[0] = RoundUp(newDims[0], 8);
                newDims[1] = RoundUp(newDims[1], 8);
                break;
            case os_i_osv16__ai8:
                assert(newDims.size() == 2);
                newDims[0] = RoundUp(newDims[0], 8);
                newDims[1] = RoundUp(newDims[1], 16);
                break;
            case os_i_osv16:
                assert(newDims.size() == 2);
                newDims[1] = RoundUp(newDims[1], 16);
                break;
            case i_yxs_os_yxsv2_osv16:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 16);
                break;
            case iy_xs_os_xsv2_osv16__ao32:
            case iy_xs_os_xsv2_osv8__ao32:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 32);
                break;
            case os_is_yx_isa8_osv8_isv4:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 8);
                newDims[2] = RoundUp(newDims[2], 32);
                break;
            case os_is_yx_isa8_osv8_isv4_swizzled_by_4:
                assert(newDims.size() == 4);
                newDims[3] = RoundUp(newDims[3], 32);
                newDims[2] = RoundUp(newDims[2], 32);
                break;
            case is_o_yx_isv32:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 32);
                break;
            case is_o32_yx_isv32_swizzled_by_4:
                assert(newDims.size() == 4);
                newDims[0] = RoundUp(newDims[0], 32);
                newDims[3] = RoundUp(newDims[3], 32);
                break;
            case os_is_y_x8_osv8_isv4:
                assert(newDims.size() == 4);
                newDims[2] = RoundUp(newDims[2], 4);
                newDims[3] = RoundUp(newDims[3], 8);
                newDims[0] = RoundUp(newDims[0], 8);
                break;
            case os_is_yx_osv16_isv4:
                assert(newDims.size() == 4);
                newDims[2] = RoundUp(newDims[2], 4);
                newDims[3] = RoundUp(newDims[3], 16);
                break;
            default:
                break;
            }

            NDims ret(newDims.size());
            size_t pitch = 1;

            for (size_t i = 0; i < newDims.size(); i++)
            {
                Pad p = { 0, newDims[i] - d[i] };
                ret[i] = { d[i], pitch, p };
                pitch *= newDims[i];
            }

            if (l == i_yxs_os_yxsv2_osv16)
            {
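                // IFM stride covers a full X*Y slice rounded up to a multiple of 2
                // (the yxsv2 pairing), in units of the X pitch.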
                ret[3].pitch = RoundUp(ret[1].v * ret[2].v, 2) * ret[1].pitch;
                ret[2].pad.after = newDims[2] - ret[2].v;
            }
            else if (l == iy_xs_os_xsv2_osv16__ao32 ||
                     l == iy_xs_os_xsv2_osv8__ao32)
            {
                ret[2].pitch     = RoundUp(ret[1].v, 2) * ret[1].pitch;
                ret[1].pad.after = newDims[1] - ret[1].v;

                ret[3].pitch     = ret[2].v * ret[2].pitch;
                ret[2].pad.after = newDims[2] - ret[2].v;
            }
            else if (l == os_is_yx_isa8_osv8_isv4 || l == os_is_yx_isa8_osv8_isv4_swizzled_by_4)
            {
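                // 256 = 8 (isa8) * 8 (osv8) * 4 (isv4) elements per fully interleaved
                // block; each X step jumps over one whole block.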
                ret[0].pitch = 256;
                ret[1].pitch = ret[0].pitch * ret[0].v;
            }
            else if (l == bf_lyx_yx)
            {
                ret[2].pitch = ret[0].v * ret[1].v * ret[2].v * ret[3].v;
                ret[3].pitch = ret[2].pitch * ret[5].v;
            }

            return ret;
        }

        WeightsTensor WeightsTensor::TransformIgnorePadding(WeightsLayout l, WeightsType t) const
        {
            const uint32_t src_channels = ChannelsCount(layout);
            const uint32_t dst_channels = ChannelsCount(l);

            const size_t src_x = X().v;
            const size_t src_y = Y().v;

            std::vector<size_t> vec(dst_channels);
            if (src_channels == 2 && dst_channels == 2)
            {
                vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
                vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
            }
            else if (src_channels == 4 && dst_channels == 4)
            {
                vec[Channelndex(l, WeightsChannelName::X)] = X().v;
                vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
                vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
                vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;

                // requirement for winograd 2x3
                if (l == WeightsLayout::winograd_2x3_s1_weights || l == WeightsLayout::winograd_2x3_s1_fused_weights)
                {
                    vec[Channelndex(l, WeightsChannelName::X)] = 4;
                    vec[Channelndex(l, WeightsChannelName::Y)] = 3;
                }
                else if (l == WeightsLayout::winograd_6x3_s1_fused_weights)
                {
                    vec[Channelndex(l, WeightsChannelName::X)] = 8;
                    vec[Channelndex(l, WeightsChannelName::Y)] = 3;
                }
            }
            else if (src_channels == 2 && dst_channels == 4)
            {
                const size_t dst_ifm = IFM().v / (src_x*src_y);
                const size_t dst_xy = IFM().v % (src_x*src_y);
                const size_t dst_y = dst_xy / src_x;
                const size_t dst_x = dst_xy % src_x;
                vec[Channelndex(l, WeightsChannelName::X)] = dst_x;
                vec[Channelndex(l, WeightsChannelName::Y)] = dst_y;
                vec[Channelndex(l, WeightsChannelName::IFM)] = dst_ifm;
                vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
            }
            else if (src_channels == 4 && dst_channels == 2)
            {
                const size_t dst_ifm = IFM().v * src_x * src_y;
                vec[Channelndex(l, WeightsChannelName::IFM)] = dst_ifm;
                vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
            }
            else if (src_channels == 6 && dst_channels == 6)
            {
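                // bf_lyx_yx is the only 6-channel layout here; the spatial (X/Y),
                // feature (IFM/OFM) and local (LX/LY) channels are permuted.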
                vec[Channelndex(l, WeightsChannelName::X)] = IFM().v;
                vec[Channelndex(l, WeightsChannelName::Y)] = OFM().v;
                vec[Channelndex(l, WeightsChannelName::IFM)] = LX().v;
                vec[Channelndex(l, WeightsChannelName::OFM)] = LY().v;
                vec[Channelndex(l, WeightsChannelName::LX)] = X().v;
                vec[Channelndex(l, WeightsChannelName::LY)] = Y().v;
            }
            else
            {
                assert(0);
            }

            return { vec, t, l };
        }
    }
}
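
// A minimal usage sketch (hypothetical values; assumes the { dims, type, layout }
// constructors used above and Datatype::F32 from the kernel_selector common types;
// dims are ordered by channel index, innermost first, as in dataChannelArray):
//
//   using namespace kernel_selector;
//   Tensor::DataTensor t({ 5, 7, 3, 2 }, Datatype::F32, Tensor::DataLayout::bfyx); // x=5, y=7, f=3, b=2
//   Tensor::DataTensor flat = t.FlattenFeatureAndSpatials();                       // bf, f = 3*7*5 = 105, b = 2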