/*
// Copyright (c) 2016-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#pragma once

#include "common_types.h"
#include "common_tools.h"
#include <vector>
#include <cassert>
#include <numeric>
#include <cstddef>
#include <algorithm>
#include <array>
#include <stdexcept>

namespace kernel_selector
{
#define KERNEL_SELECTOR_TENSOR_DIM_MAX 8

    namespace Tensor
    {
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // DataLayout
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        enum DataLayout
        {
            bf = 0,             // 1D+batch
            fb,                 // 1D+batch
            bfyx,               // 3D+batch
            yxfb,               // 3D+batch
            byxf,               // 3D+batch
            fyxb,               // 3D+batch
            bs_f_bsv8__af8,     // for optimized FC
            bs_f_bsv16__af8,    // for optimized FC
            bf8_xy16,           // for optimized conv1x1
            // TODO: most of the kernels don't support ROI; we need to handle it correctly.
            brfyx,              // 4D+batch
            winograd_2x3_s1_data,   // winograd convolution input, F(2,3) -- filter 3x3 with stride 1
            byxf_af32,              // for MMAD convolution
            byx8_f4,                // for MMAD convolution
            fs_bs_yx_bsv4_fsv32,    // for batched MMAD
            b_fs_yx_fsv4,           // reordering format for swizzled input for convolution using IMAD
            DataLayoutCount         // NUMBER OF ELEMENTS IN ENUM
        };
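
        // Example (informal): in an unpadded bfyx tensor of sizes B,F,Y,X the
        // element (b,f,y,x) lives at offset b*F*Y*X + f*Y*X + y*X + x, i.e. x is
        // the innermost (pitch 1) dimension and batch the outermost.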

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // WeightsLayout
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        enum WeightsLayout
        {
            oi = 0,
            io,
            oiyx,
            oyxi,
            iyxo,
            yxio,
            os_iyx_osv16,
            os_iyx_osv32,
            os_iyx_osv64,
            os_iyx_osv16_rotate_180,
            os_i_osv16,
            os_i_osv8__ai8,         // TODO: can we drop the alignment from the layout name?
            os_i_osv16__ai8,
            i_yxs_os_yxsv2_osv16,
            iy_xs_os_xsv2_osv16__ao32,
            iy_xs_os_xsv2_osv8__ao32,
            image_2d_weights_c4_fyx_b,      // image type fyx_b
            image_2d_weights_c1_b_fyx,      // image type 2d b_fyx single channel
            winograd_2x3_s1_weights,        // winograd convolution weights, F(2,3) -- filter 3x3 with stride 1
            winograd_2x3_s1_fused_weights,  // winograd convolution weights for fused kernel, F(2,3) -- filter 3x3 with stride 1
            winograd_6x3_s1_fused_weights,  // winograd convolution weights for fused kernel, F(6,3) -- filter 3x3 with stride 1
            image_2d_weights_winograd_6x3_s1_fbxyb, // image 2d winograd convolution weights for fused kernel, F(6,3) -- filter 3x3 with stride 1
            image_2d_weights_winograd_6x3_s1_xfbyb, // image 2d winograd convolution weights for fused kernel, F(6,3) -- filter 3x3 with stride 1
            os_is_yx_isa8_osv8_isv4,                // for MMAD convolution
            os_is_yx_isa8_osv8_isv4_swizzled_by_4,  // for MMAD convolution, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
            is_o_yx_isv32,                          // for MMAD 1x1 convolutions
            is_o32_yx_isv32_swizzled_by_4,          // for MMAD 1x1 convolutions, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
            os_is_y_x8_osv8_isv4,    // for MMAD convolutions
            bf_lyx_yx,               // local convolution
            os_is_yx_osv16_isv4,     // swizzled weights for convolution using IMAD
            WeightsLayoutCount       // NUMBER OF ELEMENTS IN ENUM
        };
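
        // Note (informal): the blocked names encode their tiling, e.g. in
        // os_iyx_osv16 the OFM ("os") dimension is split into slices of 16
        // ("osv16"), so a kernel can load 16 output channels contiguously.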

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // Pad
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        struct Pad
        {
            size_t before;
            size_t after;

            size_t Total() const { return before + after; }
        };
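
        // e.g. Pad{ 2, 1 }.Total() == 3.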

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // Dim
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        struct Dim
        {
            size_t v;
            size_t pitch;
            Pad    pad;

            size_t LogicalDimPadded() const { return v + pad.Total(); }
        };
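
        // e.g. Dim{ 40, 80, { 20, 20 } } describes 40 logical elements with a
        // pitch of 80 and 20 padded elements on each side, so
        // LogicalDimPadded() == 80.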

        using NDims = std::vector<Dim>;

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // channel extraction
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        enum class DataChannelName
        {
            X       = 0,
            Y       = 1,
            FEATURE = 2,
            ROI     = 3,
            BATCH   = 4,
        };

        enum class WeightsChannelName
        {
            X   = 0,
            Y   = 1,
            IFM = 2,
            OFM = 3,
            LX  = 4,
            LY  = 5,
        };

        inline bool SimpleLayout(WeightsLayout l)
        {
            switch (l)
            {
            case WeightsLayout::oi:
            case WeightsLayout::io:
            case WeightsLayout::oiyx:
            case WeightsLayout::oyxi:
            case WeightsLayout::iyxo:
            case WeightsLayout::yxio:
                return true;
            default:
                return false;
            }
        }

        inline bool SimpleLayout(DataLayout l)
        {
            switch (l)
            {
            case DataLayout::bf:
            case DataLayout::fb:
            case DataLayout::bfyx:
            case DataLayout::yxfb:
            case DataLayout::byxf:
            case DataLayout::fyxb:
                return true;
            default:
                return false;
            }
        }

        inline bool IsImageType(WeightsLayout l)
        {
            switch (l)
            {
            case WeightsLayout::image_2d_weights_c4_fyx_b:
            case WeightsLayout::image_2d_weights_c1_b_fyx:
            case WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb:
            case WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb:
                return true;
            default:
                return false;
            }
        }
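
        // e.g. SimpleLayout(DataLayout::bfyx) == true, while blocked formats
        // such as DataLayout::byxf_af32 are not "simple".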

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // Tensor Explanation
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        //
        // resource     - 80x80
        //      totalSize   - 6400
        //      x pitch     - 1
        //      y pitch     - 80
        //
        // view         - 60x60
        // viewOffset   - (20,20) => 20*80+20 = 1620
        //
        // padding (contains "paddedVal"):
        //      before  - x=20, y=20
        //      after   - x=20, y=20.
        //
        // logical data - 40x40 (contains the actual data).
        //
        // firstElementOffset:
        //      (viewOffset_x + padBefore_x) + (viewOffset_y + padBefore_y)*y_pitch =
        //      viewOffset + padBefore_x + padBefore_y*y_pitch =
        //      1620 + 20 + 20*80 = 3240
        //
        //
        //                                      whole resource (80x80)
        // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        // +                                                                                               +
        // +                                                                                               +
        // +                                view inside resource (60x60)                                   +
        // +       +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++       +
        // +       + start of padded part(20,20) = viewOffset                                      +       +
        // +       +                                                                               +       +
        // +       +                             logical data (40x40)                              +       +
        // +       +       +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++       +       +
        // +       +       + first element (40,40)                                         +       +       +
        // +       +       +                                                               +       +       +
        // +       +       +                                                               +       +       +
        // +       +       +                                                               +       +       +
        // +       +       +                                                               +       +       +
        // +       +       +                                                               +       +       +
        // +       +       +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++       +       +
        // +       +                                                                               +       +
        // +       +                                                                               +       +
        // +       +                                                                               +       +
        // +       +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++       +
        // +                                                                                               +
        // +                                                                                               +
        // +                                                                                               +
        // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        //
        //
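        // A minimal sketch (reusing the numbers above) of how the TensorBase
        // constructor below derives firstElementOffset from viewOffset and the
        // per-dimension leading padding:
        //
        //     NDims nd = {
        //         { 40, 1,  { 20, 20 } },  // x: v=40, pitch=1,  pad 20 before/after
        //         { 40, 80, { 20, 20 } },  // y: v=40, pitch=80, pad 20 before/after
        //     };
        //     // firstElementOffset = viewOffset + 1*20 + 80*20 = 1620 + 20 + 1600 = 3240
        //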
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // TensorBase
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        struct TensorBase
        {
        protected:
            NDims   dims;
            size_t  viewOffset          = 0;    // in elements
            size_t  firstElementOffset  = 0;    // in elements (viewOffset plus leading padding)
            size_t  totalSize           = 0;    // in elements
            float   paddedVal           = 0.f;  // value stored in the padded area

        public:
            TensorBase() = default;
            TensorBase(const TensorBase&) = default;
            TensorBase& operator=(const TensorBase&) = default;

            TensorBase(const NDims& nd, size_t viewOf, size_t sz, float pv)
                : dims(nd)
                , viewOffset(viewOf)
                , firstElementOffset(std::accumulate(nd.cbegin(), nd.cend(), viewOf, [](size_t val, const Dim& d) { return val + d.pitch*d.pad.before; }))
                , totalSize(sz)
                , paddedVal(pv)
            {
                // If no explicit size was given, derive it from the largest
                // pitch * padded-extent over all dimensions, plus the view offset.
                if (totalSize == 0)
                {
                    for (const auto& d : dims)
                    {
                        totalSize = std::max(totalSize, d.pitch*(d.LogicalDimPadded()));
                    }

                    totalSize += viewOffset;
                }

                // Each dimension's pitch must cover the padded extent of all
                // inner (lower-index) dimensions.
                size_t minimalPitch = 1;

                for (const auto& d : dims)
                {
                    if (d.pitch < minimalPitch)
                    {
                        throw std::runtime_error("Tensor pitches weren't set correctly");
                    }

                    minimalPitch *= d.LogicalDimPadded();
                }

                if (totalSize < (minimalPitch + viewOffset))
                {
                    throw std::runtime_error("Tensor total size wasn't set correctly");
                }
            }
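
            // Sketch (hypothetical values): with dims x = {4, pitch 1} and
            // y = {3, pitch 4}, no padding and sz == 0, the constructor above
            // derives totalSize = max(1*4, 4*3) + viewOffset = 12 + viewOffset.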

            float           GetPaddedVal()          const { return paddedVal; }
            size_t          GetFirstElementOffset() const { return firstElementOffset; }
            size_t          GetViewOffset()         const { return viewOffset; }
            const NDims&    GetDims()               const { return dims; }

            virtual uint32_t    ElementSize() const = 0;

            // Size of the actual data (without the padded part)
            size_t LogicalSize() const
            {
                return std::accumulate(dims.cbegin(), dims.cend(), (size_t)1, [](size_t val, const Dim& d) {return val*d.v; });
            }
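
            // e.g. a 2x3x4x5 tensor has LogicalSize() == 120 regardless of padding.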

            // Dimensions of the actual data (without the padded part)
            std::vector<size_t> LogicalDims() const
            {
                std::vector<size_t> res(dims.size());
                std::transform(dims.begin(), dims.end(), res.begin(), [](const Dim& d) { return d.v; });
                return res;
            }

            // Whole buffer size (in elements)
            size_t PhysicalSize() const
            {
                return totalSize;
            }

            // Whole buffer size (in bytes)
            size_t PhysicalSizeInBytes() const
            {
                return totalSize * ElementSize();
            }

            // Returns true if padding or a view makes the pitches differ from the
            // logical dimensions; when it returns false, the data can be treated
            // as a flat 1D resource.
            bool PitchesDifferFromLogicalDims() const
            {
                bool differ = false;

                size_t calc_pitch = 1;
                for (const auto& d : dims)
                {
                    differ |= (d.pitch != calc_pitch);
                    calc_pitch *= d.v;
                }

                return differ;
            }
        };
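
        // Sketch (hypothetical values): an unpadded 2D tensor with
        // dims = { { 4, 1, { 0, 0 } }, { 3, 4, { 0, 0 } } } has every pitch equal
        // to the running product of the logical sizes, so
        // PitchesDifferFromLogicalDims() returns false and the buffer can be
        // treated as one flat range of 12 elements.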

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // TensorBaseT
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        template<typename DType, typename Layout>
        struct TensorBaseT : public TensorBase
        {
        protected:
            DType     dtype;
            Layout    layout;

            // Returns the index of the given channel within dims for layout l,
            // or -1 if the layout doesn't carry that channel.
            template <typename ArrayT, typename ChannelName>
            static inline int Channelndex(const ArrayT& channelArr, Layout l, ChannelName channelName)
            {
                size_t channel = static_cast<size_t>(channelName);
                assert(channel < channelArr[l].size());

                return channelArr[l][channel];
            }

            // Extracts the Dim for the given channel; a missing channel yields a
            // neutral Dim of size 1 with no padding.
            template <typename ArrayT, typename ChannelName>
            static inline Dim Extract(const ArrayT& channelArr, Layout l, ChannelName channelName, const NDims& dims)
            {
                const int i = Channelndex(channelArr, l, channelName);
                return ((i < 0) || (i >= (int)dims.size())) ? Dim{ 1, 1,{ 0,0 } } : dims[i];
            }

            // Counts the channels that the layout actually carries (index != -1).
            template <typename ArrayT>
            static inline uint32_t ChannelsCount(const ArrayT& channelArr, Layout l)
            {
                const auto& entry = channelArr[l];
                return std::accumulate(entry.begin(), entry.end(), 0U, [](uint32_t count, int v) {return count + ((v != -1) ? 1 : 0); });
            }

        public:
            TensorBaseT() = default;
            TensorBaseT(const TensorBaseT&) = default;
            TensorBaseT& operator=(const TensorBaseT&) = default;

            TensorBaseT(const NDims& nd, DType dt, Layout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
                TensorBase(nd, of, sz, pv), dtype(dt), layout(l) {}

            DType       GetDType()      const           { return dtype; }
            Layout      GetLayout()     const           { return layout; }
            uint32_t    ElementSize()   const override  { return BytesPerElement(dtype); }
            size_t      Dimentions()    const           { return dims.size(); }
            bool        SimpleLayout()  const           { return Tensor::SimpleLayout(layout); }

            bool operator==(const TensorBaseT& t) const
            {
                bool same =
                    dtype       == t.dtype      &&
                    layout      == t.layout     &&
                    paddedVal   == t.paddedVal  &&
                    viewOffset  == t.viewOffset &&
                    dims.size() == t.dims.size();
                if (same)
                {
                    for (size_t i = 0; i < dims.size(); i++)
                    {
                        same &=
                            dims[i].v == t.dims[i].v &&
                            dims[i].pad.before == t.dims[i].pad.before &&
                            dims[i].pad.after == t.dims[i].pad.after &&
                            dims[i].pitch == t.dims[i].pitch;
                    }
                }

                return same;
            }

            // Same data type, layout and logical sizes (padding/pitches may differ)
            bool SameDims(const TensorBaseT& t) const
            {
                bool same =
                    dtype == t.dtype &&
                    layout == t.layout &&
                    dims.size() == t.dims.size();
                if (same)
                {
                    for (size_t i = 0; i < dims.size(); i++)
                    {
                        same &= dims[i].v == t.dims[i].v;
                    }
                }

                return same;
            }

            // Same logical sizes only (data type and layout may differ)
            bool SameDimsSizes(const TensorBaseT& t) const
            {
                bool same =
                    dims.size() == t.dims.size();
                if (same)
                {
                    for (size_t i = 0; i < dims.size(); i++)
                    {
                        same &= dims[i].v == t.dims[i].v;
                    }
                }

                return same;
            }
        };

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // DataTensor
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        struct DataTensor : public TensorBaseT<Datatype, DataLayout>
        {
            DataTensor() = default;
            DataTensor(const DataTensor&) = default;
            DataTensor& operator=(const DataTensor&) = default;

            DataTensor(const NDims& nd, Datatype dt, DataLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
                TensorBaseT(nd, dt, l, of, sz, pv) {}

            DataTensor(const std::vector<size_t>& d, Datatype dt, DataLayout l) :
                TensorBaseT<Datatype, DataLayout>(GetSimpleDims(d, l), dt, l) {}

            Dim X()         const { return Extract(layout, DataChannelName::X, dims); }
            Dim Y()         const { return Extract(layout, DataChannelName::Y, dims); }
            Dim Feature()   const { return Extract(layout, DataChannelName::FEATURE, dims); }
            Dim ROI()       const { return Extract(layout, DataChannelName::ROI, dims); }
            Dim Batch()     const { return Extract(layout, DataChannelName::BATCH, dims); }

            DataTensor  TransformIgnorePadding(DataLayout l) const;
            DataTensor  FlattenFeatureAndSpatials() const;

            static inline Dim Extract(DataLayout l, DataChannelName channel, const NDims& d)
            {
                return TensorBaseT::Extract(dataChannelArray, l, channel, d);
            }

            static inline int Channelndex(DataLayout l, DataChannelName channel)
            {
                return TensorBaseT::Channelndex(dataChannelArray, l, channel);
            }

            static inline uint32_t ChannelsCount(DataLayout l)
            {
                return TensorBaseT::ChannelsCount(dataChannelArray, l);
            }

        private:
            static std::array<std::array<int, 5>, DataLayout::DataLayoutCount> dataChannelArray;
            static NDims GetSimpleDims(const std::vector<size_t>& d, DataLayout l);
        };
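
        // Usage sketch (hypothetical sizes), assuming the size vector is given
        // innermost-first (x, y, f, b for bfyx):
        //
        //     DataTensor t({ 224, 224, 3, 1 }, Datatype::F32, DataLayout::bfyx);
        //     // t.X().v == 224, t.Feature().v == 3, t.Batch().v == 1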

        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // WeightsTensor
        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        struct WeightsTensor : TensorBaseT<WeightsType, WeightsLayout>
        {
            WeightsTensor() = default;
            WeightsTensor(const WeightsTensor&) = default;
            WeightsTensor& operator=(const WeightsTensor&) = default;

            WeightsTensor(const NDims& nd, WeightsType dt, WeightsLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
                TensorBaseT(nd, dt, l, of, sz, pv) {}

            WeightsTensor(const std::vector<size_t>& d, WeightsType dt, WeightsLayout l) :
                TensorBaseT<WeightsType, WeightsLayout>(GetSimpleDims(d, l), dt, l) {}

            WeightsTensor TransformIgnorePadding(WeightsLayout l) const { return TransformIgnorePadding(l, dtype); }
            WeightsTensor TransformIgnorePadding(WeightsLayout l, WeightsType t) const;

            Dim X()   const { return Extract(layout, WeightsChannelName::X, dims); }
            Dim Y()   const { return Extract(layout, WeightsChannelName::Y, dims); }
            Dim IFM() const { return Extract(layout, WeightsChannelName::IFM, dims); }
            Dim OFM() const { return Extract(layout, WeightsChannelName::OFM, dims); }
            Dim LX()  const { return Extract(layout, WeightsChannelName::LX, dims); }
            Dim LY()  const { return Extract(layout, WeightsChannelName::LY, dims); }

            static inline Dim Extract(WeightsLayout l, WeightsChannelName channel, const NDims& d)
            {
                return TensorBaseT::Extract(weightsChannelArray, l, channel, d);
            }

            static inline int Channelndex(WeightsLayout l, WeightsChannelName channel)
            {
                return TensorBaseT::Channelndex(weightsChannelArray, l, channel);
            }

            static inline uint32_t ChannelsCount(WeightsLayout l)
            {
                return TensorBaseT::ChannelsCount(weightsChannelArray, l);
            }

        private:
            static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
            static std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> weightsChannelArray;
        };
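
        // Usage sketch (hypothetical sizes), assuming the size vector is given
        // innermost-first (x, y, ifm, ofm for oiyx):
        //
        //     WeightsTensor w({ 3, 3, 16, 32 }, WeightsType::F32, WeightsLayout::oiyx);
        //     // w.OFM().v == 32; w.TransformIgnorePadding(WeightsLayout::os_iyx_osv16)
        //     // re-lays the same logical data into the blocked kernel format.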
    }
}