2 // Copyright (c) 2016-2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include "common_types.h"
20 #include "common_tools.h"
28 namespace kernel_selector
30 #define KERNEL_SELECTOR_TENSOR_DIM_MAX 8
34 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
36 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    bs_f_bsv8__af8,       // for optimized fully-connected (FC) kernels
    bs_f_bsv16__af8,      // for optimized fully-connected (FC) kernels
    bf8_xy16,             // for optimized conv1x1
    // TODO: most of the kernels don't support ROI; we need to handle it correctly.
    winograd_2x3_s1_data, // Winograd convolution input, F(2,3) -- filter 3x3 with stride 1
    byxf_af32,            // for MMAD convolution
    byx8_f4,              // for MMAD convolution
    fs_bs_yx_bsv4_fsv32,  // for batched MMAD
    b_fs_yx_fsv4,         // reordering format for swizzled input for convolution using IMAD
    DataLayoutCount       // NUMBER OF ELEMENTS IN ENUM
58 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
60 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    os_iyx_osv16_rotate_180,
    os_i_osv8__ai8,                          // TODO: can we drop the alignment from the layout name?
    iy_xs_os_xsv2_osv16__ao32,
    iy_xs_os_xsv2_osv8__ao32,
    image_2d_weights_c4_fyx_b,               // image type fyx_b
    image_2d_weights_c1_b_fyx,               // image type 2d b_fyx single channel
    winograd_2x3_s1_weights,                 // Winograd convolution weights, F(2, 3) -- filter 3x3 with stride 1
    winograd_2x3_s1_fused_weights,           // Winograd convolution weights for fused kernel, F(2, 3) -- filter 3x3 with stride 1
    winograd_6x3_s1_fused_weights,           // Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    image_2d_weights_winograd_6x3_s1_fbxyb,  // image 2d Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    image_2d_weights_winograd_6x3_s1_xfbyb,  // image 2d Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    os_is_yx_isa8_osv8_isv4,                 // for MMAD convolution
    os_is_yx_isa8_osv8_isv4_swizzled_by_4,   // for MMAD convolution, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
    is_o_yx_isv32,                           // for MMAD 1x1 convolutions
    is_o32_yx_isv32_swizzled_by_4,           // for MMAD 1x1 convolutions, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
    os_is_y_x8_osv8_isv4,                    // for MMAD convolutions
    bf_lyx_yx,                               // local convolution
    os_is_yx_osv16_isv4,                     // swizzled weights for convolution using IMAD
    WeightsLayoutCount                       // NUMBER OF ELEMENTS IN ENUM
96 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
98 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
104 size_t Total() const { return before + after; }
107 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
109 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
116 size_t LogicalDimPadded() const { return v + pad.Total(); }
119 using NDims = std::vector<Dim>;
121 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
123 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
124 enum class DataChannelName
133 enum class WeightsChannelName
// Returns true for "simple" weight layouts: plain permutations of the
// o/i/y/x dimensions with no blocking, swizzling or image storage.
inline bool SimpleLayout(WeightsLayout l)
    case WeightsLayout::oi:
    case WeightsLayout::io:
    case WeightsLayout::oiyx:
    case WeightsLayout::oyxi:
    case WeightsLayout::iyxo:
    case WeightsLayout::yxio:
// Returns true for "simple" data layouts: plain permutations of the
// b/f/y/x dimensions with no blocking or swizzling.
inline bool SimpleLayout(DataLayout l)
    case DataLayout::bfyx:
    case DataLayout::yxfb:
    case DataLayout::byxf:
    case DataLayout::fyxb:
// Returns true for weight layouts that are stored as 2D images
// rather than plain linear buffers.
inline bool IsImageType(WeightsLayout l)
    case WeightsLayout::image_2d_weights_c4_fyx_b:
    case WeightsLayout::image_2d_weights_c1_b_fyx:
    case WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb:
    case WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb:
189 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor Explanation
191 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
199 // viewOffset - (20,20) => 20*80+20 = 1620
201 // padding (contains "paddedVal"):
202 // before - x=20, y=20
203 // after - x=20, y=20.
205 // logical data - 40x40 (contains the actual data).
207 // firstElementOffset:
208 // (viewOffset_x + padBefore_x) + (viewOffset_y + padBefore_y)*y_pitch =
209 // viewOffset + padBefore_x + padBefore_y*y_pitch =
210 // 1620 + 20 + 20*80 = 3240
213 // whole resource (80x80)
214 // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
217 // + view inside resource (60x60) +
218 // + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +
219 // + + start of padded part(20,20) = viewOffset + +
221 // + + logical data (40x40) + +
222 // + + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +
223 // + + + first element (40,40) + + +
229 // + + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +
233 // + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +
237 // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
240 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
242 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    size_t viewOffset = 0;         // offset of the view inside the whole resource, in elements
    size_t firstElementOffset = 0; // offset of the first logical element: view offset + per-dim "before" padding
    size_t totalSize = 0;          // whole buffer size, in elements
    float paddedVal = 0.f;         // value stored in the padded area
    TensorBase() = default;
    TensorBase(const TensorBase&) = default;
    TensorBase& operator=(const TensorBase&) = default;

    // Constructs a tensor from explicit dims, a view offset (in elements),
    // a total buffer size (in elements) and the value used to fill the
    // padded area. Throws std::runtime_error when the pitches or the total
    // size are inconsistent with the dims.
    TensorBase(const NDims& nd, size_t viewOf, size_t sz, float pv)
        // first element = view offset plus the "before" padding of every dimension
        , firstElementOffset(std::accumulate(nd.cbegin(), nd.cend(), viewOf, [](size_t val, const Dim& d) { return val + d.pitch*d.pad.before; }))
        // derive the buffer size from the largest (pitch * padded extent) over all dims
        for (const auto& d : dims)
            totalSize = std::max(totalSize, d.pitch*(d.LogicalDimPadded()));
        totalSize += viewOffset;
        // sanity check: each pitch must be at least the product of the
        // padded extents of all lower dimensions
        size_t minimalPitch = 1;
        for (const auto& d : dims)
            if (d.pitch < minimalPitch)
                throw std::runtime_error("Tensor pitches didn't set correctly");
            minimalPitch *= d.LogicalDimPadded();
        if (totalSize < (minimalPitch + viewOffset))
            throw std::runtime_error("Tensor total Size didn't set correctly");

    // Simple accessors.
    float GetPaddedVal() const { return paddedVal; }
    size_t GetFirstElementOffset() const { return firstElementOffset; }
    size_t GetViewOffset() const { return viewOffset; }
    const NDims& GetDims() const { return dims; }
    // Size in bytes of a single element; depends on the concrete data type.
    virtual uint32_t ElementSize() const = 0;

    // Size of the actual data (without the padded part): product of all logical dims.
    size_t LogicalSize() const
        return std::accumulate(dims.cbegin(), dims.cend(), (size_t)1, [](size_t val, const Dim& d) {return val*d.v; });

    // Dimensions of the actual data (without the padded part).
    std::vector<size_t> LogicalDims() const
        std::vector<size_t> res(dims.size());
        std::transform(dims.begin(), dims.end(), res.begin(), [](const Dim& d) { return d.v; });

    // Whole buffer size (in elements).
    size_t PhysicalSize() const

    // Whole buffer size (in bytes).
    size_t PhysicalSizeInBytes() const
        return totalSize * ElementSize();

    // True when padding/view offsets exist between the logical dimensions --
    // i.e. when the data can NOT be treated as a flat 1D resource
    // (some pitch differs from the running product of logical sizes).
    bool PitchesDifferFromLogicalDims() const
        size_t calc_pitch = 1;
        for (const auto& d : dims)
            differ |= (d.pitch != calc_pitch);
342 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
344 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Typed tensor base: adds an element data type (DType) and a layout
// enumeration (Layout) on top of TensorBase.
template<typename DType, typename Layout>
struct TensorBaseT : public TensorBase
    // Returns the index of `channelName` inside `dims` for layout `l`
    // as stored in the per-layout channel table; a negative entry means
    // the layout does not contain that channel.
    // (NOTE: "Channelndex" is a historical typo of "ChannelIndex",
    // kept as-is for API compatibility.)
    template <typename ArrayT, typename ChannelName>
    static inline int Channelndex(const ArrayT& channelArr, Layout l, ChannelName channelName)
        size_t channel = static_cast<size_t>(channelName);
        assert(channel < channelArr[l].size());
        return channelArr[l][channel];

    // Returns the Dim of the requested channel, or a unit Dim {1,1,{0,0}}
    // when the channel is absent from (or out of range for) the layout.
    template <typename ArrayT, typename ChannelName>
    static inline Dim Extract(const ArrayT& channelArr, Layout l, ChannelName channelName, const NDims& dims)
        const int i = Channelndex(channelArr, l, channelName);
        return ((i < 0) || (i >= (int)dims.size())) ? Dim{ 1, 1,{ 0,0 } } : dims[i];

    // Number of channels present in layout `l`: counts the entries of the
    // channel table that are not -1.
    template <typename ArrayT>
    static inline uint32_t ChannelsCount(const ArrayT& channelArr, Layout l)
        const auto& entry = channelArr[l];
        return std::accumulate(entry.begin(), entry.end(), 0U, [](uint32_t count, int v) {return count + ((v != -1) ? 1 : 0); });
    TensorBaseT() = default;
    TensorBaseT(const TensorBaseT&) = default;
    TensorBaseT& operator=(const TensorBaseT&) = default;

    // Constructs a typed tensor: dims, element data type, layout, and
    // optional view offset (elements), total size (elements) and padded value.
    TensorBaseT(const NDims& nd, DType dt, Layout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBase(nd, of, sz, pv), dtype(dt), layout(l) {}

    DType GetDType() const { return dtype; }
    Layout GetLayout() const { return layout; }
    // Element size in bytes, derived from the data type.
    uint32_t ElementSize() const override { return BytesPerElement(dtype); }
    // Number of dimensions. (NOTE: "Dimentions" is a historical typo of
    // "Dimensions", kept as-is for API compatibility.)
    size_t Dimentions() const { return dims.size(); }
    bool SimpleLayout() const { return Tensor::SimpleLayout(layout); }
    // Full equality: layout, padded value, view offset, and every
    // dimension's size, padding and pitch must all match.
    bool operator==(const TensorBaseT& t) const
            layout == t.layout &&
            paddedVal == t.paddedVal &&
            viewOffset == t.viewOffset &&
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
                dims[i].v == t.dims[i].v &&
                dims[i].pad.before == t.dims[i].pad.before &&
                dims[i].pad.after == t.dims[i].pad.after &&
                dims[i].pitch == t.dims[i].pitch;

    // Same layout and same logical sizes; padding and pitches are ignored.
    bool SameDims(const TensorBaseT& t) const
            layout == t.layout &&
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
            same &= dims[i].v == t.dims[i].v;

    // Same logical sizes only; layout, padding and pitches are ignored.
    bool SameDimsSizes(const TensorBaseT& t) const
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
            same &= dims[i].v == t.dims[i].v;
445 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
447 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor of activation data: element type Datatype, layout DataLayout.
struct DataTensor : public TensorBaseT<Datatype, DataLayout>
    DataTensor() = default;
    DataTensor(const DataTensor&) = default;
    DataTensor& operator=(const DataTensor&) = default;

    DataTensor(const NDims& nd, Datatype dt, DataLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBaseT(nd, dt, l, of, sz, pv) {}

    // Builds the tensor from plain per-channel sizes; the NDims (pitches etc.)
    // are derived for layout `l` by GetSimpleDims.
    DataTensor(const std::vector<size_t>& d, Datatype dt, DataLayout l) :
        TensorBaseT<Datatype, DataLayout>(GetSimpleDims(d, l), dt, l) {}

    // Per-channel accessors; return a unit Dim when the layout lacks the channel.
    Dim X() const { return Extract(layout, DataChannelName::X, dims); }
    Dim Y() const { return Extract(layout, DataChannelName::Y, dims); }
    Dim Feature() const { return Extract(layout, DataChannelName::FEATURE, dims); }
    Dim ROI() const { return Extract(layout, DataChannelName::ROI, dims); }
    Dim Batch() const { return Extract(layout, DataChannelName::BATCH, dims); }

    DataTensor TransformIgnorePadding(DataLayout l) const;
    DataTensor FlattenFeatureAndSpatials() const;

    // Static helpers forwarding to TensorBaseT with this type's channel table.
    static inline Dim Extract(DataLayout l, DataChannelName channel, const NDims& d)
        return TensorBaseT::Extract(dataChannelArray, l, channel, d);

    static inline int Channelndex(DataLayout l, DataChannelName channel)
        return TensorBaseT::Channelndex(dataChannelArray, l, channel);

    static inline uint32_t ChannelsCount(DataLayout l)
        return TensorBaseT::ChannelsCount(dataChannelArray, l);

    // Per-layout channel-index table: for each DataLayout, the dim index of
    // each DataChannelName (-1 when the channel is absent).
    static std::array<std::array<int, 5>, DataLayout::DataLayoutCount> dataChannelArray;
    static NDims GetSimpleDims(const std::vector<size_t>& d, DataLayout l);
488 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
490 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor of convolution/FC weights: element type WeightsType, layout WeightsLayout.
struct WeightsTensor : TensorBaseT<WeightsType, WeightsLayout>
    WeightsTensor() = default;
    WeightsTensor(const WeightsTensor&) = default;
    WeightsTensor& operator=(const WeightsTensor&) = default;

    WeightsTensor(const NDims& nd, WeightsType dt, WeightsLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBaseT(nd, dt, l, of, sz, pv) {}

    // Builds the tensor from plain per-channel sizes; the NDims (pitches etc.)
    // are derived for layout `l` by GetSimpleDims.
    WeightsTensor(const std::vector<size_t>& d, WeightsType dt, WeightsLayout l) :
        TensorBaseT<WeightsType, WeightsLayout>(GetSimpleDims(d, l), dt, l) {}

    // Re-layout helpers; the single-argument overload keeps the current data type.
    WeightsTensor TransformIgnorePadding(WeightsLayout l) const { return TransformIgnorePadding(l, dtype); }
    WeightsTensor TransformIgnorePadding(WeightsLayout l, WeightsType t) const;

    // Per-channel accessors; return a unit Dim when the layout lacks the channel.
    Dim X() const { return Extract(layout, WeightsChannelName::X, dims); }
    Dim Y() const { return Extract(layout, WeightsChannelName::Y, dims); }
    Dim IFM() const { return Extract(layout, WeightsChannelName::IFM, dims); }
    Dim OFM() const { return Extract(layout, WeightsChannelName::OFM, dims); }
    Dim LX() const { return Extract(layout, WeightsChannelName::LX, dims); }
    Dim LY() const { return Extract(layout, WeightsChannelName::LY, dims); }

    // Static helpers forwarding to TensorBaseT with this type's channel table.
    static inline Dim Extract(WeightsLayout l, WeightsChannelName channel, const NDims& d)
        return TensorBaseT::Extract(weightsChannelArray, l, channel, d);

    static inline int Channelndex(WeightsLayout l, WeightsChannelName channel)
        return TensorBaseT::Channelndex(weightsChannelArray, l, channel);

    static inline uint32_t ChannelsCount(WeightsLayout l)
        return TensorBaseT::ChannelsCount(weightsChannelArray, l);

    static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
    // Per-layout channel-index table: for each WeightsLayout, the dim index of
    // each WeightsChannelName (-1 when the channel is absent).
    static std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> weightsChannelArray;