2 // Copyright (c) 2016-2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include "common_types.h"
20 #include "common_tools.h"
28 namespace kernel_selector
30 #define KERNEL_SELECTOR_TENSOR_DIM_MAX 8
34 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
36 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    bs_f_bsv8__af8,       // for optimized fully-connected (FC) kernels
    bs_f_bsv16__af8,      // for optimized fully-connected (FC) kernels
    bf8_xy16,             // for optimized conv1x1
    // TODO: most of the kernels don't support ROI; we need to handle it correctly.
    winograd_2x3_s1_data, // Winograd convolution input, F(2,3) -- filter 3x3 with stride 1
    byxf_af32,            // for MMAD convolution
    byx8_f4,              // for MMAD convolution
    fs_bs_yx_bsv4_fsv32,  // for batched MMAD
    b_fs_yx_fsv4,         // reordering format for swizzled input for convolution using IMAD
    DataLayoutCount       // NUMBER OF ELEMENTS IN ENUM
58 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
60 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    os_iyx_osv16_rotate_180,
    os_i_osv8__ai8,                          // TODO: can we drop the alignment from the layout name?
    iy_xs_os_xsv2_osv16__ao32,
    iy_xs_os_xsv2_osv8__ao32,
    image_2d_weights_c4_fyx_b,               // image type fyx_b
    image_2d_weights_c1_b_fyx,               // image type 2d b_fyx single channel
    winograd_2x3_s1_weights,                 // Winograd convolution weights, F(2, 3) -- filter 3x3 with stride 1
    winograd_2x3_s1_fused_weights,           // Winograd convolution weights for fused kernel, F(2, 3) -- filter 3x3 with stride 1
    winograd_6x3_s1_fused_weights,           // Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    image_2d_weights_winograd_6x3_s1_fbxyb,  // image 2d Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    image_2d_weights_winograd_6x3_s1_xfbyb,  // image 2d Winograd convolution weights for fused kernel, F(6, 3) -- filter 3x3 with stride 1
    os_is_yx_isa8_osv8_isv4,                 // for MMAD convolution
    os_is_yx_isa8_osv8_isv4_swizzled_by_4,   // for MMAD convolution, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
    is_o_yx_isv32,                           // for MMAD 1x1 convolutions
    is_o32_yx_isv32_swizzled_by_4,           // for MMAD 1x1 convolutions, swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
    os_is_y_x8_osv8_isv4,                    // for MMAD convolutions
    bf_lyx_yx,                               // local convolution
    os_is_yx_osv16_isv4,                     // swizzled weights for convolution using IMAD
    WeightsLayoutCount                       // NUMBER OF ELEMENTS IN ENUM
96 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
98 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
104 size_t Total() const { return before + after; }
107 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
109 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
116 size_t LogicalDimPadded() const { return v + pad.Total(); }
119 using NDims = std::vector<Dim>;
121 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
123 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
124 enum class DataChannelName
133 enum class WeightsChannelName
// Returns true for "simple" weight layouts: plain permutations of the
// o/i/y/x dimensions with no blocking, swizzling or image storage.
inline bool SimpleLayout(WeightsLayout l)
    case WeightsLayout::oi:
    case WeightsLayout::io:
    case WeightsLayout::oiyx:
    case WeightsLayout::oyxi:
    case WeightsLayout::iyxo:
    case WeightsLayout::yxio:
// Returns true for "simple" data layouts: plain permutations of the
// b/f/y/x dimensions with no blocking or swizzling.
inline bool SimpleLayout(DataLayout l)
    case DataLayout::bfyx:
    case DataLayout::yxfb:
    case DataLayout::byxf:
    case DataLayout::fyxb:
// Returns true for weight layouts that are stored as 2D images
// rather than plain linear buffers.
inline bool IsImageType(WeightsLayout l)
    case WeightsLayout::image_2d_weights_c4_fyx_b:
    case WeightsLayout::image_2d_weights_c1_b_fyx:
    case WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb:
    case WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb:
189 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor Explanation
191 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
199 // viewOffset - (20,20) => 20*80+20 = 1620
201 // padding (contains "paddedVal"):
202 // before - x=20, y=20
203 // after - x=20, y=20.
205 // logical data - 40x40 (contains the actual data).
207 // firstElementOffset:
208 // (viewOffset_x + padBefore_x) + (viewOffset_y + padBefore_y)*y_pitch =
209 // viewOffset + padBefore_x + padBefore_y*y_pitch =
210 // 1620 + 20 + 20*80 = 3240
213 // whole resource (80x80)
214 // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
217 // + view inside resource (60x60) +
218 // + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +
219 // + + start of padded part(20,20) = viewOffset + +
221 // + + logical data (40x40) + +
222 // + + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +
223 // + + + first element (40,40) + + +
229 // + + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +
233 // + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +
237 // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
240 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
242 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    size_t viewOffset = 0;         // offset of the view inside the whole resource, in elements
    size_t firstElementOffset = 0; // offset of the first logical element: view offset + per-dim "before" padding
    size_t totalSize = 0;          // whole buffer size, in elements
    float paddedVal = 0.f;         // value stored in the padded area
    TensorBase() = default;
    TensorBase(const TensorBase&) = default;
    TensorBase& operator=(const TensorBase&) = default;

    // Constructs a tensor from explicit dims, a view offset (in elements),
    // a total buffer size (in elements) and the value used to fill the
    // padded area. Throws std::runtime_error when the pitches or the total
    // size are inconsistent with the dims.
    TensorBase(const NDims& nd, size_t viewOf, size_t sz, float pv)
        // first element = view offset plus the "before" padding of every dimension
        , firstElementOffset(std::accumulate(nd.cbegin(), nd.cend(), viewOf, [](size_t val, const Dim& d) { return val + d.pitch*d.pad.before; }))
        // derive the buffer size from the largest (pitch * padded extent) over all dims
        for (const auto& d : dims)
            totalSize = std::max(totalSize, d.pitch*(d.LogicalDimPadded()));
        totalSize += viewOffset;
        // sanity check: each pitch must be at least the product of the
        // padded extents of all lower dimensions
        size_t minimalPitch = 1;
        for (const auto& d : dims)
            if (d.pitch < minimalPitch)
                throw std::runtime_error("Tensor pitches didn't set correctly");
            minimalPitch *= d.LogicalDimPadded();
        if (totalSize < (minimalPitch + viewOffset))
            throw std::runtime_error("Tensor total Size didn't set correctly");

    // Simple accessors.
    float GetPaddedVal() const { return paddedVal; }
    size_t GetFirstElementOffset() const { return firstElementOffset; }
    size_t GetViewOffset() const { return viewOffset; }
    const NDims& GetDims() const { return dims; }
    // Size in bytes of a single element; depends on the concrete data type.
    virtual uint32_t ElementSize() const = 0;

    // Size of the actual data (without the padded part): product of all logical dims.
    size_t LogicalSize() const
        return std::accumulate(dims.cbegin(), dims.cend(), (size_t)1, [](size_t val, const Dim& d) {return val*d.v; });

    // Dimensions of the actual data (without the padded part).
    std::vector<size_t> LogicalDims() const
        std::vector<size_t> res(dims.size());
        std::transform(dims.begin(), dims.end(), res.begin(), [](const Dim& d) { return d.v; });

    // Whole buffer size (in elements).
    size_t PhysicalSize() const

    // Whole buffer size (in bytes).
    size_t PhysicalSizeInBytes() const
        return totalSize * ElementSize();

    // True when padding/view offsets exist between the logical dimensions --
    // i.e. when the data can NOT be treated as a flat 1D resource
    // (some pitch differs from the running product of logical sizes).
    bool PitchesDifferFromLogicalDims() const
        size_t calc_pitch = 1;
        for (const auto& d : dims)
            differ |= (d.pitch != calc_pitch);
342 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
344 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Typed tensor base: adds an element data type (DType) and a layout
// enumeration (Layout) on top of TensorBase.
template<typename DType, typename Layout>
struct TensorBaseT : public TensorBase
    // Returns the index of `channelName` inside `dims` for layout `l`
    // as stored in the per-layout channel table; a negative entry means
    // the layout does not contain that channel.
    // (NOTE: "Channelndex" is a historical typo of "ChannelIndex",
    // kept as-is for API compatibility.)
    template <typename ArrayT, typename ChannelName>
    static inline int Channelndex(const ArrayT& channelArr, Layout l, ChannelName channelName)
        size_t channel = static_cast<size_t>(channelName);
        assert(channel < channelArr[l].size());
        return channelArr[l][channel];

    // Returns the Dim of the requested channel, or a unit Dim {1,1,{0,0}}
    // when the channel is absent from (or out of range for) the layout.
    template <typename ArrayT, typename ChannelName>
    static inline Dim Extract(const ArrayT& channelArr, Layout l, ChannelName channelName, const NDims& dims)
        const int i = Channelndex(channelArr, l, channelName);
        return ((i < 0) || (i >= (int)dims.size())) ? Dim{ 1, 1,{ 0,0 } } : dims[i];

    // Number of channels present in layout `l`: counts the entries of the
    // channel table that are not -1.
    template <typename ArrayT>
    static inline uint32_t ChannelsCount(const ArrayT& channelArr, Layout l)
        const auto& entry = channelArr[l];
        return std::accumulate(entry.begin(), entry.end(), 0U, [](uint32_t count, int v) {return count + ((v != -1) ? 1 : 0); });
    TensorBaseT() = default;
    TensorBaseT(const TensorBaseT&) = default;
    TensorBaseT& operator=(const TensorBaseT&) = default;

    // Constructs a typed tensor: dims, element data type, layout, and
    // optional view offset (elements), total size (elements) and padded value.
    TensorBaseT(const NDims& nd, DType dt, Layout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBase(nd, of, sz, pv), dtype(dt), layout(l) {}

    DType GetDType() const { return dtype; }
    Layout GetLayout() const { return layout; }
    // Element size in bytes, derived from the data type.
    uint32_t ElementSize() const override { return BytesPerElement(dtype); }
    // Number of dimensions. (NOTE: "Dimentions" is a historical typo of
    // "Dimensions", kept as-is for API compatibility.)
    size_t Dimentions() const { return dims.size(); }
    bool SimpleLayout() const { return Tensor::SimpleLayout(layout); }
    // Full equality: layout, padded value, view offset, and every
    // dimension's size, padding and pitch must all match.
    bool operator==(const TensorBaseT& t) const
            layout == t.layout &&
            paddedVal == t.paddedVal &&
            viewOffset == t.viewOffset &&
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
                dims[i].v == t.dims[i].v &&
                dims[i].pad.before == t.dims[i].pad.before &&
                dims[i].pad.after == t.dims[i].pad.after &&
                dims[i].pitch == t.dims[i].pitch;

    // Same layout and same logical sizes; padding and pitches are ignored.
    bool SameDims(const TensorBaseT& t) const
            layout == t.layout &&
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
            same &= dims[i].v == t.dims[i].v;

    // Same logical sizes only; layout, padding and pitches are ignored.
    bool SameDimsSizes(const TensorBaseT& t) const
            dims.size() == t.dims.size();

        for (size_t i = 0; i < dims.size(); i++)
            same &= dims[i].v == t.dims[i].v;
445 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
447 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor of activation data: element type Datatype, layout DataLayout.
struct DataTensor : public TensorBaseT<Datatype, DataLayout>
    DataTensor() = default;
    DataTensor(const DataTensor&) = default;
    DataTensor& operator=(const DataTensor&) = default;

    DataTensor(const NDims& nd, Datatype dt, DataLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBaseT(nd, dt, l, of, sz, pv) {}

    // Builds the tensor from plain per-channel sizes; the NDims (pitches etc.)
    // are derived for layout `l` by GetSimpleDims.
    DataTensor(const std::vector<size_t>& d, Datatype dt, DataLayout l) :
        TensorBaseT<Datatype, DataLayout>(GetSimpleDims(d, l), dt, l) {}

    // Per-channel accessors; return a unit Dim when the layout lacks the channel.
    Dim X() const { return Extract(layout, DataChannelName::X, dims); }
    Dim Y() const { return Extract(layout, DataChannelName::Y, dims); }
    Dim Feature() const { return Extract(layout, DataChannelName::FEATURE, dims); }
    Dim ROI() const { return Extract(layout, DataChannelName::ROI, dims); }
    Dim Batch() const { return Extract(layout, DataChannelName::BATCH, dims); }

    DataTensor TransformIgnorePadding(DataLayout l) const;
    DataTensor FlattenFeatureAndSpatials() const;

    // Static helpers forwarding to TensorBaseT with this type's channel table.
    static inline Dim Extract(DataLayout l, DataChannelName channel, const NDims& d)
        return TensorBaseT::Extract(dataChannelArray, l, channel, d);

    static inline int Channelndex(DataLayout l, DataChannelName channel)
        return TensorBaseT::Channelndex(dataChannelArray, l, channel);

    static inline uint32_t ChannelsCount(DataLayout l)
        return TensorBaseT::ChannelsCount(dataChannelArray, l);

    // Per-layout channel-index table: for each DataLayout, the dim index of
    // each DataChannelName (-1 when the channel is absent).
    static std::array<std::array<int, 5>, DataLayout::DataLayoutCount> dataChannelArray;
    static NDims GetSimpleDims(const std::vector<size_t>& d, DataLayout l);
488 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
490 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tensor of convolution/FC weights: element type WeightsType, layout WeightsLayout.
struct WeightsTensor : TensorBaseT<WeightsType, WeightsLayout>
    WeightsTensor() = default;
    WeightsTensor(const WeightsTensor&) = default;
    WeightsTensor& operator=(const WeightsTensor&) = default;

    WeightsTensor(const NDims& nd, WeightsType dt, WeightsLayout l, size_t of = 0, size_t sz = 0, float pv = 0.f) :
        TensorBaseT(nd, dt, l, of, sz, pv) {}

    // Builds the tensor from plain per-channel sizes; the NDims (pitches etc.)
    // are derived for layout `l` by GetSimpleDims.
    WeightsTensor(const std::vector<size_t>& d, WeightsType dt, WeightsLayout l) :
        TensorBaseT<WeightsType, WeightsLayout>(GetSimpleDims(d, l), dt, l) {}

    // Re-layout helpers; the single-argument overload keeps the current data type.
    WeightsTensor TransformIgnorePadding(WeightsLayout l) const { return TransformIgnorePadding(l, dtype); }
    WeightsTensor TransformIgnorePadding(WeightsLayout l, WeightsType t) const;

    // Per-channel accessors; return a unit Dim when the layout lacks the channel.
    Dim X() const { return Extract(layout, WeightsChannelName::X, dims); }
    Dim Y() const { return Extract(layout, WeightsChannelName::Y, dims); }
    Dim IFM() const { return Extract(layout, WeightsChannelName::IFM, dims); }
    Dim OFM() const { return Extract(layout, WeightsChannelName::OFM, dims); }
    Dim LX() const { return Extract(layout, WeightsChannelName::LX, dims); }
    Dim LY() const { return Extract(layout, WeightsChannelName::LY, dims); }

    // Static helpers forwarding to TensorBaseT with this type's channel table.
    static inline Dim Extract(WeightsLayout l, WeightsChannelName channel, const NDims& d)
        return TensorBaseT::Extract(weightsChannelArray, l, channel, d);

    static inline int Channelndex(WeightsLayout l, WeightsChannelName channel)
        return TensorBaseT::Channelndex(weightsChannelArray, l, channel);

    static inline uint32_t ChannelsCount(WeightsLayout l)
        return TensorBaseT::ChannelsCount(weightsChannelArray, l);

    static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
    // Per-layout channel-index table: for each WeightsLayout, the dim index of
    // each WeightsChannelName (-1 when the channel is absent).
    static std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> weightsChannelArray;