1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
14 #include <vpu/backend/blob_format.hpp>
15 #include <vpu/model/data.hpp>
16 #include <vpu/hw/utility.hpp>
17 #include <vpu/utils/io.hpp>
18 #include <vpu/utils/dot_io.hpp>
// Numeric modes used for data and for coefficients; both are set to FP16.
26 const HwDataMode CNN_DATA_TYPE = HwDataMode::FP16;
27 const HwCoeffMode CNN_COEFF_TYPE = HwCoeffMode::FP16;
// Two-entry lookup tables: coefficients per word {1, 2} and bytes per pixel {2, 1}.
// NOTE(review): presumably indexed by the coefficient/data mode, with entry 0
// corresponding to FP16 - confirm against the code that reads these tables.
29 const std::array<int, 2> CNN_COEFF_PER_WORD_VALUES{1, 2};
30 const std::array<int, 2> CNN_BYTES_PER_PIXEL{2, 1};
// The five HW operation modes and a five-entry table of cost values.
// NOTE(review): the equal lengths suggest CNN_MODES_COST[i] is the cost of
// CNN_MODES[i] - confirm against the code that reads these tables.
32 const std::array<HwOpMode, 5> CNN_MODES{HwOpMode::MODE_1_256, HwOpMode::MODE_2_128, HwOpMode::MODE_4_64, HwOpMode::MODE_8_32, HwOpMode::MODE_16_16};
33 const std::array<int, 5> CNN_MODES_COST{0, 5, 11, 19, 31};
// Upper bounds for the input plane size (width, height) and for the number of
// input and output channels.
// NOTE(review): presumably limits imposed by the HW unit - confirm.
35 const int CNN_MAX_INPUT_WIDTH = 4096;
36 const int CNN_MAX_INPUT_HEIGHT = 4096;
37 const int CNN_MAX_INPUT_CHANNELS = 2048;
38 const int CNN_MAX_OUTPUT_CHANNELS = 2048;
// 128 * 1024 = 131072, i.e. 128 KiB.
// NOTE(review): presumably the memory budget a single tile must fit into - confirm.
40 const int CNN_MAX_BYTES = 128 * 1024;
// NOTE(review): presumably the maximum number of coefficients in one block;
// what a "block" is cannot be determined from the lines visible here.
41 const int CNN_MAX_COEFF_PER_BLOCK = 256;
// NOTE(review): presumably the CMX data width expressed in bytes - confirm.
43 const int CMX_DATA_BYTE_WIDTH = 16;
// Tile description along a single plane axis. HwPlaneTile keeps one instance
// for the height axis (heightInfo) and one for the width axis (widthInfo).
// Every member visible here defaults to 0.
49 struct HwPlaneTileInfo final {
// NOTE(review): the "WithJunk" suffix suggests these are the input/output tile
// sizes including junk elements - confirm.
50 int inputWithJunk = 0, outputWithJunk = 0;
// NOTE(review): presumably the amount of junk in the output before/after the
// valid part of the tile - confirm.
51 int outputJunkBefore = 0, outputJunkAfter = 0;
// NOTE(review): presumably the range of the tile within the input axis;
// whether the end index is inclusive is not visible here - confirm.
52 int inputStartIndex = 0, inputEndIndex = 0;
// NOTE(review): presumably the range of the tile within the output axis;
// whether the end index is inclusive is not visible here - confirm.
53 int outputStartIndex = 0, outputEndIndex = 0;
// Forward declarations of the three tiling levels (channel tile, plane tile,
// tiling) and their pointer aliases. Each struct is templated on `Tiles`.
//
// shared_ptr aliases are used for the child lists
// (HwPlaneTile::channelTiles, HwTiling::planeTiles); weak_ptr aliases are used
// for the `parent` back-references in HwChannelTile and HwPlaneTile.
56 template <class Tiles> struct HwChannelTile;
57 template <class Tiles> using HwChannelTilePtr = std::shared_ptr<HwChannelTile<Tiles>>;
59 template <class Tiles> struct HwPlaneTile;
60 template <class Tiles> using HwPlaneTilePtr = std::shared_ptr<HwPlaneTile<Tiles>>;
61 template <class Tiles> using HwPlaneTileWeakPtr = std::weak_ptr<HwPlaneTile<Tiles>>;
63 template <class Tiles> struct HwTiling;
64 template <class Tiles> using HwTilingPtr = std::shared_ptr<HwTiling<Tiles>>;
65 template <class Tiles> using HwTilingWeakPtr = std::weak_ptr<HwTiling<Tiles>>;
// Channel-level tile. Instances are held (via shared_ptr) in
// HwPlaneTile::channelTiles.
// NOTE(review): getChannelTilePostfix() also reads a `socInd` member of this
// type; its declaration is not among the lines visible here.
67 template <class Tiles>
68 struct HwChannelTile final {
// Weak back-reference to the owning plane tile. getChannelTilePostfix() locks
// it and asserts that the result is non-null.
69 HwPlaneTileWeakPtr<Tiles> parent;
// NOTE(review): presumably the first channel covered by this tile and the
// number of input channels in it - confirm.
73 int channelStartIndex = 0;
74 int numInputChannels = 0;
// NOTE(review): presumably the channel dimensions after extension/padding to
// what the HW requires - confirm.
76 int extendedInputDimC = 0;
77 int extendedOutputDimC = 0;
// Plane-level tile. Instances are held (via shared_ptr) in
// HwTiling::planeTiles, and each one holds a list of channel tiles.
// NOTE(review): getPlaneTilePostfix() also reads `sohInd` and `sowInd` members
// of this type; their declarations are not among the lines visible here.
82 template <class Tiles>
83 struct HwPlaneTile final {
// Weak back-reference to the owning tiling. The postfix helpers lock it and
// assert that the result is non-null.
84 HwTilingWeakPtr<Tiles> parent;
// Per-axis tile descriptions, value-initialized (all members 0).
89 HwPlaneTileInfo heightInfo = {};
90 HwPlaneTileInfo widthInfo = {};
// Channel tiles belonging to this plane tile.
92 SmallVector<HwChannelTilePtr<Tiles>> channelTiles;
// Top level of the tiling hierarchy: holds the list of plane tiles.
// NOTE(review): printTo() and the postfix helpers read `sohTiles`, `sowTiles`
// and `socTiles` members of this type; their declarations are not among the
// lines visible here.
95 template <class Tiles>
96 struct HwTiling final {
// Plane tiles belonging to this tiling.
101 SmallVector<HwPlaneTilePtr<Tiles>> planeTiles;
// Writes a textual dump of `tiling` to `os`: an opening "[" followed by one
// "name=value" line each for sohTiles, sowTiles and socTiles.
// Every line is terminated with std::endl, which also flushes the stream.
// `tiling` is dereferenced without a null check, so it must not be null.
// NOTE(review): the rest of the body (presumably a closing bracket) is not
// visible here.
104 template <class Tiles>
105 void printTo(std::ostream& os, const HwTilingPtr<Tiles>& tiling) {
106 os << "[" << std::endl;
107 os << "sohTiles=" << tiling->sohTiles << std::endl;
108 os << "sowTiles=" << tiling->sowTiles << std::endl;
109 os << "socTiles=" << tiling->socTiles << std::endl;
// Dot-graph counterpart of the stream overload: appends the sohTiles, sowTiles
// and socTiles values of `tiling` as name/value pairs.
// `tiling` is dereferenced without a null check, so it must not be null.
113 template <class Tiles>
114 void printTo(DotLabel& lbl, const HwTilingPtr<Tiles>& tiling) {
// The pairs are appended to a DotLabel constructed from `lbl`, not to `lbl`
// directly.
// NOTE(review): presumably this creates a nested sub-label inside `lbl`;
// the DotLabel constructor is not visible here - confirm.
115 DotLabel subLbl(lbl);
116 subLbl.appendPair("sohTiles", tiling->sohTiles);
117 subLbl.appendPair("sowTiles", tiling->sowTiles);
118 subLbl.appendPair("socTiles", tiling->socTiles);
// Builds a name postfix identifying `channelTile` within its tiling.
// When the tiling has more than one soc tile, "@soc=<socInd + 1>/<socTiles>"
// is written to the string stream (the index is printed 1-based).
// NOTE(review): the return statement is not visible here; presumably the
// accumulated stream contents are returned.
121 template <class Tiles>
122 std::string getChannelTilePostfix(const HwChannelTilePtr<Tiles>& channelTile) {
// Walk up the hierarchy: channel tile -> plane tile -> tiling. Both parents are
// weak references, so each lock() result is asserted to be non-null.
123 auto planeTile = channelTile->parent.lock();
124 IE_ASSERT(planeTile != nullptr);
126 auto tiling = planeTile->parent.lock();
127 IE_ASSERT(tiling != nullptr);
129 std::ostringstream ostr;
// Nothing is written when there is only a single soc tile.
131 if (tiling->socTiles > 1)
132 ostr << "@soc=" << channelTile->socInd + 1 << "/" << tiling->socTiles;
// Builds a name postfix identifying `planeTile` within its tiling.
// Writes "@soh=<sohInd + 1>/<sohTiles>" when the tiling has more than one soh
// tile, followed by "@sow=<sowInd + 1>/<sowTiles>" when it has more than one
// sow tile (indices are printed 1-based).
// NOTE(review): the return statement is not visible here; presumably the
// accumulated stream contents are returned.
137 template <class Tiles>
138 std::string getPlaneTilePostfix(const HwPlaneTilePtr<Tiles>& planeTile) {
// The parent is a weak reference, so the lock() result is asserted non-null.
139 auto tiling = planeTile->parent.lock();
140 IE_ASSERT(tiling != nullptr);
142 std::ostringstream ostr;
// Each part is omitted when the corresponding tile count is 1 or less.
144 if (tiling->sohTiles > 1)
145 ostr << "@soh=" << planeTile->sohInd + 1 << "/" << tiling->sohTiles;
146 if (tiling->sowTiles > 1)
147 ostr << "@sow=" << planeTile->sowInd + 1 << "/" << tiling->sowTiles;
// Convolution-specific tile parameters; used as the `Tiles` argument of the
// HwConv* aliases. Also the return type of splitHwConvIntoOutChannelsTiles().
152 struct HwConvTileInfo final {
// HW operation mode; defaults to MODE_1_256.
153 HwOpMode mode = HwOpMode::MODE_1_256;
// NOTE(review): presumably the number of output channels handled per HW
// descriptor, and the number handled by the last descriptor - confirm.
155 int outChansPerDescr = 0;
156 int lastOutChans = 0;
// NOTE(review): presumably the channel dimensions after extension/padding to
// what the HW requires - confirm.
157 int extendedInputDimC = 0;
158 int extendedOutputDimC = 0;
// Defaults to the largest finite double.
// NOTE(review): presumably so that any computed cost compares lower than a
// default-constructed (not yet evaluated) instance - confirm.
159 double cost = std::numeric_limits<double>::max();
// Stream and dot-label dump overloads for HwConvTileInfo.
// Declarations only; the definitions are not visible here.
162 void printTo(std::ostream& os, const HwConvTileInfo& convTiles);
163 void printTo(DotLabel& lbl, const HwConvTileInfo& convTiles);
// Convolution instantiations of the generic tiling templates
// (Tiles = HwConvTileInfo), with matching shared_ptr aliases.
165 using HwConvChannelTile = HwChannelTile<HwConvTileInfo>;
166 using HwConvChannelTilePtr = HwChannelTilePtr<HwConvTileInfo>;
167 using HwConvPlaneTile = HwPlaneTile<HwConvTileInfo>;
168 using HwConvPlaneTilePtr = HwPlaneTilePtr<HwConvTileInfo>;
169 using HwConvTiling = HwTiling<HwConvTileInfo>;
170 using HwConvTilingPtr = HwTilingPtr<HwConvTileInfo>;
// Pooling-specific tile parameters; used as the `Tiles` argument of the
// HwPool* aliases.
172 struct HwPoolTileInfo final {
// HW operation mode; defaults to MODE_1_256.
173 HwOpMode mode = HwOpMode::MODE_1_256;
// NOTE(review): presumably the number of channels handled per HW descriptor -
// confirm.
175 int chansPerDescr = 0;
// Stream and dot-label dump overloads for HwPoolTileInfo.
// Declarations only; the definitions are not visible here.
178 void printTo(std::ostream& os, const HwPoolTileInfo& poolTiles);
179 void printTo(DotLabel& lbl, const HwPoolTileInfo& poolTiles);
// Pooling instantiations of the generic tiling templates
// (Tiles = HwPoolTileInfo), with matching shared_ptr aliases.
181 using HwPoolChannelTile = HwChannelTile<HwPoolTileInfo>;
182 using HwPoolChannelTilePtr = HwChannelTilePtr<HwPoolTileInfo>;
183 using HwPoolPlaneTile = HwPlaneTile<HwPoolTileInfo>;
184 using HwPoolPlaneTilePtr = HwPlaneTilePtr<HwPoolTileInfo>;
185 using HwPoolTiling = HwTiling<HwPoolTileInfo>;
186 using HwPoolTilingPtr = HwTilingPtr<HwPoolTileInfo>;
// Fully-connected-specific tile parameters; used as the `Tiles` argument of
// the HwFullyConnected* aliases.
188 struct HwFullyConnectedTileInfo final {
// HW operation mode; defaults to MODE_1_256.
189 HwOpMode mode = HwOpMode::MODE_1_256;
// NOTE(review): presumably the number of sub-tiles the input is split into -
// confirm.
191 int numInSubTiles = 0;
// Stream and dot-label dump overloads for HwFullyConnectedTileInfo.
// Declarations only; the definitions are not visible here.
196 void printTo(std::ostream& os, const HwFullyConnectedTileInfo& fcTiles);
197 void printTo(DotLabel& lbl, const HwFullyConnectedTileInfo& fcTiles);
// Fully-connected instantiations of the generic tiling templates
// (Tiles = HwFullyConnectedTileInfo), with matching shared_ptr aliases.
199 using HwFullyConnectedChannelTile = HwChannelTile<HwFullyConnectedTileInfo>;
200 using HwFullyConnectedChannelTilePtr = HwChannelTilePtr<HwFullyConnectedTileInfo>;
201 using HwFullyConnectedPlaneTile = HwPlaneTile<HwFullyConnectedTileInfo>;
202 using HwFullyConnectedPlaneTilePtr = HwPlaneTilePtr<HwFullyConnectedTileInfo>;
203 using HwFullyConnectedTiling = HwTiling<HwFullyConnectedTileInfo>;
204 using HwFullyConnectedTilingPtr = HwTilingPtr<HwFullyConnectedTileInfo>;
207 // Input<->Output tile calculation
212 int kernelSize, int kernelStride,
213 int padBefore, int padAfter,
217 // Plane tiles calculation.
// Returns a list of per-axis tile descriptions (HwPlaneTileInfo).
// NOTE(review): judging by the name, this variant accounts for a pooling stage;
// only the kernel size/stride parameters are visible here, the remaining
// parameters are not - confirm against the definition.
220 SmallVector<HwPlaneTileInfo> splitIntoPlaneTilesWithPool(
222 int kernelSize, int kernelStride,
226 // Because of possible junk, this may return more tiles than requested (1) (O -> I)
// Returns a list of per-axis tile descriptions (HwPlaneTileInfo) computed from
// the input/output sizes, the kernel size and stride, and the padding before
// and after.
// NOTE(review): "(O -> I)" presumably means tiles are derived from the output
// towards the input; the meaning of "(1)" is not visible here - confirm.
227 SmallVector<HwPlaneTileInfo> splitIntoPlaneTiles(
228 int inputSize, int outputSize,
229 int kernelSize, int kernelStride,
230 int padBefore, int padAfter,
// The comment below describes a parameter whose declaration is not visible here.
231 // max size of output tile with junk included
236 // Check HW-unit memory restrictions for tile.
// Pooling variant: takes the input tile width/height, the input/output tile
// channel counts and the kernel size in X and Y.
// NOTE(review): presumably returns true when the tile satisfies the
// restrictions; the definition and any further parameters are not visible
// here - confirm.
239 bool checkPoolingHWRestrictions(
240 int inTileWidth, int inTileHeight,
241 int inTileChannels, int outTileChannels,
242 int kernelSizeX, int kernelSizeY,
// Convolution counterpart of checkPoolingHWRestrictions(), with the same
// visible parameters: input tile width/height, input/output tile channel
// counts and the kernel size in X and Y.
// NOTE(review): presumably returns true when the tile satisfies the HW
// restrictions; the definition and any further parameters are not visible
// here - confirm.
245 bool checkConvHWRestrictions(
246 int inTileWidth, int inTileHeight,
247 int inTileChannels, int outTileChannels,
248 int kernelSizeX, int kernelSizeY,
253 // HW Convolution tiling over output channels.
256 // This function tries to split the output over channels.
257 // split OC is invoked at the very end (3)
// Returns the resulting convolution tile parameters as a HwConvTileInfo.
// NOTE(review): "(3)" presumably numbers this step within the overall tiling
// sequence; the parameters between the input tile dimensions and the kernel
// size, and any after it, are not visible here - confirm against the
// definition.
258 HwConvTileInfo splitHwConvIntoOutChannelsTiles(
259 int inTileWidth, int inTileHeight, int inTileChannels,
261 int kernelSizeX, int kernelSizeY,