1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <vpu/pass_manager.hpp>
11 #include <unordered_set>
14 #include <vpu/sw/utility.hpp>
16 #define REFERENCE_CONVOLUTION 0
22 class ConvIm2ColWeightsContent final : public CalculatedDataContent {
24 explicit ConvIm2ColWeightsContent(const DataContent::Ptr& origContent) :
25 CalculatedDataContent({origContent}) {
29 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
30 VPU_PROFILE(ConvIm2ColWeightsContent);
31 kchw_to_khwc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
35 class Conv3x3WeightsContent final : public CalculatedDataContent {
37 explicit Conv3x3WeightsContent(const DataContent::Ptr& origContent) :
38 CalculatedDataContent({origContent}) {
42 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
43 VPU_PROFILE(Conv3x3WeightsContent);
44 kchw_to_hwkc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
48 class ConvCHWWeightsContent final : public CalculatedDataContent {
50 explicit ConvCHWWeightsContent(const DataContent::Ptr& origContent) :
51 CalculatedDataContent({origContent}) {
55 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
56 VPU_PROFILE(ConvCHWWeightsContent);
57 kchw_to_hwkc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
61 class ConvStage final : public StageNode {
63 StagePtr cloneImpl() const override {
64 return std::make_shared<ConvStage>(*this);
67 void propagateScaleFactorsImpl(
68 const SmallVector<float>&,
69 ScalePropagationStep) override {
70 VPU_THROW_EXCEPTION << "Must never be called";
73 void propagateDataOrderImpl() const override {
74 IE_ASSERT(_inputEdges.size() == 3);
75 IE_ASSERT(_outputEdges.size() == 1);
77 auto input = _inputEdges[0]->input();
78 auto weights = _inputEdges[1]->input();
79 auto output = _outputEdges[0]->output();
81 auto finalOrder = input->desc().dimsOrder();
82 if (finalOrder.dimInd(Dim::C) == 1) {
84 finalOrder.moveDim(Dim::C, 2);
87 if (_type == StageType::Conv ||
88 _type == StageType::Im2ColConvolution) {
89 if (finalOrder != input->desc().dimsOrder()) {
90 _orderInfo.setInput(_inputEdges[0], finalOrder);
92 _orderInfo.setOutput(_outputEdges[0], finalOrder);
93 } else if (_type == StageType::DepthConv) {
94 if (finalOrder != input->desc().dimsOrder()) {
95 _orderInfo.setInput(_inputEdges[0], finalOrder);
97 _orderInfo.setOutput(_outputEdges[0], finalOrder);
99 _orderInfo.setInput(_inputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
100 _orderInfo.setOutput(_outputEdges[0], finalOrder.createMovedDim(Dim::C, 0));
104 void getDataStridesRequirementsImpl() const override {
105 IE_ASSERT(_inputEdges.size() == 3);
106 IE_ASSERT(_outputEdges.size() == 1);
108 if (_type != StageType::DepthConv) {
109 _stridesInfo.setInput(_inputEdges[0], StridesRequirement::compact());
110 _stridesInfo.setOutput(_outputEdges[0], StridesRequirement::compact());
114 void finalizeDataLayoutImpl() override {
115 IE_ASSERT(_inputEdges.size() == 3);
116 IE_ASSERT(_outputEdges.size() == 1);
118 auto input = _inputEdges[0]->input();
119 auto weights = _inputEdges[1]->input();
120 auto output = _outputEdges[0]->output();
122 auto kernelSizeX = attrs().get<int>("kernelSizeX");
123 auto kernelSizeY = attrs().get<int>("kernelSizeY");
127 if (_type == StageType::DepthConv) {
128 swWeights = weights->attrs().getOrDefault<Data>("swWeights", nullptr);
129 if (swWeights == nullptr) {
130 DataDesc newWeightsDesc({
131 kernelSizeX * kernelSizeY,
133 output->desc().dim(Dim::C)});
135 swWeights = _model->duplicateData(
139 std::make_shared<DefaultSwWeightsContent>(weights->content()));
141 weights->attrs().set<Data>("swWeights", swWeights);
143 } else if (input->desc().dimsOrder().dimInd(Dim::C) == 0) {
148 auto isSpatialConv = attrs().get<bool>("isSpatialConv");
149 auto isConv1x1 = attrs().get<bool>("isConv1x1");
150 auto isConv3x3 = attrs().get<bool>("isConv3x3");
152 swWeights = weights->attrs().getOrDefault<Data>("swWeights", nullptr);
153 if (swWeights == nullptr) {
154 DataDesc newWeightsDesc({
155 kernelSizeX * kernelSizeY,
156 input->desc().dim(Dim::C),
157 output->desc().dim(Dim::C)});
160 swWeights = _model->duplicateData(
164 std::make_shared<DefaultSwWeightsContent>(weights->content()));
165 } else if (isConv1x1) {
166 swWeights = _model->duplicateData(
171 } else if (isConv3x3) {
172 swWeights = _model->duplicateData(
176 std::make_shared<Conv3x3WeightsContent>(weights->content()));
178 swWeights = _model->duplicateData(
182 std::make_shared<ConvIm2ColWeightsContent>(weights->content()));
185 weights->attrs().set<Data>("swWeights", swWeights);
187 } else if (input->desc().dimsOrder().dimInd(Dim::C) == 2) {
192 auto isConv1x1 = attrs().get<bool>("isConv1x1");
194 if (_type == StageType::Im2ColConvolution) {
195 // Transform CHW "Im2ColConvolution" into CHW "Conv"
196 _type = StageType::Conv;
199 swWeights = weights->attrs().getOrDefault<Data>("swWeights", nullptr);
200 if (swWeights == nullptr) {
201 DataDesc newWeightsDesc({
202 kernelSizeX * kernelSizeY,
203 input->desc().dim(Dim::C),
204 output->desc().dim(Dim::C)});
207 swWeights = _model->duplicateData(
213 swWeights = _model->duplicateData(
217 std::make_shared<ConvCHWWeightsContent>(weights->content()));
220 weights->attrs().set<Data>("swWeights", swWeights);
224 IE_ASSERT(swWeights != nullptr);
226 _model->replaceStageInput(_inputEdges[1], swWeights);
229 void getBatchSupportInfoImpl() const override {
230 IE_ASSERT(_inputEdges.size() == 3);
231 IE_ASSERT(_outputEdges.size() == 1);
233 _batchInfo.setInput(_inputEdges[0], BatchSupport::Split);
234 _batchInfo.setOutput(_outputEdges[0], BatchSupport::Split);
237 void finalCheckImpl() const override {
240 void serializeParamsImpl(BlobSerializer& serializer) const override {
241 auto kernelSizeX = attrs().get<int>("kernelSizeX");
242 auto kernelSizeY = attrs().get<int>("kernelSizeY");
243 auto kernelStrideX = attrs().get<int>("kernelStrideX");
244 auto kernelStrideY = attrs().get<int>("kernelStrideY");
245 auto padLeft = attrs().get<int>("padLeft");
246 auto padTop = attrs().get<int>("padTop");
247 auto dilationX = attrs().get<int>("dilationX");
248 auto dilationY = attrs().get<int>("dilationY");
250 serializer.append(static_cast<uint32_t>(kernelSizeX));
251 serializer.append(static_cast<uint32_t>(kernelSizeY));
252 serializer.append(static_cast<uint32_t>(kernelStrideX));
253 serializer.append(static_cast<uint32_t>(kernelStrideY));
254 serializer.append(static_cast<uint32_t>(padLeft));
255 serializer.append(static_cast<uint32_t>(padTop));
256 serializer.append(static_cast<uint32_t>(dilationX));
257 serializer.append(static_cast<uint32_t>(dilationY));
260 void serializeDataImpl(BlobSerializer& serializer) const override {
261 IE_ASSERT(_inputEdges.size() == 3);
262 IE_ASSERT(_outputEdges.size() == 1);
264 auto input = _inputEdges[0]->input();
265 auto weights = _inputEdges[1]->input();
266 auto biases = _inputEdges[2]->input();
267 auto output = _outputEdges[0]->output();
269 input->serializeOldBuffer(handle_from_this(), serializer);
270 output->serializeOldBuffer(handle_from_this(), serializer);
271 weights->serializeOldBuffer(handle_from_this(), serializer);
273 if (!_tempBufferEdges.empty()) {
274 _tempBufferEdges[0]->tempBuffer()->serializeOldBuffer(handle_from_this(), serializer);
278 biases->serializeOldBuffer(handle_from_this(), serializer);
282 class PassImpl final : public Pass {
284 explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
286 void run(const Model::Ptr& model) override;
289 StageBuilder::Ptr _stageBuilder;
292 void PassImpl::run(const Model::Ptr& model) {
293 VPU_PROFILE(swConvAdaptation);
295 for (const auto& stage : model->getStages()) {
296 if (stage->type() != StageType::StubConv)
299 auto origStageName = stage->name();
300 auto origLayer = stage->origLayer();
302 auto input = stage->input(0);
303 auto weights = stage->input(1);
304 auto biases = stage->input(2);
305 auto output = stage->output(0);
307 auto kernelSizeX = stage->attrs().get<int>("kernelSizeX");
308 auto kernelSizeY = stage->attrs().get<int>("kernelSizeY");
309 auto kernelStrideX = stage->attrs().get<int>("kernelStrideX");
310 auto kernelStrideY = stage->attrs().get<int>("kernelStrideY");
311 auto padLeft = stage->attrs().get<int>("padLeft");
312 auto padRight = stage->attrs().get<int>("padRight");
313 auto padTop = stage->attrs().get<int>("padTop");
314 auto padBottom = stage->attrs().get<int>("padBottom");
315 auto dilationX = stage->attrs().get<int>("dilationX");
316 auto dilationY = stage->attrs().get<int>("dilationY");
317 auto groupSize = stage->attrs().get<int>("groupSize");
319 model->removeStage(stage);
322 kernelSizeX == 1 && kernelSizeY == 1 &&
323 kernelStrideX == 1 && kernelStrideY == 1 &&
324 padLeft == 0 && padRight == 0 && padTop == 0 && padBottom == 0 &&
325 dilationX == 1 && dilationY == 1 &&
326 input->desc().dim(Dim::W) == 1 && input->desc().dim(Dim::H) == 1 &&
327 output->desc().dim(Dim::W) == 1 && output->desc().dim(Dim::H) == 1);
330 kernelSizeX == 1 && kernelSizeY == 1 &&
331 dilationX == 1 && dilationY == 1 &&
335 kernelSizeX == 3 && kernelSizeY == 3 &&
336 (input->desc().dim(Dim::C) / groupSize) > 3 &&
337 ((input->desc().dim(Dim::C) / groupSize) * (output->desc().dim(Dim::C) / groupSize)) > 256);
339 bool iskernelSizeMatchSpatial = (
340 kernelSizeX > 1 && kernelSizeX < 12 && kernelSizeX % 2 == 1);
342 bool isSpatialConv = (
343 iskernelSizeMatchSpatial &&
344 kernelSizeY != 1 && // kernelSizeX != 1 was checked in iskernelSizeMatchSpatial condition
345 ((input->desc().dim(Dim::C) / groupSize) * (output->desc().dim(Dim::C) / groupSize)) <= 256 &&
348 #if REFERENCE_CONVOLUTION
349 isSpatialConv = false;
354 if (groupSize == 1) {
356 _stageBuilder->addSwFullyConnectedStage(
365 if (biases->usage() != DataUsage::Fake) {
366 auto tempOutput = model->duplicateData(
370 _stageBuilder->addBiasStage(
372 origStageName + "@biases",
381 if (isConv1x1 || isSpatialConv || isConv3x3) {
382 swStage = model->addNewStage<ConvStage>(
386 {input, weights, biases},
389 swStage = model->addNewStage<ConvStage>(
391 #if REFERENCE_CONVOLUTION
392 StageType::RefConvolution,
394 StageType::Im2ColConvolution,
397 {input, weights, biases},
400 double im2ColBufSizeF = static_cast<double>(kernelSizeX) * kernelSizeY *
401 output->desc().dim(Dim::W) * output->desc().dim(Dim::H) * input->desc().dim(Dim::C)
404 if (im2ColBufSizeF >= std::numeric_limits<int>::max()) {
405 VPU_THROW_EXCEPTION << "stage: " << origStageName << ", im2col bufferSize cannot fit 32s: "
406 << std::setprecision(0) << std::fixed << im2ColBufSizeF
407 << "(" << kernelSizeX << "x" << kernelSizeY << "x"
408 << output->desc().dim(Dim::W) << "x" << output->desc().dim(Dim::H) << "x" << output->desc().dim(Dim::C) << ")";
411 model->addTempBuffer(swStage, DataDesc({static_cast<int>(im2ColBufSizeF)}));
414 swStage->attrs().set<int>("kernelSizeX", kernelSizeX);
415 swStage->attrs().set<int>("kernelSizeY", kernelSizeY);
417 swStage->attrs().set<int>("kernelStrideX", kernelStrideX);
418 swStage->attrs().set<int>("kernelStrideY", kernelStrideY);
420 swStage->attrs().set<int>("padLeft", padLeft);
421 swStage->attrs().set<int>("padRight", padRight);
422 swStage->attrs().set<int>("padTop", padTop);
423 swStage->attrs().set<int>("padBottom", padBottom);
425 swStage->attrs().set<int>("dilationX", dilationX);
426 swStage->attrs().set<int>("dilationY", dilationY);
428 swStage->attrs().set<bool>("isSpatialConv", isSpatialConv);
429 swStage->attrs().set<bool>("isConv1x1", isConv1x1);
430 swStage->attrs().set<bool>("isConv3x3", isConv3x3);
432 } else if (groupSize == input->desc().dim(Dim::C) &&
433 groupSize == output->desc().dim(Dim::C)) {
434 if (biases->usage() != DataUsage::Fake) {
435 auto tempOutput = model->duplicateData(
439 _stageBuilder->addBiasStage(
441 origStageName + "@biases",
449 auto swStage = model->addNewStage<ConvStage>(
451 StageType::DepthConv,
453 {input, weights, biases},
456 swStage->attrs().set<int>("kernelSizeX", kernelSizeX);
457 swStage->attrs().set<int>("kernelSizeY", kernelSizeY);
459 swStage->attrs().set<int>("kernelStrideX", kernelStrideX);
460 swStage->attrs().set<int>("kernelStrideY", kernelStrideY);
462 swStage->attrs().set<int>("padLeft", padLeft);
463 swStage->attrs().set<int>("padRight", padRight);
464 swStage->attrs().set<int>("padTop", padTop);
465 swStage->attrs().set<int>("padBottom", padBottom);
467 swStage->attrs().set<int>("dilationX", dilationX);
468 swStage->attrs().set<int>("dilationY", dilationY);
470 VPU_THROW_EXCEPTION << "Internal error : grouped convolution was not processed";
477 Pass::Ptr PassManager::swConvAdaptation() {
478 return std::make_shared<PassImpl>(_stageBuilder);