inference-engine/src/vpu/graph_transformer/src/passes/replace_deconv_by_conv.cpp

   1 // Copyright (C) 2018-2019 Intel Corporation
   2 // SPDX-License-Identifier: Apache-2.0
   3 //
   4
   5 #include <vpu/pass_manager.hpp>
   6
   7 #include <tuple>
   8 #include <vector>
   9 #include <algorithm>
  10 #include <limits>
  11 #include <string>
  12 #include <utility>
  13 #include <cmath>
  14 #include <list>
  15 #include <set>
  16 #include <unordered_map>
  17 #include <memory>
  18
  19 #include <vpu/stub_stage.hpp>
  20 #include <vpu/sw/utility.hpp>
  21 #include <vpu/compile_env.hpp>
  22
  23 namespace vpu {
  24
  25 namespace {
  26
  27 using ReplicatedDataMap = std::unordered_map<int, Data>;  // Maps int keys to Data values; no use of this alias is visible in this file.
  28
  29 class UpsamplingStage final : public StageNode {
  30 private:
  31     StagePtr cloneImpl() const override {
  32         return std::make_shared<UpsamplingStage>(*this);
  33     }
  34
  35     DataMap<float> propagateScaleFactorsImpl(
  36             const DataMap<float>&,
  37             ScalePropagationStep) override {
  38         VPU_THROW_EXCEPTION << "Must never be called";
  39     }
  40
  41     DataMap<DimsOrder> propagateDataOrderImpl() const override {
  42         IE_ASSERT(_inputEdges.size() == 1);
  43         IE_ASSERT(_outputEdges.size() == 1);
  44
  45         auto input = _inputEdges[0]->input();
  46         auto output = _outputEdges[0]->output();
  47
  48         DataMap<DimsOrder> out;
  49
  50         out[input] = DimsOrder::NCHW;
  51         out[output] = DimsOrder::NCHW;
  52
  53         return out;
  54     }
  55
  56     DataMap<StridesRequirement> getDataStridesRequirementsImpl() const override {
  57         IE_ASSERT(_inputEdges.size() == 1);
  58         IE_ASSERT(_outputEdges.size() == 1);
  59
  60         auto output = _outputEdges[0]->output();
  61
  62         DataMap<StridesRequirement> out;
  63
  64         out[output] = StridesRequirement().add(1, DimStride::Aligned);
  65
  66         return out;
  67     }
  68
  69     void finalizeDataLayoutImpl() override {
  70     }
  71
  72     DataMap<BatchSupport> getBatchSupportInfoImpl() const override {
  73         IE_ASSERT(_inputEdges.size() == 1);
  74         IE_ASSERT(_outputEdges.size() == 1);
  75
  76         auto input = _inputEdges[0]->input();
  77         auto output = _outputEdges[0]->output();
  78
  79         DataMap<BatchSupport> out;
  80
  81         out[input] = BatchSupport::Split;
  82         out[output] = BatchSupport::Split;
  83
  84         return out;
  85     }
  86
  87     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
  88         return StageSHAVEsRequirements::TwoOrOne;
  89     }
  90
  91     void finalCheckImpl() const override {
  92     }
  93
  94     void serializeParamsImpl(BlobSerializer& serializer) const override {
  95         auto scaleX = attrs().get<int>("upsampling_factorx_x");
  96         auto scaleY = attrs().get<int>("upsampling_factorx_y");
  97         auto scaleZ = attrs().get<int>("upsampling_factorx_z");
  98         auto pad_l_x = attrs().get<int>("pad_l_x");
  99         auto pad_r_x = attrs().get<int>("pad_r_x");
 100         auto pad_l_y = attrs().get<int>("pad_l_y");
 101         auto pad_r_y = attrs().get<int>("pad_r_y");
 102         auto pad_l_z = attrs().get<int>("pad_l_z");
 103         auto pad_r_z = attrs().get<int>("pad_r_z");
 104
 105         serializer.append(static_cast<int32_t>(scaleX));
 106         serializer.append(static_cast<int32_t>(scaleY));
 107         serializer.append(static_cast<int32_t>(scaleZ));
 108         serializer.append(static_cast<int32_t>(pad_l_x));
 109         serializer.append(static_cast<int32_t>(pad_r_x));
 110         serializer.append(static_cast<int32_t>(pad_l_y));
 111         serializer.append(static_cast<int32_t>(pad_r_y));
 112         serializer.append(static_cast<int32_t>(pad_l_z));
 113         serializer.append(static_cast<int32_t>(pad_r_z));
 114     }
 115
 116     void serializeDataImpl(BlobSerializer& serializer) const override {
 117         IE_ASSERT(_inputEdges.size() == 1);
 118         IE_ASSERT(_outputEdges.size() == 1);
 119         IE_ASSERT(_tempBufferEdges.empty());
 120
 121         auto input = _inputEdges[0]->input();
 122         auto output = _outputEdges[0]->output();
 123
 124         input->serializeNewBuffer(serializer);
 125         output->serializeNewBuffer(serializer);
 126     }
 127 };
 128
 129
 130 class DeconvolutionToConvolutionContent final : public CalculatedDataContent {
 131 public:
 132     DeconvolutionToConvolutionContent(
 133             const DataContent::Ptr& origContent,
 134             int kernelSizeX, int kernelSizeY) :
 135             CalculatedDataContent({origContent}),
 136             _kerneSizeX(kernelSizeX), _kernelSizeY(kernelSizeY) {
 137     }
 138
 139     void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
 140         VPU_PROFILE(DeconvolutionToConvolutionContent);
 141
 142         IE_ASSERT(baseContents.size() == 1);
 143         IE_ASSERT(_desc.type() == DataType::FP16);
 144
 145         deconv_to_conv(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
 146     }
 147
 148 private:
 149     int _kerneSizeX;
 150     int _kernelSizeY;
 151 };
 152
 153
 154 class PassImpl final : public Pass {
 155 public:
 156     explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
 157
 158     void run(const Model::Ptr& model) override;
 159
 160 private:
 161     StageBuilder::Ptr _stageBuilder;
 162 };
 163
 164 void PassImpl::run(const Model::Ptr& model) {
 165     VPU_PROFILE(replaceDeconvByConv);
 166
 167     auto stages = model->getStages();
 168     for (const auto& stage : stages) {
 169         if (stage->type() != StageType::StubDeconv) {
 170             continue;
 171         }
 172
 173         auto kernelSizeX = stage->attrs().get<int>("kernelSizeX");
 174         auto kernelSizeY = stage->attrs().get<int>("kernelSizeY");
 175         auto kernelStrideX = stage->attrs().get<int>("kernelStrideX");
 176         auto kernelStrideY = stage->attrs().get<int>("kernelStrideY");
 177         auto groupSize = stage->attrs().get<int>("groupSize");
 178
 179         auto padLeft  = stage->attrs().get<int>("padLeft");
 180         auto padRight = stage->attrs().get<int>("padRight");
 181         auto padTop = stage->attrs().get<int>("padTop");
 182         auto padBottom = stage->attrs().get<int>("padBottom");
 183         auto deconvScale = stage->attrs().getOrDefault<float>("scaleFactor", 1.0);
 184
 185         /* Upsampling layer does not support negative paddings */
 186         if ((kernelSizeX - 1 - padLeft < 0) || (kernelSizeX - 1 - padRight < 0) ||
 187             (kernelSizeY - 1 - padTop < 0) || (kernelSizeY - 1 - padBottom < 0)) {
 188             continue;
 189         }
 190
 191         if (groupSize != 1) {
 192             continue;
 193         }
 194
 195         if ((padTop != padBottom) || (padLeft != padRight)) {
 196             continue;
 197         }
 198
 199         if (kernelSizeX > 15 || kernelSizeY > 15) {
 200             continue;
 201         }
 202
 203         auto input = stage->input(0);
 204         auto weights = stage->input(1);
 205         auto biases  = stage->input(2);
 206         auto output = stage->output(0);
 207         const auto& env = CompileEnv::get();
 208
 209         if (env.netConfig.hwDisabled(stage->origLayer()->name)) {
 210             continue;
 211         }
 212
 213         if (output->desc().numDims() < 4) {
 214             continue;
 215         }
 216
 217         // problem with Deconv/CommonSingleLayerTest
 218         auto origOutputX = kernelStrideX * (input->desc().dim(Dim::W)  - 1) + kernelSizeX - padLeft - padRight;
 219         auto origOutputY = kernelStrideY * (input->desc().dim(Dim::H)  - 1) + kernelSizeY - padTop - padBottom;
 220
 221         if ((origOutputX != output->desc().dim(Dim::W)) || (origOutputY != output->desc().dim(Dim::H))) {
 222             continue;
 223         }
 224
 225         model->disconnectStageDatas(stage);
 226
 227         DataDesc newDesc({1, 1, output->desc().dim(Dim::C), output->desc().dim(Dim::N)});
 228         newDesc.setDim(Dim::N, 1);
 229         newDesc.setDim(Dim::C, input->desc().dim(Dim::C));
 230         newDesc.setDim(Dim::H, (input->desc().dim(Dim::H) - 1) * kernelStrideY + 1 + (kernelSizeY - 1) * 2 - padTop - padBottom);
 231         newDesc.setDim(Dim::W, (input->desc().dim(Dim::W) - 1) * kernelStrideX + 1 + (kernelSizeX - 1) * 2 - padLeft - padRight);
 232
 233         auto newOutput = model->duplicateData(output, "@upsampleData", newDesc);
 234         auto newWeights = model->duplicateData(weights, "@upsampleData", weights->desc(),
 235                      std::make_shared<DeconvolutionToConvolutionContent>(weights->content(), kernelSizeX, kernelSizeY));
 236
 237         auto upsampleStage = model->addNewStage<UpsamplingStage>(
 238                 stage->origLayerName() + "@Upsample",
 239                 StageType::Upsampling,
 240                 stage->origLayer(),
 241                 {input},
 242                 {newOutput});
 243
 244         upsampleStage->attrs().set<int>("upsampling_factorx_x", kernelStrideX);
 245         upsampleStage->attrs().set<int>("upsampling_factorx_y", kernelStrideY);
 246         upsampleStage->attrs().set<int>("upsampling_factorx_z", 1);
 247         upsampleStage->attrs().set<int>("pad_l_x", (kernelSizeX - 1) - padLeft);
 248         upsampleStage->attrs().set<int>("pad_r_x", (kernelSizeX - 1) - padRight);
 249         upsampleStage->attrs().set<int>("pad_l_y", (kernelSizeY - 1) - padTop);
 250         upsampleStage->attrs().set<int>("pad_r_y", (kernelSizeY - 1) - padBottom);
 251         upsampleStage->attrs().set<int>("pad_l_z", 0);
 252         upsampleStage->attrs().set<int>("pad_r_z", 0);
 253
 254         auto newStage = model->addNewStage<StubStage>(
 255                 stage->origLayerName() + "@UpsampleConv",
 256                 StageType::StubConv,
 257                 stage->origLayer(),
 258                 {newOutput, newWeights, biases},
 259                 {output});
 260
 261         newStage->attrs().set<int>("kernelSizeX", kernelSizeX);
 262         newStage->attrs().set<int>("kernelSizeY", kernelSizeY);
 263         newStage->attrs().set<int>("kernelStrideX", 1);
 264         newStage->attrs().set<int>("kernelStrideY", 1);
 265         newStage->attrs().set<int>("padLeft", 0);
 266         newStage->attrs().set<int>("padRight", 0);
 267         newStage->attrs().set<int>("padTop", 0);
 268         newStage->attrs().set<int>("padBottom", 0);
 269         newStage->attrs().set<int>("dilationX", 1);
 270         newStage->attrs().set<int>("dilationY", 1);
 271         newStage->attrs().set<int>("groupSize", 1);
 272         newStage->attrs().set<bool>("tryHW", true);
 273         newStage->attrs().set<float>("scaleFactor", deconvScale);
 274
 275         model->removeStage(stage);
 276     }
 277 }
 278
 279 }  // namespace
 280
 281 Pass::Ptr PassManager::replaceDeconvByConv() {
 282     return std::make_shared<PassImpl>(_stageBuilder);
 283 }
 284
 285 }  // namespace vpu