Publishing 2019 R3 content
[platform/upstream/dldt.git] / inference-engine / src / vpu / graph_transformer / src / frontend / pre_process.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include <vpu/frontend/frontend.hpp>
6
7 #include <vector>
8 #include <memory>
9 #include <string>
10
11 #include <details/caseless.hpp>
12 #include <cpp/ie_cnn_network.h>
13 #include <precision_utils.h>
14 #include <ie_parallel.hpp>
15
16 #include <vpu/sw/utility.hpp>
17 #include <vpu/utils/ie_helpers.hpp>
18 #include <vpu/compile_env.hpp>
19
20 namespace vpu {
21
22 namespace {
23
24 class MeanImageContent final : public CalculatedDataContent {
25 public:
26     explicit MeanImageContent(const ie::PreProcessInfo& info) : _info(info) {}
27
28 protected:
29     size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
30         auto countElem = _desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C);
31
32         if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
33             countElem *= 2;
34         }
35
36         return countElem * sizeof(fp16_t);
37     }
38
39     void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
40         VPU_PROFILE(MeanImageContent);
41
42         auto numOfChannel = _info.getNumberOfChannels();
43
44         auto imagePixels = _desc.dim(Dim::W) * _desc.dim(Dim::H);
45         auto countElem = _desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C);
46
47         auto dstPtr = static_cast<fp16_t*>(tempBuf);
48         auto dstPtr2 = dstPtr;
49
50         if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
51             dstPtr2 += countElem;
52         }
53
54         ie::parallel_for(numOfChannel, [=](int i) {
55             auto meanDataBlob = _info[i]->meanData;
56
57             ie::PrecisionUtils::f32tof16Arrays(
58                 dstPtr2 + i * imagePixels,
59                 meanDataBlob->buffer().as<const float*>(),
60                 imagePixels,
61                 -1.0f);
62         });
63
64         if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
65             kchw_to_hwck(dstPtr2, dstPtr, _desc);
66         }
67     }
68
69 private:
70     ie::PreProcessInfo _info;
71 };
72
73 class MeanValueContent final : public CalculatedDataContent {
74 public:
75     explicit MeanValueContent(const ie::PreProcessInfo& info) : _info(info) {}
76
77 protected:
78     size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
79         return _info.getNumberOfChannels() * sizeof(fp16_t);
80     }
81
82     void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
83         VPU_PROFILE(MeanValueContent);
84
85         IE_ASSERT(_desc.totalDimSize() == _info.getNumberOfChannels());
86
87         auto dstPtr = static_cast<fp16_t*>(tempBuf);
88
89         ie::parallel_for(_info.getNumberOfChannels(), [dstPtr, this](int i) {
90             dstPtr[i] = ie::PrecisionUtils::f32tof16(-_info[i]->meanValue);
91         });
92     }
93
94 private:
95     ie::PreProcessInfo _info;
96 };
97
98 }  // namespace
99
100 void FrontEnd::addPreProcessStages(const Model::Ptr& model) {
101     VPU_PROFILE(addPreProcessStages);
102
103     const auto& env = CompileEnv::get();
104
105     for (const auto& inputInfo : _ieNetworkParser.networkInputs) {
106         auto netInput = inputInfo.second;
107         IE_ASSERT(netInput != nullptr);
108
109         auto ieData = netInput->getInputData();
110         IE_ASSERT(ieData != nullptr);
111
112         const auto& preProcess = netInput->getPreProcess();
113
114         if (preProcess.getMeanVariant() != ie::NONE) {
115             auto input = getVpuData(ieData);
116             IE_ASSERT(input != nullptr);
117
118             int numOfChannel = preProcess.getNumberOfChannels();
119
120             env.log->debug("add pre-processing for input %s", input->name());
121
122             if (preProcess.getMeanVariant() == ie::MEAN_IMAGE) {
123                 auto meanImage = model->addConstData(
124                     input->name() + "@mean-image",
125                     input->desc(),
126                     std::make_shared<MeanImageContent>(preProcess));
127
128                 auto newInput = model->duplicateData(
129                     input,
130                     "@after-mean-image");
131
132                 bindData(newInput, ieData);
133
134                 _stageBuilder->addSumStage(
135                     model,
136                     meanImage->name(),
137                     nullptr,
138                     input, meanImage,
139                     newInput);
140
141                 input = newInput;
142             } else {
143                 auto meanValues = model->addConstData(
144                     input->name() + "@mean-values",
145                     DataDesc({numOfChannel}),
146                     std::make_shared<MeanValueContent>(preProcess));
147
148                 auto newInput = model->duplicateData(
149                     input,
150                     "@after-mean-values");
151
152                 bindData(newInput, ieData);
153
154                 _stageBuilder->addBiasStage(
155                     model,
156                     meanValues->name(),
157                     nullptr,
158                     input, meanValues,
159                     newInput);
160
161                 input = newInput;
162             }
163
164             if (preProcess[0]->stdScale != 1.0f) {
165                 for (int i = 1; i < numOfChannel; i++) {
166                     if (!isFloatEqual(preProcess[i - 1]->stdScale, preProcess[i]->stdScale)) {
167                         VPU_THROW_EXCEPTION << "Different values of stdScale are not supported";
168                     }
169                 }
170
171                 auto newInput = model->duplicateData(
172                     input,
173                     "@after-std-scale");
174
175                 bindData(newInput, ieData);
176
177                 _stageBuilder->addPowerStage(
178                     model,
179                     input->name() + "@stdScale=" + std::to_string(preProcess[0]->stdScale),
180                     nullptr,
181                     preProcess[0]->stdScale,
182                     1.0f,
183                     0.0f,
184                     input,
185                     newInput);
186
187                 input = newInput;
188             }
189         }
190     }
191 }
192
193 }  // namespace vpu