1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <vpu/frontend/frontend.hpp>
11 #include <details/caseless.hpp>
12 #include <cpp/ie_cnn_network.h>
13 #include <precision_utils.h>
14 #include <ie_parallel.hpp>
16 #include <vpu/sw/utility.hpp>
17 #include <vpu/utils/ie_helpers.hpp>
18 #include <vpu/compile_env.hpp>
// Calculated constant content that converts the per-channel FP32 mean image
// held in an ie::PreProcessInfo into FP16 data for the VPU model.
// _desc is not declared in this class; presumably it is the data descriptor
// inherited from CalculatedDataContent - TODO confirm.
24 class MeanImageContent final : public CalculatedDataContent {
// Stores a copy of the pre-processing info (the _info member is held by value).
26 explicit MeanImageContent(const ie::PreProcessInfo& info) : _info(info) {}
// Returns the size in bytes of the temporary buffer that fillTempBuf writes to.
// The base-content list argument is unused.
29 size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
// Element count of one W x H x C image as described by _desc.
30 auto countElem = _desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C);
// NOTE(review): presumably the element count is enlarged for these layouts so
// that fillTempBuf has scratch room for the reorder step - confirm.
32 if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
// Elements are stored as fp16_t.
36 return countElem * sizeof(fp16_t);
// Fills tempBuf with the FP16 mean image. The base-content list argument is
// unused; tempBuf is reinterpreted as an array of fp16_t.
39 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
40 VPU_PROFILE(MeanImageContent);
// One mean-data blob is read per channel of the pre-processing info.
42 auto numOfChannel = _info.getNumberOfChannels();
// Number of elements in a single W x H channel plane.
44 auto imagePixels = _desc.dim(Dim::W) * _desc.dim(Dim::H);
45 auto countElem = _desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C);
// dstPtr is the final destination; dstPtr2 is where the converted planes are
// written first, and it starts out aliasing dstPtr.
47 auto dstPtr = static_cast<fp16_t*>(tempBuf);
48 auto dstPtr2 = dstPtr;
// NOTE(review): presumably dstPtr2 is redirected to a separate region of
// tempBuf for these layouts so the later reorder does not run in place - confirm.
50 if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
// Convert every channel's mean blob independently; each iteration writes the
// plane that starts at offset i * imagePixels in dstPtr2.
54 ie::parallel_for(numOfChannel, [=](int i) {
55 auto meanDataBlob = _info[i]->meanData;
// FP32 -> FP16 conversion of this channel's mean data into its plane.
57 ie::PrecisionUtils::f32tof16Arrays(
58 dstPtr2 + i * imagePixels,
59 meanDataBlob->buffer().as<const float*>(),
// For channel-minor layouts, reorder the planar result from dstPtr2 into dstPtr
// (presumably kchw_to_hwck takes (src, dst, desc) - verify against its declaration).
64 if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
65 kchw_to_hwck(dstPtr2, dstPtr, _desc);
// Copy of the network input's pre-processing info that provides the mean data.
70 ie::PreProcessInfo _info;
// Calculated constant content that produces one FP16 value per channel from
// the per-channel mean values held in an ie::PreProcessInfo.
// _desc is not declared in this class; presumably it is the data descriptor
// inherited from CalculatedDataContent - TODO confirm.
73 class MeanValueContent final : public CalculatedDataContent {
// Stores a copy of the pre-processing info (the _info member is held by value).
75 explicit MeanValueContent(const ie::PreProcessInfo& info) : _info(info) {}
// Returns the temporary buffer size in bytes: one fp16_t per channel.
// The base-content list argument is unused.
78 size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
79 return _info.getNumberOfChannels() * sizeof(fp16_t);
// Fills tempBuf with the negated per-channel mean values converted to FP16.
// The base-content list argument is unused; tempBuf is reinterpreted as fp16_t[].
82 void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
83 VPU_PROFILE(MeanValueContent);
// The descriptor must hold exactly one element per channel.
85 IE_ASSERT(_desc.totalDimSize() == _info.getNumberOfChannels());
87 auto dstPtr = static_cast<fp16_t*>(tempBuf);
// Each iteration writes only dstPtr[i]. The mean is negated before conversion;
// NOTE(review): presumably so that adding this constant as a bias subtracts
// the mean from the input - confirm against the stage that consumes it.
89 ie::parallel_for(_info.getNumberOfChannels(), [dstPtr, this](int i) {
90 dstPtr[i] = ie::PrecisionUtils::f32tof16(-_info[i]->meanValue);
// Copy of the network input's pre-processing info that provides the mean values.
95 ie::PreProcessInfo _info;
100 void FrontEnd::addPreProcessStages(const Model::Ptr& model) {
101 VPU_PROFILE(addPreProcessStages);
103 const auto& env = CompileEnv::get();
105 for (const auto& inputInfo : _ieNetworkParser.networkInputs) {
106 auto netInput = inputInfo.second;
107 IE_ASSERT(netInput != nullptr);
109 auto ieData = netInput->getInputData();
110 IE_ASSERT(ieData != nullptr);
112 const auto& preProcess = netInput->getPreProcess();
114 if (preProcess.getMeanVariant() != ie::NONE) {
115 auto input = getVpuData(ieData);
116 IE_ASSERT(input != nullptr);
118 int numOfChannel = preProcess.getNumberOfChannels();
120 env.log->debug("add pre-processing for input %s", input->name());
122 if (preProcess.getMeanVariant() == ie::MEAN_IMAGE) {
123 auto meanImage = model->addConstData(
124 input->name() + "@mean-image",
126 std::make_shared<MeanImageContent>(preProcess));
128 auto newInput = model->duplicateData(
130 "@after-mean-image");
132 bindData(newInput, ieData);
134 _stageBuilder->addSumStage(
143 auto meanValues = model->addConstData(
144 input->name() + "@mean-values",
145 DataDesc({numOfChannel}),
146 std::make_shared<MeanValueContent>(preProcess));
148 auto newInput = model->duplicateData(
150 "@after-mean-values");
152 bindData(newInput, ieData);
154 _stageBuilder->addBiasStage(
164 if (preProcess[0]->stdScale != 1.0f) {
165 for (int i = 1; i < numOfChannel; i++) {
166 if (!isFloatEqual(preProcess[i - 1]->stdScale, preProcess[i]->stdScale)) {
167 VPU_THROW_EXCEPTION << "Different values of stdScale are not supported";
171 auto newInput = model->duplicateData(
175 bindData(newInput, ieData);
177 _stageBuilder->addPowerStage(
179 input->name() + "@stdScale=" + std::to_string(preProcess[0]->stdScale),
181 preProcess[0]->stdScale,